//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <array>
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
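
// For example (illustrative IR, not taken from a test): the stale declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and a fresh declaration
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// is created; the calls themselves are migrated when they are visited later.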

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
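
// E.g. (illustrative): an old declaration such as
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is renamed and redeclared with an i8 immediate as the final parameter.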

// Upgrade the declaration of FP compare intrinsics whose return type changed
// from a scalar to a vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is already a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
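
// E.g. (illustrative): an old scalar-returning declaration such as
//   declare i8 @llvm.x86.avx512.mask.cmp.ps.256(...)
// is redeclared so that it returns an <8 x i1> mask instead of an i8.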

// Upgrade the declaration of multiply-and-add-bytes intrinsics whose input
// argument types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check whether the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
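
// E.g. (illustrative): for @llvm.x86.avx512.vpdpbusd.128 the two multiplicand
// operands, formerly <4 x i32>, become <16 x i8>; the accumulator operand and
// the return type stay <4 x i32>.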

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
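
// E.g. (illustrative): the avx512bf16 intrinsics originally modeled bf16
// values as i16, e.g. returning <8 x i16>; the upgraded declarations use the
// proper <8 x bfloat> type instead (and, for the dot-product forms above,
// take bfloat vectors as the second and third operands).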

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }
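
  // E.g. (illustrative): the old form of @llvm.x86.rdtscp took a pointer
  // operand through which the TSC_AUX value was stored; the upgraded,
  // zero-operand form returns both results directly instead.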

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
             .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
             .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
             .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
             .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
             .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
             .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
             .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
             .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
             .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
             .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
             .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
             .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
             .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
             .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
             .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
             .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Return true
// iff an upgrade happened. IsArm: 'arm.*'; !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }
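
  // E.g. (illustrative): an "llvm.aarch64.rbit" declaration maps onto the
  // generic "llvm.bitreverse" overloaded on its argument type, and the
  // target-specific thread.pointer intrinsics map onto the generic
  // "llvm.thread.pointer" overloaded on the return type.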

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: the bfdot intrinsics accept v4bf16 and v8bf16 instead
    // of v8i8 and v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }
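
    // E.g. (illustrative): "llvm.aarch64.neon.bfdot.v2f32.v8i8" is redeclared
    // as "llvm.aarch64.neon.bfdot.v2f32.v4bf16", replacing the i8 vector
    // operands with bfloat vectors of the same bit width.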

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
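
      // E.g. (illustrative): "llvm.aarch64.sve.tuple.get.nxv4i32" becomes the
      // generic "llvm.vector.extract", and the tuple.set/tuple.create forms
      // become "llvm.vector.insert" with the appropriate overload types.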

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need an upgrade for two reasons:
    // (1) When the address space of the first argument is shared [AS=3]
    //     (we upgrade it to use the shared_cluster address space [AS=7]).
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", it is the
    // older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
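
// For instance (illustrative, not a full signature): an old
// @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d declaration whose destination
// pointer still lives in AS 3, or whose trailing arguments are
// "... i64 %ch, i1 %mc_flag, i1 %ch_flag" with no final i32 cta_group flag,
// is reported here as needing an upgrade.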

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }
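
      // E.g. (illustrative): calls to the retired
      // @llvm.amdgcn.global.atomic.fadd intrinsic are rewritten as
      // "atomicrmw fadd" instructions when the calls themselves are upgraded;
      // only NewFn = nullptr is recorded at this point.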

      if (Name.starts_with("ldexp.")) {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to the new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
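
    // E.g. (illustrative): "llvm.experimental.vector.extract.v4i32.nxv4i32"
    // is renamed and redeclared as "llvm.vector.extract.v4i32.nxv4i32"; the
    // dropped "experimental" prefix is the only user-visible change.
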
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
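
    // E.g. (illustrative): the five-operand form
    //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 4, i1 false)
    // loses its explicit alignment operand; when the call is upgraded it is
    // re-emitted in the four-operand form with "align 4" attributes on the
    // pointer arguments instead.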

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics.
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics.
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
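
      // E.g. (illustrative): "llvm.nvvm.abs.i" has no one-intrinsic
      // replacement; UpgradeIntrinsicCall later expands each call to the
      // equivalent select/compare idiom for integer absolute value.
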
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

1594 case 'r': {
1595 if (Name.consume_front("riscv.")) {
1598 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1599 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1600 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1601 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1604 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1605 rename(F);
1606 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1607 return true;
1608 }
1609 break; // No other applicable upgrades.
1610 }
1611
1613 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1614 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1615 .Default(Intrinsic::not_intrinsic);
1616 if (ID != Intrinsic::not_intrinsic) {
1617 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1618 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1619 rename(F);
1620 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1621 return true;
1622 }
1623 break; // No other applicable upgrades.
1624 }
1625
1627 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1628 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1629 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1630 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1631 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1632 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1633 .Default(Intrinsic::not_intrinsic);
1634 if (ID != Intrinsic::not_intrinsic) {
1635 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1636 rename(F);
1637 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1638 return true;
1639 }
1640 break; // No other applicable upgrades.
1641 }
1642 break; // No other 'riscv.*' intrinsics
1643 }
1644 } break;
1645
1646 case 's':
1647 if (Name == "stackprotectorcheck") {
1648 NewFn = nullptr;
1649 return true;
1650 }
1651 break;
1652
1653 case 't':
1654 if (Name == "thread.pointer") {
1655 NewFn = Intrinsic::getOrInsertDeclaration(
1656 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1657 return true;
1658 }
1659 break;
1660
1661 case 'v': {
1662 if (Name == "var.annotation" && F->arg_size() == 4) {
1663 rename(F);
1664 NewFn = Intrinsic::getOrInsertDeclaration(
1665 F->getParent(), Intrinsic::var_annotation,
1666 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1667 return true;
1668 }
1669 break;
1670 }
1671
1672 case 'w':
1673 if (Name.consume_front("wasm.")) {
1676 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1677 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1678 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1679 .Default(Intrinsic::not_intrinsic);
1680 if (ID != Intrinsic::not_intrinsic) {
1681 rename(F);
1682 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1683 F->getReturnType());
1684 return true;
1685 }
1686
1687 if (Name.consume_front("dot.i8x16.i7x16.")) {
1689 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1690 .Case("add.signed",
1691 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1692 .Default(Intrinsic::not_intrinsic);
1693 if (ID != Intrinsic::not_intrinsic) {
1694 rename(F);
1695 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1696 return true;
1697 }
1698 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1699 }
1700 break; // No other 'wasm.*'.
1701 }
1702 break;
1703
1704 case 'x':
1705 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1706 return true;
1707 }
1708
1709 auto *ST = dyn_cast<StructType>(F->getReturnType());
1710 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1711 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1712 // Replace return type with literal non-packed struct. Only do this for
1713 // intrinsics declared to return a struct, not for intrinsics with
1714 // overloaded return type, in which case the exact struct type will be
1715 // mangled into the name.
1716 SmallVector<Intrinsic::IITDescriptor> Desc;
1717 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1718 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1719 auto *FT = F->getFunctionType();
1720 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1721 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1722 std::string Name = F->getName().str();
1723 rename(F);
1724 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1725 Name, F->getParent());
1726
1727 // The new function may also need remangling.
1728 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1729 NewFn = *Result;
1730 return true;
1731 }
1732 }
1733
1734 // Remangle our intrinsic since we upgrade the mangling
1735 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1736 if (Result != std::nullopt) {
1737 NewFn = *Result;
1738 return true;
1739 }
1740
1741 // This may not belong here. This function is effectively being overloaded
1742 // to both detect an intrinsic which needs upgrading, and to provide the
1743 // upgraded form of the intrinsic. We should perhaps have two separate
1744 // functions for this.
1745 return false;
1746}
1747
1748bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1749 bool CanUpgradeDebugIntrinsicsToRecords) {
1750 NewFn = nullptr;
1751 bool Upgraded =
1752 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1753
1754 // Upgrade intrinsic attributes. This does not change the function.
1755 if (NewFn)
1756 F = NewFn;
1757 if (Intrinsic::ID id = F->getIntrinsicID()) {
1758 // Only do this if the intrinsic signature is valid.
1759 SmallVector<Type *> OverloadTys;
1760 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1761 F->setAttributes(
1762 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1763 }
1764 return Upgraded;
1765}
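// Illustrative usage (a sketch, not part of this file): module upgrade code
// typically calls this once per declared function and, when it returns true,
// rewrites every call site through UpgradeIntrinsicCall:
//
//   Function *NewFn;
//   if (UpgradeIntrinsicFunction(F, NewFn))
//     for (User *U : llvm::make_early_inc_range(F->users()))
//       if (auto *CB = dyn_cast<CallBase>(U))
//         UpgradeIntrinsicCall(CB, NewFn);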
1766
1767GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1768 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1769 GV->getName() == "llvm.global_dtors")) ||
1770 !GV->hasInitializer())
1771 return nullptr;
1772 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1773 if (!ATy)
1774 return nullptr;
1775 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1776 if (!STy || STy->getNumElements() != 2)
1777 return nullptr;
1778
1779 LLVMContext &C = GV->getContext();
1780 IRBuilder<> IRB(C);
1781 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1782 IRB.getPtrTy());
1783 Constant *Init = GV->getInitializer();
1784 unsigned N = Init->getNumOperands();
1785 std::vector<Constant *> NewCtors(N);
1786 for (unsigned i = 0; i != N; ++i) {
1787 auto Ctor = cast<Constant>(Init->getOperand(i));
1788 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1789 Ctor->getAggregateElement(1),
1790 Constant::getNullValue(IRB.getPtrTy()));
1791 }
1792 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1793
1794 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1795 NewInit, GV->getName());
1796}
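// For example (illustrative IR, placeholder names): a two-field ctor table
// such as
//   @llvm.global_ctors = appending global [1 x { i32, ptr }]
//                        [{ i32, ptr } { i32 65535, ptr @ctor }]
// is rebuilt in the three-field form, with a null associated-data pointer:
//   @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }]
//                        [{ i32, ptr, ptr } { i32 65535, ptr @ctor, ptr null }]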
1797
1798// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1799// to byte shuffles.
1800static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1801 unsigned Shift) {
1802 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1803 unsigned NumElts = ResultTy->getNumElements() * 8;
1804
1805 // Bitcast from a 64-bit element type to a byte element type.
1806 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1807 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1808
1809 // We'll be shuffling in zeroes.
1810 Value *Res = Constant::getNullValue(VecTy);
1811
1812 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1813 // we'll just return the zero vector.
1814 if (Shift < 16) {
1815 int Idxs[64];
1816 // 256/512-bit version is split into 2/4 16-byte lanes.
1817 for (unsigned l = 0; l != NumElts; l += 16)
1818 for (unsigned i = 0; i != 16; ++i) {
1819 unsigned Idx = NumElts + i - Shift;
1820 if (Idx < NumElts)
1821 Idx -= NumElts - 16; // end of lane, switch operand.
1822 Idxs[l + i] = Idx + l;
1823 }
1824
1825 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1826 }
1827
1828 // Bitcast back to a 64-bit element type.
1829 return Builder.CreateBitCast(Res, ResultTy, "cast");
1830}
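// For example (illustrative IR, %op is a placeholder): a 128-bit PSLLDQ by
// Shift == 4 becomes
//   %cast = bitcast <2 x i64> %op to <16 x i8>
//   %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %cast,
//          <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17,
//                      i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
//                      i32 24, i32 25, i32 26, i32 27>
// i.e. four zero bytes followed by the low twelve bytes of %op.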
1831
1832// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1833// to byte shuffles.
1834static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1835 unsigned Shift) {
1836 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1837 unsigned NumElts = ResultTy->getNumElements() * 8;
1838
1839 // Bitcast from a 64-bit element type to a byte element type.
1840 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1841 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1842
1843 // We'll be shuffling in zeroes.
1844 Value *Res = Constant::getNullValue(VecTy);
1845
1846 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1847 // we'll just return the zero vector.
1848 if (Shift < 16) {
1849 int Idxs[64];
1850 // 256/512-bit version is split into 2/4 16-byte lanes.
1851 for (unsigned l = 0; l != NumElts; l += 16)
1852 for (unsigned i = 0; i != 16; ++i) {
1853 unsigned Idx = i + Shift;
1854 if (Idx >= 16)
1855 Idx += NumElts - 16; // end of lane, switch operand.
1856 Idxs[l + i] = Idx + l;
1857 }
1858
1859 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1860 }
1861
1862 // Bitcast back to a 64-bit element type.
1863 return Builder.CreateBitCast(Res, ResultTy, "cast");
1864}
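// Mirror of the PSLLDQ case above (illustrative): for Shift == 4 on a 128-bit
// vector the shuffle reads bytes 4..15 of the input followed by four zero
// bytes taken from the null vector.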
1865
1866static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1867 unsigned NumElts) {
1868 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1869 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1870 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1871 Mask = Builder.CreateBitCast(Mask, MaskTy);
1872
1873 // If we have fewer than 8 elements (1, 2, or 4), then the starting mask was an
1874 // i8 and we need to extract down to the right number of elements.
1875 if (NumElts <= 4) {
1876 int Indices[4];
1877 for (unsigned i = 0; i != NumElts; ++i)
1878 Indices[i] = i;
1879 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1880 "extract");
1881 }
1882
1883 return Mask;
1884}
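// For instance (illustrative IR, %mask is a placeholder): an i8 mask applied
// to a 4-element vector is bitcast and then narrowed with a shuffle:
//   %m8 = bitcast i8 %mask to <8 x i1>
//   %m4 = shufflevector <8 x i1> %m8, <8 x i1> %m8,
//         <4 x i32> <i32 0, i32 1, i32 2, i32 3>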
1885
1886static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1887 Value *Op1) {
1888 // If the mask is all ones just emit the first operation.
1889 if (const auto *C = dyn_cast<Constant>(Mask))
1890 if (C->isAllOnesValue())
1891 return Op0;
1892
1893 Mask = getX86MaskVec(Builder, Mask,
1894 cast<FixedVectorType>(Op0->getType())->getNumElements());
1895 return Builder.CreateSelect(Mask, Op0, Op1);
1896}
1897
1898static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1899 Value *Op1) {
1900 // If the mask is all ones just emit the first operation.
1901 if (const auto *C = dyn_cast<Constant>(Mask))
1902 if (C->isAllOnesValue())
1903 return Op0;
1904
1905 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1906 Mask->getType()->getIntegerBitWidth());
1907 Mask = Builder.CreateBitCast(Mask, MaskTy);
1908 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1909 return Builder.CreateSelect(Mask, Op0, Op1);
1910}
1911
1912// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1913// PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
1914// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1915static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1916 Value *Op1, Value *Shift,
1917 Value *Passthru, Value *Mask,
1918 bool IsVALIGN) {
1919 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1920
1921 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1922 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1923 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1924 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1925
1926 // Mask the immediate for VALIGN.
1927 if (IsVALIGN)
1928 ShiftVal &= (NumElts - 1);
1929
1930 // If palignr is shifting the pair of vectors more than the size of two
1931 // lanes, emit zero.
1932 if (ShiftVal >= 32)
1934
1935 // If palignr is shifting the pair of input vectors more than one lane,
1936 // but less than two lanes, convert to shifting in zeroes.
1937 if (ShiftVal > 16) {
1938 ShiftVal -= 16;
1939 Op1 = Op0;
1940 Op0 = llvm::Constant::getNullValue(Op0->getType());
1941 }
1942
1943 int Indices[64];
1944 // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1945 for (unsigned l = 0; l < NumElts; l += 16) {
1946 for (unsigned i = 0; i != 16; ++i) {
1947 unsigned Idx = ShiftVal + i;
1948 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1949 Idx += NumElts - 16; // End of lane, switch operand.
1950 Indices[l + i] = Idx + l;
1951 }
1952 }
1953
1954 Value *Align = Builder.CreateShuffleVector(
1955 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1956
1957 return emitX86Select(Builder, Mask, Align, Passthru);
1958}
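// For instance (illustrative IR, placeholder operands): PALIGNR with
// ShiftVal == 4 on 16-byte vectors selects bytes 4..19 of the concatenation
// Op0:Op1:
//   %palignr = shufflevector <16 x i8> %op1, <16 x i8> %op0,
//              <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9,
//                          i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
//                          i32 16, i32 17, i32 18, i32 19>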
1959
1960static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1961 bool ZeroMask, bool IndexForm) {
1962 Type *Ty = CI.getType();
1963 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1964 unsigned EltWidth = Ty->getScalarSizeInBits();
1965 bool IsFloat = Ty->isFPOrFPVectorTy();
1966 Intrinsic::ID IID;
1967 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1968 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1969 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1970 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1971 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1972 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1973 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1974 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1975 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1976 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1977 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1978 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1979 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1980 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1981 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1982 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1983 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1984 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1985 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1986 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1987 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1988 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1989 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1990 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1991 else if (VecWidth == 128 && EltWidth == 16)
1992 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1993 else if (VecWidth == 256 && EltWidth == 16)
1994 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1995 else if (VecWidth == 512 && EltWidth == 16)
1996 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1997 else if (VecWidth == 128 && EltWidth == 8)
1998 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1999 else if (VecWidth == 256 && EltWidth == 8)
2000 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2001 else if (VecWidth == 512 && EltWidth == 8)
2002 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2003 else
2004 llvm_unreachable("Unexpected intrinsic");
2005
2006 Value *Args[] = {CI.getArgOperand(0), CI.getArgOperand(1),
2007 CI.getArgOperand(2)};
2008
2009 // If this isn't index form, we need to swap operands 0 and 1.
2010 if (!IndexForm)
2011 std::swap(Args[0], Args[1]);
2012
2013 Value *V = Builder.CreateIntrinsic(IID, Args);
2014 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2015 : Builder.CreateBitCast(CI.getArgOperand(1),
2016 Ty);
2017 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2018}
2019
2020static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2021 Intrinsic::ID IID) {
2022 Type *Ty = CI.getType();
2023 Value *Op0 = CI.getOperand(0);
2024 Value *Op1 = CI.getOperand(1);
2025 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2026
2027 if (CI.arg_size() == 4) { // For masked intrinsics.
2028 Value *VecSrc = CI.getOperand(2);
2029 Value *Mask = CI.getOperand(3);
2030 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2031 }
2032 return Res;
2033}
2034
2035static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2036 bool IsRotateRight) {
2037 Type *Ty = CI.getType();
2038 Value *Src = CI.getArgOperand(0);
2039 Value *Amt = CI.getArgOperand(1);
2040
2041 // The amount may be a scalar immediate, in which case create a splat vector.
2042 // Funnel shift amounts are treated as modulo, and types are all power-of-2, so
2043 // we only care about the lowest log2 bits anyway.
2044 if (Amt->getType() != Ty) {
2045 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2046 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2047 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2048 }
2049
2050 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2051 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2052
2053 if (CI.arg_size() == 4) { // For masked intrinsics.
2054 Value *VecSrc = CI.getOperand(2);
2055 Value *Mask = CI.getOperand(3);
2056 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2057 }
2058 return Res;
2059}
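// A rotate is a funnel shift with both inputs equal; e.g. (sketch, placeholder
// values) a rotate-left of <4 x i32> by a splatted amount lowers to:
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %src, <4 x i32> %src,
//                                        <4 x i32> %amt)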
2060
2061static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2062 bool IsSigned) {
2063 Type *Ty = CI.getType();
2064 Value *LHS = CI.getArgOperand(0);
2065 Value *RHS = CI.getArgOperand(1);
2066
2067 CmpInst::Predicate Pred;
2068 switch (Imm) {
2069 case 0x0:
2070 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2071 break;
2072 case 0x1:
2073 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2074 break;
2075 case 0x2:
2076 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2077 break;
2078 case 0x3:
2079 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2080 break;
2081 case 0x4:
2082 Pred = ICmpInst::ICMP_EQ;
2083 break;
2084 case 0x5:
2085 Pred = ICmpInst::ICMP_NE;
2086 break;
2087 case 0x6:
2088 return Constant::getNullValue(Ty); // FALSE
2089 case 0x7:
2090 return Constant::getAllOnesValue(Ty); // TRUE
2091 default:
2092 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2093 }
2094
2095 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2096 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2097 return Ext;
2098}
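// e.g. (illustrative IR) a signed vpcom with Imm == 0 lowers to:
//   %c = icmp slt <16 x i8> %lhs, %rhs
//   %r = sext <16 x i1> %c to <16 x i8>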
2099
2100static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2101 bool IsShiftRight, bool ZeroMask) {
2102 Type *Ty = CI.getType();
2103 Value *Op0 = CI.getArgOperand(0);
2104 Value *Op1 = CI.getArgOperand(1);
2105 Value *Amt = CI.getArgOperand(2);
2106
2107 if (IsShiftRight)
2108 std::swap(Op0, Op1);
2109
2110 // The amount may be a scalar immediate, in which case create a splat vector.
2111 // Funnel shift amounts are treated as modulo, and types are all power-of-2, so
2112 // we only care about the lowest log2 bits anyway.
2113 if (Amt->getType() != Ty) {
2114 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2115 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2116 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2117 }
2118
2119 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2120 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2121
2122 unsigned NumArgs = CI.arg_size();
2123 if (NumArgs >= 4) { // For masked intrinsics.
2124 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2125 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2126 CI.getArgOperand(0);
2127 Value *Mask = CI.getOperand(NumArgs - 1);
2128 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2129 }
2130 return Res;
2131}
2132
2133static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2134 Value *Mask, bool Aligned) {
2135 const Align Alignment =
2136 Aligned
2137 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2138 : Align(1);
2139
2140 // If the mask is all ones just emit a regular store.
2141 if (const auto *C = dyn_cast<Constant>(Mask))
2142 if (C->isAllOnesValue())
2143 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2144
2145 // Convert the mask from an integer type to a vector of i1.
2146 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2147 Mask = getX86MaskVec(Builder, Mask, NumElts);
2148 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2149}
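// With a non-constant mask and an aligned <4 x float> store, this emits
// (roughly, placeholder names):
//   call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %ptr,
//                                         i32 16, <4 x i1> %mask)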
2150
2151static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2152 Value *Passthru, Value *Mask, bool Aligned) {
2153 Type *ValTy = Passthru->getType();
2154 const Align Alignment =
2155 Aligned
2156 ? Align(
2157 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2158 8)
2159 : Align(1);
2160
2161 // If the mask is all ones just emit a regular load.
2162 if (const auto *C = dyn_cast<Constant>(Mask))
2163 if (C->isAllOnesValue())
2164 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2165
2166 // Convert the mask from an integer type to a vector of i1.
2167 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2168 Mask = getX86MaskVec(Builder, Mask, NumElts);
2169 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2170}
2171
2172static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2173 Type *Ty = CI.getType();
2174 Value *Op0 = CI.getArgOperand(0);
2175 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2176 {Op0, Builder.getInt1(false)});
2177 if (CI.arg_size() == 3)
2178 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2179 return Res;
2180}
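// e.g. (illustrative IR) a pabs call becomes
//   %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %x, i1 false)
// followed by a select against the passthru operand for the masked forms.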
2181
2182static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2183 Type *Ty = CI.getType();
2184
2185 // Arguments have a vXi32 type so cast to vXi64.
2186 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2187 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2188
2189 if (IsSigned) {
2190 // Shift left then arithmetic shift right.
2191 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2192 LHS = Builder.CreateShl(LHS, ShiftAmt);
2193 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2194 RHS = Builder.CreateShl(RHS, ShiftAmt);
2195 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2196 } else {
2197 // Clear the upper bits.
2198 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2199 LHS = Builder.CreateAnd(LHS, Mask);
2200 RHS = Builder.CreateAnd(RHS, Mask);
2201 }
2202
2203 Value *Res = Builder.CreateMul(LHS, RHS);
2204
2205 if (CI.arg_size() == 4)
2206 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2207
2208 return Res;
2209}
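// The shl/ashr pair sign-extends the low 32 bits of each i64 lane in place,
// e.g. for the signed case (sketch):
//   %shl = shl <2 x i64> %lhs, <i64 32, i64 32>
//   %sext = ashr <2 x i64> %shl, <i64 32, i64 32>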
2210
2211// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2212static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2213 Value *Mask) {
2214 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2215 if (Mask) {
2216 const auto *C = dyn_cast<Constant>(Mask);
2217 if (!C || !C->isAllOnesValue())
2218 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2219 }
2220
2221 if (NumElts < 8) {
2222 int Indices[8];
2223 for (unsigned i = 0; i != NumElts; ++i)
2224 Indices[i] = i;
2225 for (unsigned i = NumElts; i != 8; ++i)
2226 Indices[i] = NumElts + i % NumElts;
2227 Vec = Builder.CreateShuffleVector(Vec,
2228 Constant::getNullValue(Vec->getType()),
2229 Indices);
2230 }
2231 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2232}
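// For example (illustrative): a <4 x i1> compare result is padded to <8 x i1>
// with zeros from the null vector and bitcast to i8, so the top four bits of
// the returned mask are always clear.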
2233
2234static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2235 unsigned CC, bool Signed) {
2236 Value *Op0 = CI.getArgOperand(0);
2237 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2238
2239 Value *Cmp;
2240 if (CC == 3) {
2241 Cmp = Constant::getNullValue(
2242 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2243 } else if (CC == 7) {
2244 Cmp = Constant::getAllOnesValue(
2245 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2246 } else {
2247 ICmpInst::Predicate Pred;
2248 switch (CC) {
2249 default: llvm_unreachable("Unknown condition code");
2250 case 0: Pred = ICmpInst::ICMP_EQ; break;
2251 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2252 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2253 case 4: Pred = ICmpInst::ICMP_NE; break;
2254 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2255 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2256 }
2257 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2258 }
2259
2260 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2261
2262 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2263}
2264
2265// Replace a masked intrinsic with an older unmasked intrinsic.
2266static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
2267 Intrinsic::ID IID) {
2268 Value *Rep =
2269 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2270 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2271}
2272
2273static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2274 Value *A = CI.getArgOperand(0);
2275 Value *B = CI.getArgOperand(1);
2276 Value *Src = CI.getArgOperand(2);
2277 Value *Mask = CI.getArgOperand(3);
2278
2279 Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2280 Value *Cmp = Builder.CreateIsNotNull(AndNode);
2281 Value *Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2282 Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2283 Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2284 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2285}
2286
2287static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2288 Value *Op = CI.getArgOperand(0);
2289 Type *ReturnOp = CI.getType();
2290 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2291 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2292 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2293}
2294
2295// Replace the intrinsic with an unmasked version and a select.
2296static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2297 CallBase &CI, Value *&Rep) {
2298 Name = Name.substr(12); // Remove avx512.mask.
2299
2300 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2301 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2302 Intrinsic::ID IID;
2303 if (Name.starts_with("max.p")) {
2304 if (VecWidth == 128 && EltWidth == 32)
2305 IID = Intrinsic::x86_sse_max_ps;
2306 else if (VecWidth == 128 && EltWidth == 64)
2307 IID = Intrinsic::x86_sse2_max_pd;
2308 else if (VecWidth == 256 && EltWidth == 32)
2309 IID = Intrinsic::x86_avx_max_ps_256;
2310 else if (VecWidth == 256 && EltWidth == 64)
2311 IID = Intrinsic::x86_avx_max_pd_256;
2312 else
2313 llvm_unreachable("Unexpected intrinsic");
2314 } else if (Name.starts_with("min.p")) {
2315 if (VecWidth == 128 && EltWidth == 32)
2316 IID = Intrinsic::x86_sse_min_ps;
2317 else if (VecWidth == 128 && EltWidth == 64)
2318 IID = Intrinsic::x86_sse2_min_pd;
2319 else if (VecWidth == 256 && EltWidth == 32)
2320 IID = Intrinsic::x86_avx_min_ps_256;
2321 else if (VecWidth == 256 && EltWidth == 64)
2322 IID = Intrinsic::x86_avx_min_pd_256;
2323 else
2324 llvm_unreachable("Unexpected intrinsic");
2325 } else if (Name.starts_with("pshuf.b.")) {
2326 if (VecWidth == 128)
2327 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2328 else if (VecWidth == 256)
2329 IID = Intrinsic::x86_avx2_pshuf_b;
2330 else if (VecWidth == 512)
2331 IID = Intrinsic::x86_avx512_pshuf_b_512;
2332 else
2333 llvm_unreachable("Unexpected intrinsic");
2334 } else if (Name.starts_with("pmul.hr.sw.")) {
2335 if (VecWidth == 128)
2336 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2337 else if (VecWidth == 256)
2338 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2339 else if (VecWidth == 512)
2340 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2341 else
2342 llvm_unreachable("Unexpected intrinsic");
2343 } else if (Name.starts_with("pmulh.w.")) {
2344 if (VecWidth == 128)
2345 IID = Intrinsic::x86_sse2_pmulh_w;
2346 else if (VecWidth == 256)
2347 IID = Intrinsic::x86_avx2_pmulh_w;
2348 else if (VecWidth == 512)
2349 IID = Intrinsic::x86_avx512_pmulh_w_512;
2350 else
2351 llvm_unreachable("Unexpected intrinsic");
2352 } else if (Name.starts_with("pmulhu.w.")) {
2353 if (VecWidth == 128)
2354 IID = Intrinsic::x86_sse2_pmulhu_w;
2355 else if (VecWidth == 256)
2356 IID = Intrinsic::x86_avx2_pmulhu_w;
2357 else if (VecWidth == 512)
2358 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2359 else
2360 llvm_unreachable("Unexpected intrinsic");
2361 } else if (Name.starts_with("pmaddw.d.")) {
2362 if (VecWidth == 128)
2363 IID = Intrinsic::x86_sse2_pmadd_wd;
2364 else if (VecWidth == 256)
2365 IID = Intrinsic::x86_avx2_pmadd_wd;
2366 else if (VecWidth == 512)
2367 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2368 else
2369 llvm_unreachable("Unexpected intrinsic");
2370 } else if (Name.starts_with("pmaddubs.w.")) {
2371 if (VecWidth == 128)
2372 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2373 else if (VecWidth == 256)
2374 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2375 else if (VecWidth == 512)
2376 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2377 else
2378 llvm_unreachable("Unexpected intrinsic");
2379 } else if (Name.starts_with("packsswb.")) {
2380 if (VecWidth == 128)
2381 IID = Intrinsic::x86_sse2_packsswb_128;
2382 else if (VecWidth == 256)
2383 IID = Intrinsic::x86_avx2_packsswb;
2384 else if (VecWidth == 512)
2385 IID = Intrinsic::x86_avx512_packsswb_512;
2386 else
2387 llvm_unreachable("Unexpected intrinsic");
2388 } else if (Name.starts_with("packssdw.")) {
2389 if (VecWidth == 128)
2390 IID = Intrinsic::x86_sse2_packssdw_128;
2391 else if (VecWidth == 256)
2392 IID = Intrinsic::x86_avx2_packssdw;
2393 else if (VecWidth == 512)
2394 IID = Intrinsic::x86_avx512_packssdw_512;
2395 else
2396 llvm_unreachable("Unexpected intrinsic");
2397 } else if (Name.starts_with("packuswb.")) {
2398 if (VecWidth == 128)
2399 IID = Intrinsic::x86_sse2_packuswb_128;
2400 else if (VecWidth == 256)
2401 IID = Intrinsic::x86_avx2_packuswb;
2402 else if (VecWidth == 512)
2403 IID = Intrinsic::x86_avx512_packuswb_512;
2404 else
2405 llvm_unreachable("Unexpected intrinsic");
2406 } else if (Name.starts_with("packusdw.")) {
2407 if (VecWidth == 128)
2408 IID = Intrinsic::x86_sse41_packusdw;
2409 else if (VecWidth == 256)
2410 IID = Intrinsic::x86_avx2_packusdw;
2411 else if (VecWidth == 512)
2412 IID = Intrinsic::x86_avx512_packusdw_512;
2413 else
2414 llvm_unreachable("Unexpected intrinsic");
2415 } else if (Name.starts_with("vpermilvar.")) {
2416 if (VecWidth == 128 && EltWidth == 32)
2417 IID = Intrinsic::x86_avx_vpermilvar_ps;
2418 else if (VecWidth == 128 && EltWidth == 64)
2419 IID = Intrinsic::x86_avx_vpermilvar_pd;
2420 else if (VecWidth == 256 && EltWidth == 32)
2421 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2422 else if (VecWidth == 256 && EltWidth == 64)
2423 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2424 else if (VecWidth == 512 && EltWidth == 32)
2425 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2426 else if (VecWidth == 512 && EltWidth == 64)
2427 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2428 else
2429 llvm_unreachable("Unexpected intrinsic");
2430 } else if (Name == "cvtpd2dq.256") {
2431 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2432 } else if (Name == "cvtpd2ps.256") {
2433 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2434 } else if (Name == "cvttpd2dq.256") {
2435 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2436 } else if (Name == "cvttps2dq.128") {
2437 IID = Intrinsic::x86_sse2_cvttps2dq;
2438 } else if (Name == "cvttps2dq.256") {
2439 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2440 } else if (Name.starts_with("permvar.")) {
2441 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2442 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2443 IID = Intrinsic::x86_avx2_permps;
2444 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2445 IID = Intrinsic::x86_avx2_permd;
2446 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2447 IID = Intrinsic::x86_avx512_permvar_df_256;
2448 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2449 IID = Intrinsic::x86_avx512_permvar_di_256;
2450 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2451 IID = Intrinsic::x86_avx512_permvar_sf_512;
2452 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2453 IID = Intrinsic::x86_avx512_permvar_si_512;
2454 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2455 IID = Intrinsic::x86_avx512_permvar_df_512;
2456 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2457 IID = Intrinsic::x86_avx512_permvar_di_512;
2458 else if (VecWidth == 128 && EltWidth == 16)
2459 IID = Intrinsic::x86_avx512_permvar_hi_128;
2460 else if (VecWidth == 256 && EltWidth == 16)
2461 IID = Intrinsic::x86_avx512_permvar_hi_256;
2462 else if (VecWidth == 512 && EltWidth == 16)
2463 IID = Intrinsic::x86_avx512_permvar_hi_512;
2464 else if (VecWidth == 128 && EltWidth == 8)
2465 IID = Intrinsic::x86_avx512_permvar_qi_128;
2466 else if (VecWidth == 256 && EltWidth == 8)
2467 IID = Intrinsic::x86_avx512_permvar_qi_256;
2468 else if (VecWidth == 512 && EltWidth == 8)
2469 IID = Intrinsic::x86_avx512_permvar_qi_512;
2470 else
2471 llvm_unreachable("Unexpected intrinsic");
2472 } else if (Name.starts_with("dbpsadbw.")) {
2473 if (VecWidth == 128)
2474 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2475 else if (VecWidth == 256)
2476 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2477 else if (VecWidth == 512)
2478 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2479 else
2480 llvm_unreachable("Unexpected intrinsic");
2481 } else if (Name.starts_with("pmultishift.qb.")) {
2482 if (VecWidth == 128)
2483 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2484 else if (VecWidth == 256)
2485 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2486 else if (VecWidth == 512)
2487 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2488 else
2489 llvm_unreachable("Unexpected intrinsic");
2490 } else if (Name.starts_with("conflict.")) {
2491 if (Name[9] == 'd' && VecWidth == 128)
2492 IID = Intrinsic::x86_avx512_conflict_d_128;
2493 else if (Name[9] == 'd' && VecWidth == 256)
2494 IID = Intrinsic::x86_avx512_conflict_d_256;
2495 else if (Name[9] == 'd' && VecWidth == 512)
2496 IID = Intrinsic::x86_avx512_conflict_d_512;
2497 else if (Name[9] == 'q' && VecWidth == 128)
2498 IID = Intrinsic::x86_avx512_conflict_q_128;
2499 else if (Name[9] == 'q' && VecWidth == 256)
2500 IID = Intrinsic::x86_avx512_conflict_q_256;
2501 else if (Name[9] == 'q' && VecWidth == 512)
2502 IID = Intrinsic::x86_avx512_conflict_q_512;
2503 else
2504 llvm_unreachable("Unexpected intrinsic");
2505 } else if (Name.starts_with("pavg.")) {
2506 if (Name[5] == 'b' && VecWidth == 128)
2507 IID = Intrinsic::x86_sse2_pavg_b;
2508 else if (Name[5] == 'b' && VecWidth == 256)
2509 IID = Intrinsic::x86_avx2_pavg_b;
2510 else if (Name[5] == 'b' && VecWidth == 512)
2511 IID = Intrinsic::x86_avx512_pavg_b_512;
2512 else if (Name[5] == 'w' && VecWidth == 128)
2513 IID = Intrinsic::x86_sse2_pavg_w;
2514 else if (Name[5] == 'w' && VecWidth == 256)
2515 IID = Intrinsic::x86_avx2_pavg_w;
2516 else if (Name[5] == 'w' && VecWidth == 512)
2517 IID = Intrinsic::x86_avx512_pavg_w_512;
2518 else
2519 llvm_unreachable("Unexpected intrinsic");
2520 } else
2521 return false;
2522
2523 SmallVector<Value *, 4> Args(CI.args());
2524 Args.pop_back();
2525 Args.pop_back();
2526 Rep = Builder.CreateIntrinsic(IID, Args);
2527 unsigned NumArgs = CI.arg_size();
2528 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2529 CI.getArgOperand(NumArgs - 2));
2530 return true;
2531}
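// e.g. (illustrative IR) @llvm.x86.avx512.mask.max.ps.128(%a, %b, %src, %m)
// becomes an unmasked max plus a select:
//   %v = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a, <4 x float> %b)
//   %r = select <4 x i1> %m4, <4 x float> %v, <4 x float> %src
// where %m4 is the i8 mask converted to <4 x i1> by getX86MaskVec.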
2532
2533/// Upgrade the comment in a call to inline asm that represents an objc
2534/// retain/release marker.
2535void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2536 size_t Pos;
2537 if (AsmStr->find("mov\tfp") == 0 &&
2538 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2539 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2540 AsmStr->replace(Pos, 1, ";");
2541 }
2542}
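// For example (illustrative): an asm string containing
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has its "#" replaced with ";" as the comment character:
//   "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"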
2543
2544static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2545 Function *F, IRBuilder<> &Builder) {
2546 Value *Rep = nullptr;
2547
2548 if (Name == "abs.i" || Name == "abs.ll") {
2549 Value *Arg = CI->getArgOperand(0);
2550 Value *Neg = Builder.CreateNeg(Arg, "neg");
2551 Value *Cmp = Builder.CreateICmpSGE(
2552 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2553 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2554 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2555 Type *Ty = (Name == "abs.bf16")
2556 ? Builder.getBFloatTy()
2557 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2558 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2559 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2560 Rep = Builder.CreateBitCast(Abs, CI->getType());
2561 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2562 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2563 : Intrinsic::nvvm_fabs;
2564 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2565 } else if (Name.consume_front("ex2.approx.")) {
2566 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2567 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2568 : Intrinsic::nvvm_ex2_approx;
2569 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2570 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2571 Name.starts_with("atomic.load.add.f64.p")) {
2572 Value *Ptr = CI->getArgOperand(0);
2573 Value *Val = CI->getArgOperand(1);
2574 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2575 AtomicOrdering::SequentiallyConsistent);
2576 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2577 Name.starts_with("atomic.load.dec.32.p")) {
2578 Value *Ptr = CI->getArgOperand(0);
2579 Value *Val = CI->getArgOperand(1);
2580 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2581 : AtomicRMWInst::UDecWrap;
2582 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2583 AtomicOrdering::SequentiallyConsistent);
2584 } else if (Name.consume_front("max.") &&
2585 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2586 Name == "ui" || Name == "ull")) {
2587 Value *Arg0 = CI->getArgOperand(0);
2588 Value *Arg1 = CI->getArgOperand(1);
2589 Value *Cmp = Name.starts_with("u")
2590 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2591 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2592 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2593 } else if (Name.consume_front("min.") &&
2594 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2595 Name == "ui" || Name == "ull")) {
2596 Value *Arg0 = CI->getArgOperand(0);
2597 Value *Arg1 = CI->getArgOperand(1);
2598 Value *Cmp = Name.starts_with("u")
2599 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2600 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2601 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2602 } else if (Name == "clz.ll") {
2603 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2604 Value *Arg = CI->getArgOperand(0);
2605 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2606 {Arg, Builder.getFalse()},
2607 /*FMFSource=*/nullptr, "ctlz");
2608 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2609 } else if (Name == "popc.ll") {
2610 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2611 // i64.
2612 Value *Arg = CI->getArgOperand(0);
2613 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2614 Arg, /*FMFSource=*/nullptr, "ctpop");
2615 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2616 } else if (Name == "h2f") {
2617 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2618 {Builder.getFloatTy()}, CI->getArgOperand(0),
2619 /*FMFSource=*/nullptr, "h2f");
2620 } else if (Name.consume_front("bitcast.") &&
2621 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2622 Name == "d2ll")) {
2623 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2624 } else if (Name == "rotate.b32") {
2625 Value *Arg = CI->getOperand(0);
2626 Value *ShiftAmt = CI->getOperand(1);
2627 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2628 {Arg, Arg, ShiftAmt});
2629 } else if (Name == "rotate.b64") {
2630 Type *Int64Ty = Builder.getInt64Ty();
2631 Value *Arg = CI->getOperand(0);
2632 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2633 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2634 {Arg, Arg, ZExtShiftAmt});
2635 } else if (Name == "rotate.right.b64") {
2636 Type *Int64Ty = Builder.getInt64Ty();
2637 Value *Arg = CI->getOperand(0);
2638 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2639 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2640 {Arg, Arg, ZExtShiftAmt});
2641 } else if (Name == "swap.lo.hi.b64") {
2642 Type *Int64Ty = Builder.getInt64Ty();
2643 Value *Arg = CI->getOperand(0);
2644 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2645 {Arg, Arg, Builder.getInt64(32)});
2646 } else if ((Name.consume_front("ptr.gen.to.") &&
2647 consumeNVVMPtrAddrSpace(Name)) ||
2648 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2649 Name.starts_with(".to.gen"))) {
2650 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2651 } else if (Name.consume_front("ldg.global")) {
2652 Value *Ptr = CI->getArgOperand(0);
2653 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2654 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2655 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2656 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2657 MDNode *MD = MDNode::get(Builder.getContext(), {});
2658 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2659 return LD;
2660 } else if (Name == "tanh.approx.f32") {
2661 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2662 FastMathFlags FMF;
2663 FMF.setApproxFunc();
2664 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2665 FMF);
2666 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2667 Value *Arg =
2668 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2669 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2670 {}, {Arg});
2671 } else if (Name == "barrier") {
2672 Rep = Builder.CreateIntrinsic(
2673 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2674 {CI->getArgOperand(0), CI->getArgOperand(1)});
2675 } else if (Name == "barrier.sync") {
2676 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2677 {CI->getArgOperand(0)});
2678 } else if (Name == "barrier.sync.cnt") {
2679 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2680 {CI->getArgOperand(0), CI->getArgOperand(1)});
2681 } else {
2682 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2683 if (IID != Intrinsic::not_intrinsic &&
2684 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2685 rename(F);
2686 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2687 SmallVector<Value *, 2> Args;
2688 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2689 Value *Arg = CI->getArgOperand(I);
2690 Type *OldType = Arg->getType();
2691 Type *NewType = NewFn->getArg(I)->getType();
2692 Args.push_back(
2693 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2694 ? Builder.CreateBitCast(Arg, NewType)
2695 : Arg);
2696 }
2697 Rep = Builder.CreateCall(NewFn, Args);
2698 if (F->getReturnType()->isIntegerTy())
2699 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2700 }
2701 }
2702
2703 return Rep;
2704}
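// e.g. (illustrative IR) the expansion above turns
//   %r = call i32 @llvm.nvvm.rotate.b32(i32 %x, i32 %n)
// into the target-independent funnel shift
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)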
2705
2706static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2707 IRBuilder<> &Builder) {
2708 LLVMContext &C = F->getContext();
2709 Value *Rep = nullptr;
2710
2711 if (Name.starts_with("sse4a.movnt.")) {
2712 SmallVector<Metadata *, 1> Elts;
2713 Elts.push_back(
2714 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2715 MDNode *Node = MDNode::get(C, Elts);
2716
2717 Value *Arg0 = CI->getArgOperand(0);
2718 Value *Arg1 = CI->getArgOperand(1);
2719
2720 // Nontemporal (unaligned) store of the 0th element of the float/double
2721 // vector.
2722 Value *Extract =
2723 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2724
2725 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2726 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2727 } else if (Name.starts_with("avx.movnt.") ||
2728 Name.starts_with("avx512.storent.")) {
2729 SmallVector<Metadata *, 1> Elts;
2730 Elts.push_back(
2731 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2732 MDNode *Node = MDNode::get(C, Elts);
2733
2734 Value *Arg0 = CI->getArgOperand(0);
2735 Value *Arg1 = CI->getArgOperand(1);
2736
2737 StoreInst *SI = Builder.CreateAlignedStore(
2738 Arg1, Arg0,
2739 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2740 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2741 } else if (Name == "sse2.storel.dq") {
2742 Value *Arg0 = CI->getArgOperand(0);
2743 Value *Arg1 = CI->getArgOperand(1);
2744
2745 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2746 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2747 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2748 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2749 } else if (Name.starts_with("sse.storeu.") ||
2750 Name.starts_with("sse2.storeu.") ||
2751 Name.starts_with("avx.storeu.")) {
2752 Value *Arg0 = CI->getArgOperand(0);
2753 Value *Arg1 = CI->getArgOperand(1);
2754 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2755 } else if (Name == "avx512.mask.store.ss") {
2756 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2757 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2758 Mask, false);
2759 } else if (Name.starts_with("avx512.mask.store")) {
2760 // "avx512.mask.storeu." or "avx512.mask.store."
2761 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2762 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2763 CI->getArgOperand(2), Aligned);
2764 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2765 // Upgrade packed integer vector compare intrinsics to compare instructions.
2766 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2767 bool CmpEq = Name[9] == 'e';
2768 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2769 CI->getArgOperand(0), CI->getArgOperand(1));
2770 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2771 } else if (Name.starts_with("avx512.broadcastm")) {
2772 Type *ExtTy = Type::getInt32Ty(C);
2773 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2774 ExtTy = Type::getInt64Ty(C);
2775 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2776 ExtTy->getPrimitiveSizeInBits();
2777 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2778 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2779 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2780 Value *Vec = CI->getArgOperand(0);
2781 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2782 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2783 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2784 } else if (Name.starts_with("avx.sqrt.p") ||
2785 Name.starts_with("sse2.sqrt.p") ||
2786 Name.starts_with("sse.sqrt.p")) {
2787 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2788 {CI->getArgOperand(0)});
2789 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2790 if (CI->arg_size() == 4 &&
2791 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2792 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2793 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2794 : Intrinsic::x86_avx512_sqrt_pd_512;
2795
2796 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2797 Rep = Builder.CreateIntrinsic(IID, Args);
2798 } else {
2799 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2800 {CI->getArgOperand(0)});
2801 }
2802 Rep =
2803 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2804 } else if (Name.starts_with("avx512.ptestm") ||
2805 Name.starts_with("avx512.ptestnm")) {
2806 Value *Op0 = CI->getArgOperand(0);
2807 Value *Op1 = CI->getArgOperand(1);
2808 Value *Mask = CI->getArgOperand(2);
2809 Rep = Builder.CreateAnd(Op0, Op1);
2810 llvm::Type *Ty = Op0->getType();
2811 Value *Zero = llvm::Constant::getNullValue(Ty);
2812 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2813 ? ICmpInst::ICMP_NE
2814 : ICmpInst::ICMP_EQ;
2815 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2816 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2817 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2818 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2819 ->getNumElements();
2820 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2821 Rep =
2822 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2823 } else if (Name.starts_with("avx512.kunpck")) {
2824 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2825 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2826 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2827 int Indices[64];
2828 for (unsigned i = 0; i != NumElts; ++i)
2829 Indices[i] = i;
2830
2831 // First extract half of each vector. This gives better codegen than
2832 // doing it in a single shuffle.
2833 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2834 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2835 // Concat the vectors.
2836 // NOTE: Operands have to be swapped to match intrinsic definition.
2837 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2838 Rep = Builder.CreateBitCast(Rep, CI->getType());
2839 } else if (Name == "avx512.kand.w") {
2840 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2841 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2842 Rep = Builder.CreateAnd(LHS, RHS);
2843 Rep = Builder.CreateBitCast(Rep, CI->getType());
2844 } else if (Name == "avx512.kandn.w") {
2845 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2846 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2847 LHS = Builder.CreateNot(LHS);
2848 Rep = Builder.CreateAnd(LHS, RHS);
2849 Rep = Builder.CreateBitCast(Rep, CI->getType());
2850 } else if (Name == "avx512.kor.w") {
2851 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2852 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2853 Rep = Builder.CreateOr(LHS, RHS);
2854 Rep = Builder.CreateBitCast(Rep, CI->getType());
2855 } else if (Name == "avx512.kxor.w") {
2856 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2857 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2858 Rep = Builder.CreateXor(LHS, RHS);
2859 Rep = Builder.CreateBitCast(Rep, CI->getType());
2860 } else if (Name == "avx512.kxnor.w") {
2861 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2862 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2863 LHS = Builder.CreateNot(LHS);
2864 Rep = Builder.CreateXor(LHS, RHS);
2865 Rep = Builder.CreateBitCast(Rep, CI->getType());
2866 } else if (Name == "avx512.knot.w") {
2867 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2868 Rep = Builder.CreateNot(Rep);
2869 Rep = Builder.CreateBitCast(Rep, CI->getType());
2870 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2871 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2872 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2873 Rep = Builder.CreateOr(LHS, RHS);
2874 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2875 Value *C;
2876 if (Name[14] == 'c')
2877 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2878 else
2879 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2880 Rep = Builder.CreateICmpEQ(Rep, C);
2881 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2882 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2883 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2884 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2885 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2886 Type *I32Ty = Type::getInt32Ty(C);
2887 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2888 ConstantInt::get(I32Ty, 0));
2889 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2890 ConstantInt::get(I32Ty, 0));
2891 Value *EltOp;
2892 if (Name.contains(".add."))
2893 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2894 else if (Name.contains(".sub."))
2895 EltOp = Builder.CreateFSub(Elt0, Elt1);
2896 else if (Name.contains(".mul."))
2897 EltOp = Builder.CreateFMul(Elt0, Elt1);
2898 else
2899 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2900 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2901 ConstantInt::get(I32Ty, 0));
2902 } else if (Name.starts_with("avx512.mask.pcmp")) {
2903 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2904 bool CmpEq = Name[16] == 'e';
2905 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2906 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2907 Type *OpTy = CI->getArgOperand(0)->getType();
2908 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2909 Intrinsic::ID IID;
2910 switch (VecWidth) {
2911 default:
2912 llvm_unreachable("Unexpected intrinsic");
2913 case 128:
2914 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2915 break;
2916 case 256:
2917 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2918 break;
2919 case 512:
2920 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2921 break;
2922 }
2923
2924 Rep =
2925 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2926 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2927 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2928 Type *OpTy = CI->getArgOperand(0)->getType();
2929 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2930 unsigned EltWidth = OpTy->getScalarSizeInBits();
2931 Intrinsic::ID IID;
2932 if (VecWidth == 128 && EltWidth == 32)
2933 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2934 else if (VecWidth == 256 && EltWidth == 32)
2935 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2936 else if (VecWidth == 512 && EltWidth == 32)
2937 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2938 else if (VecWidth == 128 && EltWidth == 64)
2939 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2940 else if (VecWidth == 256 && EltWidth == 64)
2941 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2942 else if (VecWidth == 512 && EltWidth == 64)
2943 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2944 else
2945 llvm_unreachable("Unexpected intrinsic");
2946
2947 Rep =
2948 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2949 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2950 } else if (Name.starts_with("avx512.cmp.p")) {
2951 SmallVector<Value *, 4> Args(CI->args());
2952 Type *OpTy = Args[0]->getType();
2953 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2954 unsigned EltWidth = OpTy->getScalarSizeInBits();
2955 Intrinsic::ID IID;
2956 if (VecWidth == 128 && EltWidth == 32)
2957 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2958 else if (VecWidth == 256 && EltWidth == 32)
2959 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2960 else if (VecWidth == 512 && EltWidth == 32)
2961 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2962 else if (VecWidth == 128 && EltWidth == 64)
2963 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2964 else if (VecWidth == 256 && EltWidth == 64)
2965 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2966 else if (VecWidth == 512 && EltWidth == 64)
2967 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2968 else
2969 llvm_unreachable("Unexpected intrinsic");
2970
2971 Value *Mask = Constant::getAllOnesValue(FixedVectorType::get(Builder.getInt1Ty(), VecWidth / EltWidth));
2972 if (VecWidth == 512)
2973 std::swap(Mask, Args.back());
2974 Args.push_back(Mask);
2975
2976 Rep = Builder.CreateIntrinsic(IID, Args);
2977 } else if (Name.starts_with("avx512.mask.cmp.")) {
2978 // Integer compare intrinsics.
2979 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2980 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2981 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2982 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2983 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2984 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2985 Name.starts_with("avx512.cvtw2mask.") ||
2986 Name.starts_with("avx512.cvtd2mask.") ||
2987 Name.starts_with("avx512.cvtq2mask.")) {
2988 Value *Op = CI->getArgOperand(0);
2989 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2990 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2991 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2992 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2993 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2994 Name.starts_with("avx512.mask.pabs")) {
2995 Rep = upgradeAbs(Builder, *CI);
2996 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2997 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2998 Name.starts_with("avx512.mask.pmaxs")) {
2999 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3000 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3001 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3002 Name.starts_with("avx512.mask.pmaxu")) {
3003 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3004 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3005 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3006 Name.starts_with("avx512.mask.pmins")) {
3007 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3008 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3009 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3010 Name.starts_with("avx512.mask.pminu")) {
3011 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3012 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3013 Name == "avx512.pmulu.dq.512" ||
3014 Name.starts_with("avx512.mask.pmulu.dq.")) {
3015 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3016 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3017 Name == "avx512.pmul.dq.512" ||
3018 Name.starts_with("avx512.mask.pmul.dq.")) {
3019 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3020 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3021 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3022 Rep =
3023 Builder.CreateSIToFP(CI->getArgOperand(1),
3024 cast<VectorType>(CI->getType())->getElementType());
3025 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3026 } else if (Name == "avx512.cvtusi2sd") {
3027 Rep =
3028 Builder.CreateUIToFP(CI->getArgOperand(1),
3029 cast<VectorType>(CI->getType())->getElementType());
3030 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3031 } else if (Name == "sse2.cvtss2sd") {
3032 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3033 Rep = Builder.CreateFPExt(
3034 Rep, cast<VectorType>(CI->getType())->getElementType());
3035 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3036 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3037 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3038 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3039 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3040 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3041 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3042 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3043 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3044 Name == "avx512.mask.cvtqq2ps.256" ||
3045 Name == "avx512.mask.cvtqq2ps.512" ||
3046 Name == "avx512.mask.cvtuqq2ps.256" ||
3047 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3048 Name == "avx.cvt.ps2.pd.256" ||
3049 Name == "avx512.mask.cvtps2pd.128" ||
3050 Name == "avx512.mask.cvtps2pd.256") {
3051 auto *DstTy = cast<FixedVectorType>(CI->getType());
3052 Rep = CI->getArgOperand(0);
3053 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3054
3055 unsigned NumDstElts = DstTy->getNumElements();
3056 if (NumDstElts < SrcTy->getNumElements()) {
3057 assert(NumDstElts == 2 && "Unexpected vector size");
3058 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3059 }
3060
3061 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3062 bool IsUnsigned = Name.contains("cvtu");
3063 if (IsPS2PD)
3064 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3065 else if (CI->arg_size() == 4 &&
3066 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3067 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3068 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3069 : Intrinsic::x86_avx512_sitofp_round;
3070 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3071 {Rep, CI->getArgOperand(3)});
3072 } else {
3073 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3074 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3075 }
3076
3077 if (CI->arg_size() >= 3)
3078 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3079 CI->getArgOperand(1));
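// Example (editorial note, not in the original source): for
// "sse2.cvtdq2pd" the source is <4 x i32> and the destination is
// <2 x double>, so the shuffle above keeps lanes {0,1} before the sitofp;
// the x86_avx512_*itofp_round path is only taken for 4-operand AVX512
// forms whose rounding argument is not the default (4).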
3080 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3081 Name.starts_with("vcvtph2ps.")) {
3082 auto *DstTy = cast<FixedVectorType>(CI->getType());
3083 Rep = CI->getArgOperand(0);
3084 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3085 unsigned NumDstElts = DstTy->getNumElements();
3086 if (NumDstElts != SrcTy->getNumElements()) {
3087 assert(NumDstElts == 4 && "Unexpected vector size");
3088 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3089 }
3090 Rep = Builder.CreateBitCast(
3091 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3092 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3093 if (CI->arg_size() >= 3)
3094 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3095 CI->getArgOperand(1));
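// Example (editorial note): for "vcvtph2ps.128" the source is <8 x i16>
// and the destination is <4 x float>, so the code above shuffles down to
// lanes {0,1,2,3}, bitcasts them to <4 x half>, and widens with fpext.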
3096 } else if (Name.starts_with("avx512.mask.load")) {
3097 // "avx512.mask.loadu." or "avx512.mask.load."
3098 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3099 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3100 CI->getArgOperand(2), Aligned);
3101 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3102 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3103 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3104 ResultTy->getNumElements());
3105
3106 Rep = Builder.CreateIntrinsic(
3107 Intrinsic::masked_expandload, ResultTy,
3108 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3109 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3110 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3111 Value *MaskVec =
3112 getX86MaskVec(Builder, CI->getArgOperand(2),
3113 cast<FixedVectorType>(ResultTy)->getNumElements());
3114
3115 Rep = Builder.CreateIntrinsic(
3116 Intrinsic::masked_compressstore, ResultTy,
3117 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3118 } else if (Name.starts_with("avx512.mask.compress.") ||
3119 Name.starts_with("avx512.mask.expand.")) {
3120 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3121
3122 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3123 ResultTy->getNumElements());
3124
3125 bool IsCompress = Name[12] == 'c';
3126 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3127 : Intrinsic::x86_avx512_mask_expand;
3128 Rep = Builder.CreateIntrinsic(
3129 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3130 } else if (Name.starts_with("xop.vpcom")) {
3131 bool IsSigned;
3132 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3133 Name.ends_with("uq"))
3134 IsSigned = false;
3135 else if (Name.ends_with("b") || Name.ends_with("w") ||
3136 Name.ends_with("d") || Name.ends_with("q"))
3137 IsSigned = true;
3138 else
3139 llvm_unreachable("Unknown suffix");
3140
3141 unsigned Imm;
3142 if (CI->arg_size() == 3) {
3143 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3144 } else {
3145 Name = Name.substr(9); // strip off "xop.vpcom"
3146 if (Name.starts_with("lt"))
3147 Imm = 0;
3148 else if (Name.starts_with("le"))
3149 Imm = 1;
3150 else if (Name.starts_with("gt"))
3151 Imm = 2;
3152 else if (Name.starts_with("ge"))
3153 Imm = 3;
3154 else if (Name.starts_with("eq"))
3155 Imm = 4;
3156 else if (Name.starts_with("ne"))
3157 Imm = 5;
3158 else if (Name.starts_with("false"))
3159 Imm = 6;
3160 else if (Name.starts_with("true"))
3161 Imm = 7;
3162 else
3163 llvm_unreachable("Unknown condition");
3164 }
3165
3166 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3167 } else if (Name.starts_with("xop.vpcmov")) {
3168 Value *Sel = CI->getArgOperand(2);
3169 Value *NotSel = Builder.CreateNot(Sel);
3170 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3171 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3172 Rep = Builder.CreateOr(Sel0, Sel1);
3173 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3174 Name.starts_with("avx512.mask.prol")) {
3175 Rep = upgradeX86Rotate(Builder, *CI, false);
3176 } else if (Name.starts_with("avx512.pror") ||
3177 Name.starts_with("avx512.mask.pror")) {
3178 Rep = upgradeX86Rotate(Builder, *CI, true);
3179 } else if (Name.starts_with("avx512.vpshld.") ||
3180 Name.starts_with("avx512.mask.vpshld") ||
3181 Name.starts_with("avx512.maskz.vpshld")) {
3182 bool ZeroMask = Name[11] == 'z';
3183 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3184 } else if (Name.starts_with("avx512.vpshrd.") ||
3185 Name.starts_with("avx512.mask.vpshrd") ||
3186 Name.starts_with("avx512.maskz.vpshrd")) {
3187 bool ZeroMask = Name[11] == 'z';
3188 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3189 } else if (Name == "sse42.crc32.64.8") {
3190 Value *Trunc0 =
3191 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3192 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3193 {Trunc0, CI->getArgOperand(1)});
3194 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3195 } else if (Name.starts_with("avx.vbroadcast.s") ||
3196 Name.starts_with("avx512.vbroadcast.s")) {
3197 // Replace broadcasts with a series of insertelements.
3198 auto *VecTy = cast<FixedVectorType>(CI->getType());
3199 Type *EltTy = VecTy->getElementType();
3200 unsigned EltNum = VecTy->getNumElements();
3201 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3202 Type *I32Ty = Type::getInt32Ty(C);
3203 Rep = PoisonValue::get(VecTy);
3204 for (unsigned I = 0; I < EltNum; ++I)
3205 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3206 } else if (Name.starts_with("sse41.pmovsx") ||
3207 Name.starts_with("sse41.pmovzx") ||
3208 Name.starts_with("avx2.pmovsx") ||
3209 Name.starts_with("avx2.pmovzx") ||
3210 Name.starts_with("avx512.mask.pmovsx") ||
3211 Name.starts_with("avx512.mask.pmovzx")) {
3212 auto *DstTy = cast<FixedVectorType>(CI->getType());
3213 unsigned NumDstElts = DstTy->getNumElements();
3214
3215 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3216 SmallVector<int, 8> ShuffleMask(NumDstElts);
3217 for (unsigned i = 0; i != NumDstElts; ++i)
3218 ShuffleMask[i] = i;
3219
3220 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3221
3222 bool DoSext = Name.contains("pmovsx");
3223 Rep =
3224 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3225 // If there are 3 arguments, it's a masked intrinsic, so we need a select.
3226 if (CI->arg_size() == 3)
3227 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3228 CI->getArgOperand(1));
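// Example (editorial note): "sse41.pmovsxbd" takes <16 x i8>; the shuffle
// keeps lanes {0,1,2,3} and the sext yields <4 x i32>. The pmovzx variants
// use zext instead.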
3229 } else if (Name == "avx512.mask.pmov.qd.256" ||
3230 Name == "avx512.mask.pmov.qd.512" ||
3231 Name == "avx512.mask.pmov.wb.256" ||
3232 Name == "avx512.mask.pmov.wb.512") {
3233 Type *Ty = CI->getArgOperand(1)->getType();
3234 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3235 Rep =
3236 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3237 } else if (Name.starts_with("avx.vbroadcastf128") ||
3238 Name == "avx2.vbroadcasti128") {
3239 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3240 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3241 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3242 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3243 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3244 if (NumSrcElts == 2)
3245 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3246 else
3247 Rep = Builder.CreateShuffleVector(Load,
3248 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3249 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3250 Name.starts_with("avx512.mask.shuf.f")) {
3251 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3252 Type *VT = CI->getType();
3253 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3254 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3255 unsigned ControlBitsMask = NumLanes - 1;
3256 unsigned NumControlBits = NumLanes / 2;
3257 SmallVector<int, 8> ShuffleMask(0);
3258
3259 for (unsigned l = 0; l != NumLanes; ++l) {
3260 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3261 // We actually need the other source.
3262 if (l >= NumLanes / 2)
3263 LaneMask += NumLanes;
3264 for (unsigned i = 0; i != NumElementsInLane; ++i)
3265 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3266 }
3267 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3268 CI->getArgOperand(1), ShuffleMask);
3269 Rep =
3270 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3271 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3272 Name.starts_with("avx512.mask.broadcasti")) {
3273 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3274 ->getNumElements();
3275 unsigned NumDstElts =
3276 cast<FixedVectorType>(CI->getType())->getNumElements();
3277
3278 SmallVector<int, 8> ShuffleMask(NumDstElts);
3279 for (unsigned i = 0; i != NumDstElts; ++i)
3280 ShuffleMask[i] = i % NumSrcElts;
3281
3282 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3283 CI->getArgOperand(0), ShuffleMask);
3284 Rep =
3285 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
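// Example (editorial note): broadcasting a <4 x float> source into a
// <16 x float> destination uses ShuffleMask = {0,1,2,3, 0,1,2,3, 0,1,2,3,
// 0,1,2,3}.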
3286 } else if (Name.starts_with("avx2.pbroadcast") ||
3287 Name.starts_with("avx2.vbroadcast") ||
3288 Name.starts_with("avx512.pbroadcast") ||
3289 Name.starts_with("avx512.mask.broadcast.s")) {
3290 // Replace vp?broadcasts with a vector shuffle.
3291 Value *Op = CI->getArgOperand(0);
3292 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3293 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3294 SmallVector<int, 8> M;
3295 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3296 Rep = Builder.CreateShuffleVector(Op, M);
3297
3298 if (CI->arg_size() == 3)
3299 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3300 CI->getArgOperand(1));
3301 } else if (Name.starts_with("sse2.padds.") ||
3302 Name.starts_with("avx2.padds.") ||
3303 Name.starts_with("avx512.padds.") ||
3304 Name.starts_with("avx512.mask.padds.")) {
3305 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3306 } else if (Name.starts_with("sse2.psubs.") ||
3307 Name.starts_with("avx2.psubs.") ||
3308 Name.starts_with("avx512.psubs.") ||
3309 Name.starts_with("avx512.mask.psubs.")) {
3310 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3311 } else if (Name.starts_with("sse2.paddus.") ||
3312 Name.starts_with("avx2.paddus.") ||
3313 Name.starts_with("avx512.mask.paddus.")) {
3314 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3315 } else if (Name.starts_with("sse2.psubus.") ||
3316 Name.starts_with("avx2.psubus.") ||
3317 Name.starts_with("avx512.mask.psubus.")) {
3318 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3319 } else if (Name.starts_with("avx512.mask.palignr.")) {
3320 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3321 CI->getArgOperand(1), CI->getArgOperand(2),
3322 CI->getArgOperand(3), CI->getArgOperand(4),
3323 false);
3324 } else if (Name.starts_with("avx512.mask.valign.")) {
3325 Rep = upgradeX86ALIGNIntrinsics(
3326 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3327 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3328 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3329 // 128/256-bit shift left specified in bits.
3330 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3331 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3332 Shift / 8); // Shift is in bits.
3333 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3334 // 128/256-bit shift right specified in bits.
3335 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3336 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3337 Shift / 8); // Shift is in bits.
3338 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3339 Name == "avx512.psll.dq.512") {
3340 // 128/256/512-bit shift left specified in bytes.
3341 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3342 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3343 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3344 Name == "avx512.psrl.dq.512") {
3345 // 128/256/512-bit shift right specified in bytes.
3346 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3347 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3348 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3349 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3350 Name.starts_with("avx2.pblendd.")) {
3351 Value *Op0 = CI->getArgOperand(0);
3352 Value *Op1 = CI->getArgOperand(1);
3353 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3354 auto *VecTy = cast<FixedVectorType>(CI->getType());
3355 unsigned NumElts = VecTy->getNumElements();
3356
3357 SmallVector<int, 16> Idxs(NumElts);
3358 for (unsigned i = 0; i != NumElts; ++i)
3359 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3360
3361 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
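// Example (editorial note): a 4-element blend with Imm = 0b0101 builds
// Idxs = {4, 1, 6, 3}: elements 0 and 2 come from Op1, elements 1 and 3
// from Op0.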
3362 } else if (Name.starts_with("avx.vinsertf128.") ||
3363 Name == "avx2.vinserti128" ||
3364 Name.starts_with("avx512.mask.insert")) {
3365 Value *Op0 = CI->getArgOperand(0);
3366 Value *Op1 = CI->getArgOperand(1);
3367 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3368 unsigned DstNumElts =
3369 cast<FixedVectorType>(CI->getType())->getNumElements();
3370 unsigned SrcNumElts =
3371 cast<FixedVectorType>(Op1->getType())->getNumElements();
3372 unsigned Scale = DstNumElts / SrcNumElts;
3373
3374 // Mask off the high bits of the immediate value; hardware ignores those.
3375 Imm = Imm % Scale;
3376
3377 // Extend the second operand into a vector the size of the destination.
3378 SmallVector<int, 8> Idxs(DstNumElts);
3379 for (unsigned i = 0; i != SrcNumElts; ++i)
3380 Idxs[i] = i;
3381 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3382 Idxs[i] = SrcNumElts;
3383 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3384
3385 // Insert the second operand into the first operand.
3386
3387 // Note that there is no guarantee that instruction lowering will actually
3388 // produce a vinsertf128 instruction for the created shuffles. In
3389 // particular, the 0 immediate case involves no lane changes, so it can
3390 // be handled as a blend.
3391
3392 // Example of shuffle mask for 32-bit elements:
3393 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3394 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3395
3396 // First fill with identity mask.
3397 for (unsigned i = 0; i != DstNumElts; ++i)
3398 Idxs[i] = i;
3399 // Then replace the elements where we need to insert.
3400 for (unsigned i = 0; i != SrcNumElts; ++i)
3401 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3402 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3403
3404 // If the intrinsic has a mask operand, handle that.
3405 if (CI->arg_size() == 5)
3406 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3407 CI->getArgOperand(3));
3408 } else if (Name.starts_with("avx.vextractf128.") ||
3409 Name == "avx2.vextracti128" ||
3410 Name.starts_with("avx512.mask.vextract")) {
3411 Value *Op0 = CI->getArgOperand(0);
3412 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3413 unsigned DstNumElts =
3414 cast<FixedVectorType>(CI->getType())->getNumElements();
3415 unsigned SrcNumElts =
3416 cast<FixedVectorType>(Op0->getType())->getNumElements();
3417 unsigned Scale = SrcNumElts / DstNumElts;
3418
3419 // Mask off the high bits of the immediate value; hardware ignores those.
3420 Imm = Imm % Scale;
3421
3422 // Get indexes for the subvector of the input vector.
3423 SmallVector<int, 8> Idxs(DstNumElts);
3424 for (unsigned i = 0; i != DstNumElts; ++i) {
3425 Idxs[i] = i + (Imm * DstNumElts);
3426 }
3427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3428
3429 // If the intrinsic has a mask operand, handle that.
3430 if (CI->arg_size() == 4)
3431 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3432 CI->getArgOperand(2));
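// Example (editorial note): "avx.vextractf128" with Imm = 1 on an
// <8 x float> source builds Idxs = {4, 5, 6, 7}, i.e. the upper 128-bit
// half.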
3433 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3434 Name.starts_with("avx512.mask.perm.di.")) {
3435 Value *Op0 = CI->getArgOperand(0);
3436 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3437 auto *VecTy = cast<FixedVectorType>(CI->getType());
3438 unsigned NumElts = VecTy->getNumElements();
3439
3440 SmallVector<int, 8> Idxs(NumElts);
3441 for (unsigned i = 0; i != NumElts; ++i)
3442 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3443
3444 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3445
3446 if (CI->arg_size() == 4)
3447 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3448 CI->getArgOperand(2));
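// Example (editorial note): Imm = 0x1B (0b00011011) on <4 x double> builds
// Idxs = {3, 2, 1, 0}; for 512-bit types the same pattern repeats in each
// four-element group.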
3449 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3450 // The immediate permute control byte looks like this:
3451 // [1:0] - select 128 bits from sources for low half of destination
3452 // [2] - ignore
3453 // [3] - zero low half of destination
3454 // [5:4] - select 128 bits from sources for high half of destination
3455 // [6] - ignore
3456 // [7] - zero high half of destination
3457
3458 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3459
3460 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3461 unsigned HalfSize = NumElts / 2;
3462 SmallVector<int, 8> ShuffleMask(NumElts);
3463
3464 // Determine which operand(s) are actually in use for this instruction.
3465 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3466 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3467
3468 // If needed, replace operands based on zero mask.
3469 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3470 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3471
3472 // Permute low half of result.
3473 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3474 for (unsigned i = 0; i < HalfSize; ++i)
3475 ShuffleMask[i] = StartIndex + i;
3476
3477 // Permute high half of result.
3478 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3479 for (unsigned i = 0; i < HalfSize; ++i)
3480 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3481
3482 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3483
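// Example (editorial note): Imm = 0x31 on <8 x float> takes the high half
// of operand 0 for the low result half and the high half of operand 1 for
// the high result half: ShuffleMask = {4,5,6,7, 12,13,14,15}.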
3484 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3485 Name.starts_with("avx512.mask.vpermil.p") ||
3486 Name.starts_with("avx512.mask.pshuf.d.")) {
3487 Value *Op0 = CI->getArgOperand(0);
3488 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3489 auto *VecTy = cast<FixedVectorType>(CI->getType());
3490 unsigned NumElts = VecTy->getNumElements();
3491 // Calculate the size of each index in the immediate.
3492 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3493 unsigned IdxMask = ((1 << IdxSize) - 1);
3494
3495 SmallVector<int, 8> Idxs(NumElts);
3496 // Look up the bits for this element, wrapping around the immediate every
3497 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3498 // to offset by the first index of each group.
3499 for (unsigned i = 0; i != NumElts; ++i)
3500 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3501
3502 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3503
3504 if (CI->arg_size() == 4)
3505 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3506 CI->getArgOperand(2));
3507 } else if (Name == "sse2.pshufl.w" ||
3508 Name.starts_with("avx512.mask.pshufl.w.")) {
3509 Value *Op0 = CI->getArgOperand(0);
3510 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3511 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3512
3513 SmallVector<int, 16> Idxs(NumElts);
3514 for (unsigned l = 0; l != NumElts; l += 8) {
3515 for (unsigned i = 0; i != 4; ++i)
3516 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3517 for (unsigned i = 4; i != 8; ++i)
3518 Idxs[i + l] = i + l;
3519 }
3520
3521 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3522
3523 if (CI->arg_size() == 4)
3524 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3525 CI->getArgOperand(2));
3526 } else if (Name == "sse2.pshufh.w" ||
3527 Name.starts_with("avx512.mask.pshufh.w.")) {
3528 Value *Op0 = CI->getArgOperand(0);
3529 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3530 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3531
3532 SmallVector<int, 16> Idxs(NumElts);
3533 for (unsigned l = 0; l != NumElts; l += 8) {
3534 for (unsigned i = 0; i != 4; ++i)
3535 Idxs[i + l] = i + l;
3536 for (unsigned i = 0; i != 4; ++i)
3537 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3538 }
3539
3540 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3541
3542 if (CI->arg_size() == 4)
3543 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3544 CI->getArgOperand(2));
3545 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3546 Value *Op0 = CI->getArgOperand(0);
3547 Value *Op1 = CI->getArgOperand(1);
3548 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3549 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3550
3551 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3552 unsigned HalfLaneElts = NumLaneElts / 2;
3553
3554 SmallVector<int, 16> Idxs(NumElts);
3555 for (unsigned i = 0; i != NumElts; ++i) {
3556 // Base index is the starting element of the lane.
3557 Idxs[i] = i - (i % NumLaneElts);
3558 // If we are halfway through the lane, switch to the other source.
3559 if ((i % NumLaneElts) >= HalfLaneElts)
3560 Idxs[i] += NumElts;
3561 // Now select the specific element by adding HalfLaneElts bits from
3562 // the immediate, wrapping around the immediate every 8 bits.
3563 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3564 }
3565
3566 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3567
3568 Rep =
3569 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
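// Example (editorial note): a 128-bit shufps with Imm = 0x1B builds
// Idxs = {3, 2, 5, 4}: the low half picks elements 3 and 2 of Op0, the
// high half picks elements 1 and 0 of Op1 (offset by NumElts).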
3570 } else if (Name.starts_with("avx512.mask.movddup") ||
3571 Name.starts_with("avx512.mask.movshdup") ||
3572 Name.starts_with("avx512.mask.movsldup")) {
3573 Value *Op0 = CI->getArgOperand(0);
3574 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3575 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3576
3577 unsigned Offset = 0;
3578 if (Name.starts_with("avx512.mask.movshdup."))
3579 Offset = 1;
3580
3581 SmallVector<int, 16> Idxs(NumElts);
3582 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3583 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3584 Idxs[i + l + 0] = i + l + Offset;
3585 Idxs[i + l + 1] = i + l + Offset;
3586 }
3587
3588 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3589
3590 Rep =
3591 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
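// Example (editorial note): on <4 x float> this builds {0,0,2,2} for
// movsldup and {1,1,3,3} for movshdup; movddup on <2 x double> builds
// {0,0}.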
3592 } else if (Name.starts_with("avx512.mask.punpckl") ||
3593 Name.starts_with("avx512.mask.unpckl.")) {
3594 Value *Op0 = CI->getArgOperand(0);
3595 Value *Op1 = CI->getArgOperand(1);
3596 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3597 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3598
3599 SmallVector<int, 64> Idxs(NumElts);
3600 for (int l = 0; l != NumElts; l += NumLaneElts)
3601 for (int i = 0; i != NumLaneElts; ++i)
3602 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3603
3604 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3605
3606 Rep =
3607 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
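// Example (editorial note): for <4 x float> unpcklps this builds
// Idxs = {0, 4, 1, 5}, interleaving the low halves of the two sources.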
3608 } else if (Name.starts_with("avx512.mask.punpckh") ||
3609 Name.starts_with("avx512.mask.unpckh.")) {
3610 Value *Op0 = CI->getArgOperand(0);
3611 Value *Op1 = CI->getArgOperand(1);
3612 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3613 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3614
3615 SmallVector<int, 64> Idxs(NumElts);
3616 for (int l = 0; l != NumElts; l += NumLaneElts)
3617 for (int i = 0; i != NumLaneElts; ++i)
3618 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3619
3620 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3621
3622 Rep =
3623 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3624 } else if (Name.starts_with("avx512.mask.and.") ||
3625 Name.starts_with("avx512.mask.pand.")) {
3626 VectorType *FTy = cast<VectorType>(CI->getType());
3627 VectorType *ITy = VectorType::getInteger(FTy);
3628 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3629 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3630 Rep = Builder.CreateBitCast(Rep, FTy);
3631 Rep =
3632 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3633 } else if (Name.starts_with("avx512.mask.andn.") ||
3634 Name.starts_with("avx512.mask.pandn.")) {
3635 VectorType *FTy = cast<VectorType>(CI->getType());
3636 VectorType *ITy = VectorType::getInteger(FTy);
3637 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3638 Rep = Builder.CreateAnd(Rep,
3639 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3640 Rep = Builder.CreateBitCast(Rep, FTy);
3641 Rep =
3642 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3643 } else if (Name.starts_with("avx512.mask.or.") ||
3644 Name.starts_with("avx512.mask.por.")) {
3645 VectorType *FTy = cast<VectorType>(CI->getType());
3646 VectorType *ITy = VectorType::getInteger(FTy);
3647 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3648 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3649 Rep = Builder.CreateBitCast(Rep, FTy);
3650 Rep =
3651 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3652 } else if (Name.starts_with("avx512.mask.xor.") ||
3653 Name.starts_with("avx512.mask.pxor.")) {
3654 VectorType *FTy = cast<VectorType>(CI->getType());
3655 VectorType *ITy = VectorType::getInteger(FTy);
3656 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3657 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3658 Rep = Builder.CreateBitCast(Rep, FTy);
3659 Rep =
3660 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3661 } else if (Name.starts_with("avx512.mask.padd.")) {
3662 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3663 Rep =
3664 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3665 } else if (Name.starts_with("avx512.mask.psub.")) {
3666 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3667 Rep =
3668 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3669 } else if (Name.starts_with("avx512.mask.pmull.")) {
3670 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3671 Rep =
3672 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3673 } else if (Name.starts_with("avx512.mask.add.p")) {
3674 if (Name.ends_with(".512")) {
3675 Intrinsic::ID IID;
3676 if (Name[17] == 's')
3677 IID = Intrinsic::x86_avx512_add_ps_512;
3678 else
3679 IID = Intrinsic::x86_avx512_add_pd_512;
3680
3681 Rep = Builder.CreateIntrinsic(
3682 IID,
3683 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3684 } else {
3685 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3686 }
3687 Rep =
3688 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3689 } else if (Name.starts_with("avx512.mask.div.p")) {
3690 if (Name.ends_with(".512")) {
3691 Intrinsic::ID IID;
3692 if (Name[17] == 's')
3693 IID = Intrinsic::x86_avx512_div_ps_512;
3694 else
3695 IID = Intrinsic::x86_avx512_div_pd_512;
3696
3697 Rep = Builder.CreateIntrinsic(
3698 IID,
3699 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3700 } else {
3701 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3702 }
3703 Rep =
3704 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3705 } else if (Name.starts_with("avx512.mask.mul.p")) {
3706 if (Name.ends_with(".512")) {
3707 Intrinsic::ID IID;
3708 if (Name[17] == 's')
3709 IID = Intrinsic::x86_avx512_mul_ps_512;
3710 else
3711 IID = Intrinsic::x86_avx512_mul_pd_512;
3712
3713 Rep = Builder.CreateIntrinsic(
3714 IID,
3715 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3716 } else {
3717 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3718 }
3719 Rep =
3720 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3721 } else if (Name.starts_with("avx512.mask.sub.p")) {
3722 if (Name.ends_with(".512")) {
3723 Intrinsic::ID IID;
3724 if (Name[17] == 's')
3725 IID = Intrinsic::x86_avx512_sub_ps_512;
3726 else
3727 IID = Intrinsic::x86_avx512_sub_pd_512;
3728
3729 Rep = Builder.CreateIntrinsic(
3730 IID,
3731 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3732 } else {
3733 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3734 }
3735 Rep =
3736 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3737 } else if ((Name.starts_with("avx512.mask.max.p") ||
3738 Name.starts_with("avx512.mask.min.p")) &&
3739 Name.drop_front(18) == ".512") {
3740 bool IsDouble = Name[17] == 'd';
3741 bool IsMin = Name[13] == 'i';
3742 static const Intrinsic::ID MinMaxTbl[2][2] = {
3743 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3744 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3745 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3746
3747 Rep = Builder.CreateIntrinsic(
3748 IID,
3749 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3750 Rep =
3751 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3752 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3753 Rep =
3754 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3755 {CI->getArgOperand(0), Builder.getInt1(false)});
3756 Rep =
3757 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3758 } else if (Name.starts_with("avx512.mask.psll")) {
3759 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3760 bool IsVariable = Name[16] == 'v';
3761 char Size = Name[16] == '.' ? Name[17]
3762 : Name[17] == '.' ? Name[18]
3763 : Name[18] == '.' ? Name[19]
3764 : Name[20];
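// Decode example (editorial note): for "avx512.mask.psll.di.512",
// Name[16] == '.', so Size = Name[17] == 'd' and Name[18] == 'i' selects
// the immediate form; for "avx512.mask.psllv8.si", Name[16] == 'v' routes
// to the variable-shift table below.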
3765
3766 Intrinsic::ID IID;
3767 if (IsVariable && Name[17] != '.') {
3768 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3769 IID = Intrinsic::x86_avx2_psllv_q;
3770 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3771 IID = Intrinsic::x86_avx2_psllv_q_256;
3772 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3773 IID = Intrinsic::x86_avx2_psllv_d;
3774 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3775 IID = Intrinsic::x86_avx2_psllv_d_256;
3776 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3777 IID = Intrinsic::x86_avx512_psllv_w_128;
3778 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3779 IID = Intrinsic::x86_avx512_psllv_w_256;
3780 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3781 IID = Intrinsic::x86_avx512_psllv_w_512;
3782 else
3783 llvm_unreachable("Unexpected size");
3784 } else if (Name.ends_with(".128")) {
3785 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3786 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3787 : Intrinsic::x86_sse2_psll_d;
3788 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3789 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3790 : Intrinsic::x86_sse2_psll_q;
3791 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3792 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3793 : Intrinsic::x86_sse2_psll_w;
3794 else
3795 llvm_unreachable("Unexpected size");
3796 } else if (Name.ends_with(".256")) {
3797 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3798 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3799 : Intrinsic::x86_avx2_psll_d;
3800 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3801 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3802 : Intrinsic::x86_avx2_psll_q;
3803 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3804 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3805 : Intrinsic::x86_avx2_psll_w;
3806 else
3807 llvm_unreachable("Unexpected size");
3808 } else {
3809 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3810 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3811 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3812 : Intrinsic::x86_avx512_psll_d_512;
3813 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3814 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3815 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3816 : Intrinsic::x86_avx512_psll_q_512;
3817 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3818 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3819 : Intrinsic::x86_avx512_psll_w_512;
3820 else
3821 llvm_unreachable("Unexpected size");
3822 }
3823
3824 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3825 } else if (Name.starts_with("avx512.mask.psrl")) {
3826 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3827 bool IsVariable = Name[16] == 'v';
3828 char Size = Name[16] == '.' ? Name[17]
3829 : Name[17] == '.' ? Name[18]
3830 : Name[18] == '.' ? Name[19]
3831 : Name[20];
3832
3833 Intrinsic::ID IID;
3834 if (IsVariable && Name[17] != '.') {
3835 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3836 IID = Intrinsic::x86_avx2_psrlv_q;
3837 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3838 IID = Intrinsic::x86_avx2_psrlv_q_256;
3839 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3840 IID = Intrinsic::x86_avx2_psrlv_d;
3841 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3842 IID = Intrinsic::x86_avx2_psrlv_d_256;
3843 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3844 IID = Intrinsic::x86_avx512_psrlv_w_128;
3845 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3846 IID = Intrinsic::x86_avx512_psrlv_w_256;
3847 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3848 IID = Intrinsic::x86_avx512_psrlv_w_512;
3849 else
3850 llvm_unreachable("Unexpected size");
3851 } else if (Name.ends_with(".128")) {
3852 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3853 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3854 : Intrinsic::x86_sse2_psrl_d;
3855 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3856 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3857 : Intrinsic::x86_sse2_psrl_q;
3858 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3859 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3860 : Intrinsic::x86_sse2_psrl_w;
3861 else
3862 llvm_unreachable("Unexpected size");
3863 } else if (Name.ends_with(".256")) {
3864 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3865 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3866 : Intrinsic::x86_avx2_psrl_d;
3867 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3868 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3869 : Intrinsic::x86_avx2_psrl_q;
3870 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3871 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3872 : Intrinsic::x86_avx2_psrl_w;
3873 else
3874 llvm_unreachable("Unexpected size");
3875 } else {
3876 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3877 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3878 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3879 : Intrinsic::x86_avx512_psrl_d_512;
3880 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3881 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3882 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3883 : Intrinsic::x86_avx512_psrl_q_512;
3884 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3885 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3886 : Intrinsic::x86_avx512_psrl_w_512;
3887 else
3888 llvm_unreachable("Unexpected size");
3889 }
3890
3891 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3892 } else if (Name.starts_with("avx512.mask.psra")) {
3893 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3894 bool IsVariable = Name[16] == 'v';
3895 char Size = Name[16] == '.' ? Name[17]
3896 : Name[17] == '.' ? Name[18]
3897 : Name[18] == '.' ? Name[19]
3898 : Name[20];
3899
3900 Intrinsic::ID IID;
3901 if (IsVariable && Name[17] != '.') {
3902 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3903 IID = Intrinsic::x86_avx2_psrav_d;
3904 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3905 IID = Intrinsic::x86_avx2_psrav_d_256;
3906 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3907 IID = Intrinsic::x86_avx512_psrav_w_128;
3908 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3909 IID = Intrinsic::x86_avx512_psrav_w_256;
3910 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3911 IID = Intrinsic::x86_avx512_psrav_w_512;
3912 else
3913 llvm_unreachable("Unexpected size");
3914 } else if (Name.ends_with(".128")) {
3915 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3916 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3917 : Intrinsic::x86_sse2_psra_d;
3918 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3919 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3920 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3921 : Intrinsic::x86_avx512_psra_q_128;
3922 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3923 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3924 : Intrinsic::x86_sse2_psra_w;
3925 else
3926 llvm_unreachable("Unexpected size");
3927 } else if (Name.ends_with(".256")) {
3928 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3929 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3930 : Intrinsic::x86_avx2_psra_d;
3931 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3932 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3933 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3934 : Intrinsic::x86_avx512_psra_q_256;
3935 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3936 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3937 : Intrinsic::x86_avx2_psra_w;
3938 else
3939 llvm_unreachable("Unexpected size");
3940 } else {
3941 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3942 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3943 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3944 : Intrinsic::x86_avx512_psra_d_512;
3945 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3946 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3947 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3948 : Intrinsic::x86_avx512_psra_q_512;
3949 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3950 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3951 : Intrinsic::x86_avx512_psra_w_512;
3952 else
3953 llvm_unreachable("Unexpected size");
3954 }
3955
3956 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3957 } else if (Name.starts_with("avx512.mask.move.s")) {
3958 Rep = upgradeMaskedMove(Builder, *CI);
3959 } else if (Name.starts_with("avx512.cvtmask2")) {
3960 Rep = upgradeMaskToInt(Builder, *CI);
3961 } else if (Name.ends_with(".movntdqa")) {
3962 MDNode *Node = MDNode::get(
3963 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3964
3965 LoadInst *LI = Builder.CreateAlignedLoad(
3966 CI->getType(), CI->getArgOperand(0),
3967 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3968 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3969 Rep = LI;
3970 } else if (Name.starts_with("fma.vfmadd.") ||
3971 Name.starts_with("fma.vfmsub.") ||
3972 Name.starts_with("fma.vfnmadd.") ||
3973 Name.starts_with("fma.vfnmsub.")) {
3974 bool NegMul = Name[6] == 'n';
3975 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3976 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3977
3978 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3979 CI->getArgOperand(2)};
3980
3981 if (IsScalar) {
3982 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3983 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3984 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3985 }
3986
3987 if (NegMul && !IsScalar)
3988 Ops[0] = Builder.CreateFNeg(Ops[0]);
3989 if (NegMul && IsScalar)
3990 Ops[1] = Builder.CreateFNeg(Ops[1]);
3991 if (NegAcc)
3992 Ops[2] = Builder.CreateFNeg(Ops[2]);
3993
3994 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3995
3996 if (IsScalar)
3997 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
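// Decode example (editorial note): in "fma.vfnmsub.sd", Name[6] == 'n'
// sets NegMul, Name[8] == 's' sets NegAcc, and Name[12] == 's' marks the
// scalar form handled by the extract/insert sequence above.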
3998 } else if (Name.starts_with("fma4.vfmadd.s")) {
3999 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4000 CI->getArgOperand(2)};
4001
4002 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4003 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4004 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4005
4006 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4007
4008 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4009 Rep, (uint64_t)0);
4010 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4011 Name.starts_with("avx512.maskz.vfmadd.s") ||
4012 Name.starts_with("avx512.mask3.vfmadd.s") ||
4013 Name.starts_with("avx512.mask3.vfmsub.s") ||
4014 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4015 bool IsMask3 = Name[11] == '3';
4016 bool IsMaskZ = Name[11] == 'z';
4017 // Drop the "avx512.mask." to make it easier.
4018 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4019 bool NegMul = Name[2] == 'n';
4020 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4021
4022 Value *A = CI->getArgOperand(0);
4023 Value *B = CI->getArgOperand(1);
4024 Value *C = CI->getArgOperand(2);
4025
4026 if (NegMul && (IsMask3 || IsMaskZ))
4027 A = Builder.CreateFNeg(A);
4028 if (NegMul && !(IsMask3 || IsMaskZ))
4029 B = Builder.CreateFNeg(B);
4030 if (NegAcc)
4031 C = Builder.CreateFNeg(C);
4032
4033 A = Builder.CreateExtractElement(A, (uint64_t)0);
4034 B = Builder.CreateExtractElement(B, (uint64_t)0);
4035 C = Builder.CreateExtractElement(C, (uint64_t)0);
4036
4037 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4038 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4039 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4040
4041 Intrinsic::ID IID;
4042 if (Name.back() == 'd')
4043 IID = Intrinsic::x86_avx512_vfmadd_f64;
4044 else
4045 IID = Intrinsic::x86_avx512_vfmadd_f32;
4046 Rep = Builder.CreateIntrinsic(IID, Ops);
4047 } else {
4048 Rep = Builder.CreateFMA(A, B, C);
4049 }
4050
4051 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4052 : IsMask3 ? C
4053 : A;
4054
4055 // For Mask3 with NegAcc, we need to create a new extractelement that
4056 // avoids the negation above.
4057 if (NegAcc && IsMask3)
4058 PassThru =
4059 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4060
4061 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4062 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4063 (uint64_t)0);
4064 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4065 Name.starts_with("avx512.mask.vfnmadd.p") ||
4066 Name.starts_with("avx512.mask.vfnmsub.p") ||
4067 Name.starts_with("avx512.mask3.vfmadd.p") ||
4068 Name.starts_with("avx512.mask3.vfmsub.p") ||
4069 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4070 Name.starts_with("avx512.maskz.vfmadd.p")) {
4071 bool IsMask3 = Name[11] == '3';
4072 bool IsMaskZ = Name[11] == 'z';
4073 // Drop the "avx512.mask." to make it easier.
4074 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4075 bool NegMul = Name[2] == 'n';
4076 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4077
4078 Value *A = CI->getArgOperand(0);
4079 Value *B = CI->getArgOperand(1);
4080 Value *C = CI->getArgOperand(2);
4081
4082 if (NegMul && (IsMask3 || IsMaskZ))
4083 A = Builder.CreateFNeg(A);
4084 if (NegMul && !(IsMask3 || IsMaskZ))
4085 B = Builder.CreateFNeg(B);
4086 if (NegAcc)
4087 C = Builder.CreateFNeg(C);
4088
4089 if (CI->arg_size() == 5 &&
4090 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4091 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4092 Intrinsic::ID IID;
4093 // Check the character before ".512" in string.
4094 if (Name[Name.size() - 5] == 's')
4095 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4096 else
4097 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4098
4099 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4100 } else {
4101 Rep = Builder.CreateFMA(A, B, C);
4102 }
4103
4104 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4105 : IsMask3 ? CI->getArgOperand(2)
4106 : CI->getArgOperand(0);
4107
4108 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4109 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4110 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4111 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4112 Intrinsic::ID IID;
4113 if (VecWidth == 128 && EltWidth == 32)
4114 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4115 else if (VecWidth == 256 && EltWidth == 32)
4116 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4117 else if (VecWidth == 128 && EltWidth == 64)
4118 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4119 else if (VecWidth == 256 && EltWidth == 64)
4120 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4121 else
4122 llvm_unreachable("Unexpected intrinsic");
4123
4124 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4125 CI->getArgOperand(2)};
4126 Ops[2] = Builder.CreateFNeg(Ops[2]);
4127 Rep = Builder.CreateIntrinsic(IID, Ops);
4128 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4129 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4130 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4131 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4132 bool IsMask3 = Name[11] == '3';
4133 bool IsMaskZ = Name[11] == 'z';
4134 // Drop the "avx512.mask." to make it easier.
4135 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4136 bool IsSubAdd = Name[3] == 's';
4137 if (CI->arg_size() == 5) {
4138 Intrinsic::ID IID;
4139 // Check the character before ".512" in string.
4140 if (Name[Name.size() - 5] == 's')
4141 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4142 else
4143 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4144
4145 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4146 CI->getArgOperand(2), CI->getArgOperand(4)};
4147 if (IsSubAdd)
4148 Ops[2] = Builder.CreateFNeg(Ops[2]);
4149
4150 Rep = Builder.CreateIntrinsic(IID, Ops);
4151 } else {
4152 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4153
4154 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4155 CI->getArgOperand(2)};
4156
4157 Function *FMA = Intrinsic::getOrInsertDeclaration(
4158 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4159 Value *Odd = Builder.CreateCall(FMA, Ops);
4160 Ops[2] = Builder.CreateFNeg(Ops[2]);
4161 Value *Even = Builder.CreateCall(FMA, Ops);
4162
4163 if (IsSubAdd)
4164 std::swap(Even, Odd);
4165
4166 SmallVector<int, 32> Idxs(NumElts);
4167 for (int i = 0; i != NumElts; ++i)
4168 Idxs[i] = i + (i % 2) * NumElts;
4169
4170 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4171 }
4172
4173 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4174 : IsMask3 ? CI->getArgOperand(2)
4175 : CI->getArgOperand(0);
4176
4177 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4178 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4179 Name.starts_with("avx512.maskz.pternlog.")) {
4180 bool ZeroMask = Name[11] == 'z';
4181 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4182 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4183 Intrinsic::ID IID;
4184 if (VecWidth == 128 && EltWidth == 32)
4185 IID = Intrinsic::x86_avx512_pternlog_d_128;
4186 else if (VecWidth == 256 && EltWidth == 32)
4187 IID = Intrinsic::x86_avx512_pternlog_d_256;
4188 else if (VecWidth == 512 && EltWidth == 32)
4189 IID = Intrinsic::x86_avx512_pternlog_d_512;
4190 else if (VecWidth == 128 && EltWidth == 64)
4191 IID = Intrinsic::x86_avx512_pternlog_q_128;
4192 else if (VecWidth == 256 && EltWidth == 64)
4193 IID = Intrinsic::x86_avx512_pternlog_q_256;
4194 else if (VecWidth == 512 && EltWidth == 64)
4195 IID = Intrinsic::x86_avx512_pternlog_q_512;
4196 else
4197 llvm_unreachable("Unexpected intrinsic");
4198
4199 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4200 CI->getArgOperand(2), CI->getArgOperand(3)};
4201 Rep = Builder.CreateIntrinsic(IID, Args);
4202 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4203 : CI->getArgOperand(0);
4204 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4205 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4206 Name.starts_with("avx512.maskz.vpmadd52")) {
4207 bool ZeroMask = Name[11] == 'z';
4208 bool High = Name[20] == 'h' || Name[21] == 'h';
4209 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4210 Intrinsic::ID IID;
4211 if (VecWidth == 128 && !High)
4212 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4213 else if (VecWidth == 256 && !High)
4214 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4215 else if (VecWidth == 512 && !High)
4216 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4217 else if (VecWidth == 128 && High)
4218 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4219 else if (VecWidth == 256 && High)
4220 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4221 else if (VecWidth == 512 && High)
4222 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4223 else
4224 llvm_unreachable("Unexpected intrinsic");
4225
4226 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4227 CI->getArgOperand(2)};
4228 Rep = Builder.CreateIntrinsic(IID, Args);
4229 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4230 : CI->getArgOperand(0);
4231 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4232 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4233 Name.starts_with("avx512.mask.vpermt2var.") ||
4234 Name.starts_with("avx512.maskz.vpermt2var.")) {
4235 bool ZeroMask = Name[11] == 'z';
4236 bool IndexForm = Name[17] == 'i';
4237 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4238 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4239 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4240 Name.starts_with("avx512.mask.vpdpbusds.") ||
4241 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4242 bool ZeroMask = Name[11] == 'z';
4243 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4244 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4245 Intrinsic::ID IID;
4246 if (VecWidth == 128 && !IsSaturating)
4247 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4248 else if (VecWidth == 256 && !IsSaturating)
4249 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4250 else if (VecWidth == 512 && !IsSaturating)
4251 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4252 else if (VecWidth == 128 && IsSaturating)
4253 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4254 else if (VecWidth == 256 && IsSaturating)
4255 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4256 else if (VecWidth == 512 && IsSaturating)
4257 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4258 else
4259 llvm_unreachable("Unexpected intrinsic");
4260
4261 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4262 CI->getArgOperand(2)};
4263
4264 // Input argument types were incorrectly set to vectors of i32 before, but
4265 // they should be vectors of i8. Insert a bitcast when encountering the old
4266 // types.
4267 if (Args[1]->getType()->isVectorTy() &&
4268 cast<VectorType>(Args[1]->getType())
4269 ->getElementType()
4270 ->isIntegerTy(32) &&
4271 Args[2]->getType()->isVectorTy() &&
4272 cast<VectorType>(Args[2]->getType())
4273 ->getElementType()
4274 ->isIntegerTy(32)) {
4275 Type *NewArgType = nullptr;
4276 if (VecWidth == 128)
4277 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4278 else if (VecWidth == 256)
4279 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4280 else if (VecWidth == 512)
4281 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4282 else
4283 llvm_unreachable("Unexpected vector bit width");
4284
4285 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4286 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4287 }
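// Example (editorial note): a 128-bit call whose last two arguments are
// <4 x i32> gets them bitcast to <16 x i8> here before emitting the new
// intrinsic.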
4288
4289 Rep = Builder.CreateIntrinsic(IID, Args);
4290 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4291 : CI->getArgOperand(0);
4292 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4293 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4294 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4295 Name.starts_with("avx512.mask.vpdpwssds.") ||
4296 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4297 bool ZeroMask = Name[11] == 'z';
4298 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4299 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4300 Intrinsic::ID IID;
4301 if (VecWidth == 128 && !IsSaturating)
4302 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4303 else if (VecWidth == 256 && !IsSaturating)
4304 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4305 else if (VecWidth == 512 && !IsSaturating)
4306 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4307 else if (VecWidth == 128 && IsSaturating)
4308 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4309 else if (VecWidth == 256 && IsSaturating)
4310 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4311 else if (VecWidth == 512 && IsSaturating)
4312 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4313 else
4314 llvm_unreachable("Unexpected intrinsic");
4315
4316 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4317 CI->getArgOperand(2)};
4318 Rep = Builder.CreateIntrinsic(IID, Args);
4319 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4320 : CI->getArgOperand(0);
4321 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4322 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4323 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4324 Name == "subborrow.u32" || Name == "subborrow.u64") {
4325 Intrinsic::ID IID;
4326 if (Name[0] == 'a' && Name.back() == '2')
4327 IID = Intrinsic::x86_addcarry_32;
4328 else if (Name[0] == 'a' && Name.back() == '4')
4329 IID = Intrinsic::x86_addcarry_64;
4330 else if (Name[0] == 's' && Name.back() == '2')
4331 IID = Intrinsic::x86_subborrow_32;
4332 else if (Name[0] == 's' && Name.back() == '4')
4333 IID = Intrinsic::x86_subborrow_64;
4334 else
4335 llvm_unreachable("Unexpected intrinsic");
4336
4337 // Make a call with 3 operands.
4338 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4339 CI->getArgOperand(2)};
4340 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4341
4342 // Extract the second result and store it.
4343 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4344 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4345 // Replace the original call result with the first result of the new call.
4346 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4347
4348 CI->replaceAllUsesWith(CF);
4349 Rep = nullptr;
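// Editorial note: Rep stays null because all uses were already replaced;
// e.g. addcarry.u32 becomes a call to llvm.x86.addcarry.32 returning
// { i8, i32 }, with the i32 sum stored through operand 3 and the i8 carry
// flag returned.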
4350 } else if (Name.starts_with("avx512.mask.") &&
4351 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4352 // Rep will be updated by the call in the condition.
4353 }
4354
4355 return Rep;
4356}
4357
4358 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4359 Function *F, IRBuilder<> &Builder) {
4360 if (Name.starts_with("neon.bfcvt")) {
4361 if (Name.starts_with("neon.bfcvtn2")) {
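 // bfcvtn2 narrows the second operand to bfloat and concatenates it with
 // the low half of the first operand.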
4362 SmallVector<int, 32> LoMask(4);
4363 std::iota(LoMask.begin(), LoMask.end(), 0);
4364 SmallVector<int, 32> ConcatMask(8);
4365 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4366 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4367 Value *Trunc =
4368 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4369 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4370 } else if (Name.starts_with("neon.bfcvtn")) {
4371 SmallVector<int, 32> ConcatMask(8);
4372 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4373 Type *V4BF16 =
4374 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4375 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4377 return Builder.CreateShuffleVector(
4378 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4379 } else {
4380 return Builder.CreateFPTrunc(CI->getOperand(0),
4381 Type::getBFloatTy(F->getContext()));
4382 }
4383 } else if (Name.starts_with("sve.fcvt")) {
4384 Intrinsic::ID NewID =
4385 StringSwitch<Intrinsic::ID>(Name)
4386 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4387 .Case("sve.fcvtnt.bf16f32",
4388 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4389 .Default(Intrinsic::not_intrinsic);
4390 if (NewID == Intrinsic::not_intrinsic)
4391 llvm_unreachable("Unhandled Intrinsic!");
4392
4393 SmallVector<Value *, 3> Args(CI->args());
4394
4395 // The original intrinsics incorrectly used a predicate based on the
4396 // smallest element type rather than the largest.
4397 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4398 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4399
4400 if (Args[1]->getType() != BadPredTy)
4401 llvm_unreachable("Unexpected predicate type!");
4402
4403 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4404 BadPredTy, Args[1]);
4405 Args[1] = Builder.CreateIntrinsic(
4406 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4407
4408 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4409 CI->getName());
4410 }
4411
4412 llvm_unreachable("Unhandled Intrinsic!");
4413}
4414
4415static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4416 IRBuilder<> &Builder) {
4417 if (Name == "mve.vctp64.old") {
4418 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4419 // correct type.
4420 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4421 CI->getArgOperand(0),
4422 /*FMFSource=*/nullptr, CI->getName());
4423 Value *C1 = Builder.CreateIntrinsic(
4424 Intrinsic::arm_mve_pred_v2i,
4425 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4426 return Builder.CreateIntrinsic(
4427 Intrinsic::arm_mve_pred_i2v,
4428 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4429 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4430 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4431 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4432 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4433 Name ==
4434 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4435 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4436 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4437 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4438 Name ==
4439 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4440 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4441 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4442 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4443 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4444 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4445 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4446 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4447 std::vector<Type *> Tys;
4448 unsigned ID = CI->getIntrinsicID();
4449 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4450 switch (ID) {
4451 case Intrinsic::arm_mve_mull_int_predicated:
4452 case Intrinsic::arm_mve_vqdmull_predicated:
4453 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4454 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4455 break;
4456 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4457 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4458 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4459 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4460 V2I1Ty};
4461 break;
4462 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4463 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4464 CI->getOperand(1)->getType(), V2I1Ty};
4465 break;
4466 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4467 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4468 CI->getOperand(2)->getType(), V2I1Ty};
4469 break;
4470 case Intrinsic::arm_cde_vcx1q_predicated:
4471 case Intrinsic::arm_cde_vcx1qa_predicated:
4472 case Intrinsic::arm_cde_vcx2q_predicated:
4473 case Intrinsic::arm_cde_vcx2qa_predicated:
4474 case Intrinsic::arm_cde_vcx3q_predicated:
4475 case Intrinsic::arm_cde_vcx3qa_predicated:
4476 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4477 break;
4478 default:
4479 llvm_unreachable("Unhandled Intrinsic!");
4480 }
4481
4482 std::vector<Value *> Ops;
4483 for (Value *Op : CI->args()) {
4484 Type *Ty = Op->getType();
4485 if (Ty->getScalarSizeInBits() == 1) {
4486 Value *C1 = Builder.CreateIntrinsic(
4487 Intrinsic::arm_mve_pred_v2i,
4488 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4489 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4490 }
4491 Ops.push_back(Op);
4492 }
4493
4494 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4495 CI->getName());
4496 }
4497 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4498}
4499
4500// These are expected to have the arguments:
4501// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4502//
4503// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4504//
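// For example (roughly), a call such as
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v, i32 0, i32 0, i1 false)
// is rewritten to
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") seq_cst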
4505static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4506 Function *F, IRBuilder<> &Builder) {
4507 AtomicRMWInst::BinOp RMWOp =
4508 StringSwitch<AtomicRMWInst::BinOp>(Name)
4509 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4510 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4511 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4512 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4513 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4514 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4515 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4516 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4517 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4518 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4519 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4520
4521 unsigned NumOperands = CI->getNumOperands();
4522 if (NumOperands < 3) // Malformed bitcode.
4523 return nullptr;
4524
4525 Value *Ptr = CI->getArgOperand(0);
4526 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4527 if (!PtrTy) // Malformed.
4528 return nullptr;
4529
4530 Value *Val = CI->getArgOperand(1);
4531 if (Val->getType() != CI->getType()) // Malformed.
4532 return nullptr;
4533
4534 ConstantInt *OrderArg = nullptr;
4535 bool IsVolatile = false;
4536
4537 // These should have 5 arguments (plus the callee). A separate version of the
4538 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4539 if (NumOperands > 3)
4540 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4541
4542 // Ignore scope argument at 3
4543
4544 if (NumOperands > 5) {
4545 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4546 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4547 }
4548
4549 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4550 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4551 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4552 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4553 Order = AtomicOrdering::SequentiallyConsistent;
4554
4555 LLVMContext &Ctx = F->getContext();
4556
4557 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4558 Type *RetTy = CI->getType();
4559 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4560 if (VT->getElementType()->isIntegerTy(16)) {
4561 VectorType *AsBF16 =
4562 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4563 Val = Builder.CreateBitCast(Val, AsBF16);
4564 }
4565 }
4566
4567 // The scope argument never really worked correctly. Use agent as the most
4568 // conservative option which should still always produce the instruction.
4569 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4570 AtomicRMWInst *RMW =
4571 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4572
4573 unsigned AddrSpace = PtrTy->getAddressSpace();
4574 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4575 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4576 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4577 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4578 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4579 }
4580
4581 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4582 MDBuilder MDB(F->getContext());
4583 MDNode *RangeNotPrivate =
4584 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4585 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4586 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4587 }
4588
4589 if (IsVolatile)
4590 RMW->setVolatile(true);
4591
4592 return Builder.CreateBitCast(RMW, RetTy);
4593}
4594
4595/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4596/// plain MDNode, as it's the verifier's job to check these are the correct
4597/// types later.
4598static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4599 if (Op < CI->arg_size()) {
4600 if (MetadataAsValue *MAV =
4601 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4602 Metadata *MD = MAV->getMetadata();
4603 return dyn_cast_if_present<MDNode>(MD);
4604 }
4605 }
4606 return nullptr;
4607}
4608
4609/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4610static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4611 if (Op < CI->arg_size())
4612 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4613 return MAV->getMetadata();
4614 return nullptr;
4615}
4616
4617static MDNode *getDebugLocSafe(const Instruction *I) {
4618 // The MDNode attached to this instruction might not be the correct type,
4619 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4620 return I->getDebugLoc().getAsMDNode();
4621}
4622
4623/// Convert debug intrinsic calls to non-instruction debug records.
4624/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4625/// \p CI - The debug intrinsic call.
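/// For example, a call to llvm.dbg.value is replaced with an equivalent
/// #dbg_value record inserted at the call's position.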
4626static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4627 DbgRecord *DR = nullptr;
4628 if (Name == "label") {
4629 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4630 CI->getDebugLoc());
4631 } else if (Name == "assign") {
4632 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4633 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4634 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4635 unwrapMAVMetadataOp(CI, 4),
4636 /*The address is a Value ref, it will be stored as a Metadata */
4637 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4638 } else if (Name == "declare") {
4639 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4640 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4641 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4642 getDebugLocSafe(CI));
4643 } else if (Name == "addr") {
4644 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4645 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4646 // Don't try to add something to the expression if it's not an expression.
4647 // Instead, allow the verifier to fail later.
4648 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4649 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4650 }
4651 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4652 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4653 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4654 getDebugLocSafe(CI));
4655 } else if (Name == "value") {
4656 // An old version of dbg.value had an extra offset argument.
4657 unsigned VarOp = 1;
4658 unsigned ExprOp = 2;
4659 if (CI->arg_size() == 4) {
4660 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4661 // Nonzero offset dbg.values get dropped without a replacement.
4662 if (!Offset || !Offset->isZeroValue())
4663 return;
4664 VarOp = 2;
4665 ExprOp = 3;
4666 }
4667 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4668 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4669 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4670 nullptr, getDebugLocSafe(CI));
4671 }
4672 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4673 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4674}
4675
4676/// Upgrade a call to an old intrinsic. All argument and return casting must be
4677/// provided to seamlessly integrate with existing context.
4678void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4679 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4680 // checks the callee's function type matches. It's likely we need to handle
4681 // type changes here.
4682 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4683 if (!F)
4684 return;
4685
4686 LLVMContext &C = CI->getContext();
4687 IRBuilder<> Builder(C);
4688 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4689
4690 if (!NewFn) {
4691 // Get the Function's name.
4692 StringRef Name = F->getName();
4693
4694 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4695 Name = Name.substr(5);
4696
4697 bool IsX86 = Name.consume_front("x86.");
4698 bool IsNVVM = Name.consume_front("nvvm.");
4699 bool IsAArch64 = Name.consume_front("aarch64.");
4700 bool IsARM = Name.consume_front("arm.");
4701 bool IsAMDGCN = Name.consume_front("amdgcn.");
4702 bool IsDbg = Name.consume_front("dbg.");
4703 Value *Rep = nullptr;
4704
4705 if (!IsX86 && Name == "stackprotectorcheck") {
4706 Rep = nullptr;
4707 } else if (IsNVVM) {
4708 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4709 } else if (IsX86) {
4710 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4711 } else if (IsAArch64) {
4712 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4713 } else if (IsARM) {
4714 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4715 } else if (IsAMDGCN) {
4716 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4717 } else if (IsDbg) {
4718 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4719 } else {
4720 llvm_unreachable("Unknown function for CallBase upgrade.");
4721 }
4722
4723 if (Rep)
4724 CI->replaceAllUsesWith(Rep);
4725 CI->eraseFromParent();
4726 return;
4727 }
4728
4729 const auto &DefaultCase = [&]() -> void {
4730 if (F == NewFn)
4731 return;
4732
4733 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4734 // Handle generic mangling change.
4735 assert(
4736 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4737 "Unknown function for CallBase upgrade and isn't just a name change");
4738 CI->setCalledFunction(NewFn);
4739 return;
4740 }
4741
4742 // This must be an upgrade from a named to a literal struct.
4743 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4744 assert(OldST != NewFn->getReturnType() &&
4745 "Return type must have changed");
4746 assert(OldST->getNumElements() ==
4747 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4748 "Must have same number of elements");
4749
4750 SmallVector<Value *> Args(CI->args());
4751 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4752 NewCI->setAttributes(CI->getAttributes());
4753 Value *Res = PoisonValue::get(OldST);
4754 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4755 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4756 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4757 }
4758 CI->replaceAllUsesWith(Res);
4759 CI->eraseFromParent();
4760 return;
4761 }
4762
4763 // We're probably about to produce something invalid. Let the verifier catch
4764 // it instead of dying here.
4765 CI->setCalledOperand(
4766 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4767 return;
4768 };
4769 CallInst *NewCall = nullptr;
4770 switch (NewFn->getIntrinsicID()) {
4771 default: {
4772 DefaultCase();
4773 return;
4774 }
4775 case Intrinsic::arm_neon_vst1:
4776 case Intrinsic::arm_neon_vst2:
4777 case Intrinsic::arm_neon_vst3:
4778 case Intrinsic::arm_neon_vst4:
4779 case Intrinsic::arm_neon_vst2lane:
4780 case Intrinsic::arm_neon_vst3lane:
4781 case Intrinsic::arm_neon_vst4lane: {
4782 SmallVector<Value *, 4> Args(CI->args());
4783 NewCall = Builder.CreateCall(NewFn, Args);
4784 break;
4785 }
4786 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4787 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4788 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4789 LLVMContext &Ctx = F->getParent()->getContext();
4790 SmallVector<Value *, 4> Args(CI->args());
4791 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4792 cast<ConstantInt>(Args[3])->getZExtValue());
4793 NewCall = Builder.CreateCall(NewFn, Args);
4794 break;
4795 }
4796 case Intrinsic::aarch64_sve_ld3_sret:
4797 case Intrinsic::aarch64_sve_ld4_sret:
4798 case Intrinsic::aarch64_sve_ld2_sret: {
4799 StringRef Name = F->getName();
4800 Name = Name.substr(5);
4801 unsigned N = StringSwitch<unsigned>(Name)
4802 .StartsWith("aarch64.sve.ld2", 2)
4803 .StartsWith("aarch64.sve.ld3", 3)
4804 .StartsWith("aarch64.sve.ld4", 4)
4805 .Default(0);
4806 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4807 unsigned MinElts = RetTy->getMinNumElements() / N;
4808 SmallVector<Value *, 2> Args(CI->args());
4809 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
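 // The old intrinsic returned a single wide vector; rebuild it by inserting
 // each member of the new struct return at its subvector offset.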
4810 Value *Ret = llvm::PoisonValue::get(RetTy);
4811 for (unsigned I = 0; I < N; I++) {
4812 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4813 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4814 }
4815 NewCall = dyn_cast<CallInst>(Ret);
4816 break;
4817 }
4818
4819 case Intrinsic::coro_end: {
4820 SmallVector<Value *, 3> Args(CI->args());
4821 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4822 NewCall = Builder.CreateCall(NewFn, Args);
4823 break;
4824 }
4825
4826 case Intrinsic::vector_extract: {
4827 StringRef Name = F->getName();
4828 Name = Name.substr(5); // Strip llvm
4829 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4830 DefaultCase();
4831 return;
4832 }
4833 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4834 unsigned MinElts = RetTy->getMinNumElements();
4835 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4836 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4837 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4838 break;
4839 }
4840
4841 case Intrinsic::vector_insert: {
4842 StringRef Name = F->getName();
4843 Name = Name.substr(5);
4844 if (!Name.starts_with("aarch64.sve.tuple")) {
4845 DefaultCase();
4846 return;
4847 }
4848 if (Name.starts_with("aarch64.sve.tuple.set")) {
4849 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4850 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4851 Value *NewIdx =
4852 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4853 NewCall = Builder.CreateCall(
4854 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4855 break;
4856 }
4857 if (Name.starts_with("aarch64.sve.tuple.create")) {
4858 unsigned N = StringSwitch<unsigned>(Name)
4859 .StartsWith("aarch64.sve.tuple.create2", 2)
4860 .StartsWith("aarch64.sve.tuple.create3", 3)
4861 .StartsWith("aarch64.sve.tuple.create4", 4)
4862 .Default(0);
4863 assert(N > 1 && "Create is expected to be between 2-4");
4864 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4865 Value *Ret = llvm::PoisonValue::get(RetTy);
4866 unsigned MinElts = RetTy->getMinNumElements() / N;
4867 for (unsigned I = 0; I < N; I++) {
4868 Value *V = CI->getArgOperand(I);
4869 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4870 }
4871 NewCall = dyn_cast<CallInst>(Ret);
4872 }
4873 break;
4874 }
4875
4876 case Intrinsic::arm_neon_bfdot:
4877 case Intrinsic::arm_neon_bfmmla:
4878 case Intrinsic::arm_neon_bfmlalb:
4879 case Intrinsic::arm_neon_bfmlalt:
4880 case Intrinsic::aarch64_neon_bfdot:
4881 case Intrinsic::aarch64_neon_bfmmla:
4882 case Intrinsic::aarch64_neon_bfmlalb:
4883 case Intrinsic::aarch64_neon_bfmlalt: {
4884 SmallVector<Value *, 3> Args;
4885 assert(CI->arg_size() == 3 &&
4886 "Mismatch between function args and call args");
4887 size_t OperandWidth =
4888 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4889 assert((OperandWidth == 64 || OperandWidth == 128) &&
4890 "Unexpected operand width");
4891 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4892 auto Iter = CI->args().begin();
4893 Args.push_back(*Iter++);
4894 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4895 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4896 NewCall = Builder.CreateCall(NewFn, Args);
4897 break;
4898 }
4899
4900 case Intrinsic::bitreverse:
4901 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4902 break;
4903
4904 case Intrinsic::ctlz:
4905 case Intrinsic::cttz:
4906 assert(CI->arg_size() == 1 &&
4907 "Mismatch between function args and call args");
4908 NewCall =
4909 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4910 break;
4911
4912 case Intrinsic::objectsize: {
4913 Value *NullIsUnknownSize =
4914 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4915 Value *Dynamic =
4916 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4917 NewCall = Builder.CreateCall(
4918 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4919 break;
4920 }
4921
4922 case Intrinsic::ctpop:
4923 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4924 break;
4925
4926 case Intrinsic::convert_from_fp16:
4927 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4928 break;
4929
4930 case Intrinsic::dbg_value: {
4931 StringRef Name = F->getName();
4932 Name = Name.substr(5); // Strip llvm.
4933 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4934 if (Name.starts_with("dbg.addr")) {
4935 DIExpression *Expr = cast<DIExpression>(
4936 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4937 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4938 NewCall =
4939 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4940 MetadataAsValue::get(C, Expr)});
4941 break;
4942 }
4943
4944 // Upgrade from the old version that had an extra offset argument.
4945 assert(CI->arg_size() == 4);
4946 // Drop nonzero offsets instead of attempting to upgrade them.
4947 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4948 if (Offset->isZeroValue()) {
4949 NewCall = Builder.CreateCall(
4950 NewFn,
4951 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4952 break;
4953 }
4954 CI->eraseFromParent();
4955 return;
4956 }
4957
4958 case Intrinsic::ptr_annotation:
4959 // Upgrade from versions that lacked the annotation attribute argument.
4960 if (CI->arg_size() != 4) {
4961 DefaultCase();
4962 return;
4963 }
4964
4965 // Create a new call with an added null annotation attribute argument.
4966 NewCall = Builder.CreateCall(
4967 NewFn,
4968 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4969 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4970 NewCall->takeName(CI);
4971 CI->replaceAllUsesWith(NewCall);
4972 CI->eraseFromParent();
4973 return;
4974
4975 case Intrinsic::var_annotation:
4976 // Upgrade from versions that lacked the annotation attribute argument.
4977 if (CI->arg_size() != 4) {
4978 DefaultCase();
4979 return;
4980 }
4981 // Create a new call with an added null annotation attribute argument.
4982 NewCall = Builder.CreateCall(
4983 NewFn,
4984 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4985 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4986 NewCall->takeName(CI);
4987 CI->replaceAllUsesWith(NewCall);
4988 CI->eraseFromParent();
4989 return;
4990
4991 case Intrinsic::riscv_aes32dsi:
4992 case Intrinsic::riscv_aes32dsmi:
4993 case Intrinsic::riscv_aes32esi:
4994 case Intrinsic::riscv_aes32esmi:
4995 case Intrinsic::riscv_sm4ks:
4996 case Intrinsic::riscv_sm4ed: {
4997 // The last argument to these intrinsics used to be i8 and changed to i32.
4998 // The type overload for sm4ks and sm4ed was removed.
4999 Value *Arg2 = CI->getArgOperand(2);
5000 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5001 return;
5002
5003 Value *Arg0 = CI->getArgOperand(0);
5004 Value *Arg1 = CI->getArgOperand(1);
5005 if (CI->getType()->isIntegerTy(64)) {
5006 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5007 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5008 }
5009
5010 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5011 cast<ConstantInt>(Arg2)->getZExtValue());
5012
5013 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5014 Value *Res = NewCall;
5015 if (Res->getType() != CI->getType())
5016 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5017 NewCall->takeName(CI);
5018 CI->replaceAllUsesWith(Res);
5019 CI->eraseFromParent();
5020 return;
5021 }
5022 case Intrinsic::nvvm_mapa_shared_cluster: {
5023 // Create a new call with the correct address space.
5024 NewCall =
5025 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5026 Value *Res = NewCall;
5027 Res = Builder.CreateAddrSpaceCast(
5028 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5029 NewCall->takeName(CI);
5030 CI->replaceAllUsesWith(Res);
5031 CI->eraseFromParent();
5032 return;
5033 }
5034 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5035 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5036 // Create a new call with the correct address space.
5037 SmallVector<Value *, 4> Args(CI->args());
5038 Args[0] = Builder.CreateAddrSpaceCast(
5039 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5040
5041 NewCall = Builder.CreateCall(NewFn, Args);
5042 NewCall->takeName(CI);
5043 CI->replaceAllUsesWith(NewCall);
5044 CI->eraseFromParent();
5045 return;
5046 }
5047 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5048 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5049 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5050 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5051 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5052 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5053 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5054 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5055 SmallVector<Value *, 16> Args(CI->args());
5056
5057 // Create AddrSpaceCast to shared_cluster if needed.
5058 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5059 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5060 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5061 Args[0] = Builder.CreateAddrSpaceCast(
5062 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5063
5064 // Attach the flag argument for cta_group, with a
5065 // default value of 0. This handles case (2) in
5066 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5067 size_t NumArgs = CI->arg_size();
5068 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5069 if (!FlagArg->getType()->isIntegerTy(1))
5070 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5071
5072 NewCall = Builder.CreateCall(NewFn, Args);
5073 NewCall->takeName(CI);
5074 CI->replaceAllUsesWith(NewCall);
5075 CI->eraseFromParent();
5076 return;
5077 }
5078 case Intrinsic::riscv_sha256sig0:
5079 case Intrinsic::riscv_sha256sig1:
5080 case Intrinsic::riscv_sha256sum0:
5081 case Intrinsic::riscv_sha256sum1:
5082 case Intrinsic::riscv_sm3p0:
5083 case Intrinsic::riscv_sm3p1: {
5084 // These intrinsics used to have an additional i64 type overload. Only such
5085 // calls need upgrading: truncate the argument to i32 and extend the result.
5086 if (!CI->getType()->isIntegerTy(64))
5087 return;
5088
5089 Value *Arg =
5090 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5091
5092 NewCall = Builder.CreateCall(NewFn, Arg);
5093 Value *Res =
5094 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5095 NewCall->takeName(CI);
5096 CI->replaceAllUsesWith(Res);
5097 CI->eraseFromParent();
5098 return;
5099 }
5100
5101 case Intrinsic::x86_xop_vfrcz_ss:
5102 case Intrinsic::x86_xop_vfrcz_sd:
5103 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5104 break;
5105
5106 case Intrinsic::x86_xop_vpermil2pd:
5107 case Intrinsic::x86_xop_vpermil2ps:
5108 case Intrinsic::x86_xop_vpermil2pd_256:
5109 case Intrinsic::x86_xop_vpermil2ps_256: {
5110 SmallVector<Value *, 4> Args(CI->args());
5111 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5112 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5113 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5114 NewCall = Builder.CreateCall(NewFn, Args);
5115 break;
5116 }
5117
5118 case Intrinsic::x86_sse41_ptestc:
5119 case Intrinsic::x86_sse41_ptestz:
5120 case Intrinsic::x86_sse41_ptestnzc: {
5121 // The arguments for these intrinsics used to be v4f32, and changed
5122 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5123 // So, the only thing required is a bitcast for both arguments.
5124 // First, check the arguments have the old type.
5125 Value *Arg0 = CI->getArgOperand(0);
5126 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5127 return;
5128
5129 // Old intrinsic, add bitcasts
5130 Value *Arg1 = CI->getArgOperand(1);
5131
5132 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5133
5134 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5135 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5136
5137 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5138 break;
5139 }
5140
5141 case Intrinsic::x86_rdtscp: {
5142 // This used to take 1 argument. If we have no arguments, it is already
5143 // upgraded.
5144 if (CI->getNumOperands() == 0)
5145 return;
5146
5147 NewCall = Builder.CreateCall(NewFn);
5148 // Extract the second result and store it.
5149 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5150 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5151 // Replace the original call result with the first result of the new call.
5152 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5153
5154 NewCall->takeName(CI);
5155 CI->replaceAllUsesWith(TSC);
5156 CI->eraseFromParent();
5157 return;
5158 }
5159
5160 case Intrinsic::x86_sse41_insertps:
5161 case Intrinsic::x86_sse41_dppd:
5162 case Intrinsic::x86_sse41_dpps:
5163 case Intrinsic::x86_sse41_mpsadbw:
5164 case Intrinsic::x86_avx_dp_ps_256:
5165 case Intrinsic::x86_avx2_mpsadbw: {
5166 // Need to truncate the last argument from i32 to i8 -- this argument models
5167 // an inherently 8-bit immediate operand to these x86 instructions.
5168 SmallVector<Value *, 4> Args(CI->args());
5169
5170 // Replace the last argument with a trunc.
5171 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5172 NewCall = Builder.CreateCall(NewFn, Args);
5173 break;
5174 }
5175
5176 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5177 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5178 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5179 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5180 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5181 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5182 SmallVector<Value *, 4> Args(CI->args());
5183 unsigned NumElts =
5184 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5185 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5186
5187 NewCall = Builder.CreateCall(NewFn, Args);
5188 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5189
5190 NewCall->takeName(CI);
5191 CI->replaceAllUsesWith(Res);
5192 CI->eraseFromParent();
5193 return;
5194 }
5195
5196 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5197 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5198 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5199 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5200 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5201 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5202 SmallVector<Value *, 4> Args(CI->args());
5203 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5204 if (NewFn->getIntrinsicID() ==
5205 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5206 Args[1] = Builder.CreateBitCast(
5207 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5208
5209 NewCall = Builder.CreateCall(NewFn, Args);
5210 Value *Res = Builder.CreateBitCast(
5211 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5212
5213 NewCall->takeName(CI);
5214 CI->replaceAllUsesWith(Res);
5215 CI->eraseFromParent();
5216 return;
5217 }
5218 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5219 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5220 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5221 SmallVector<Value *, 4> Args(CI->args());
5222 unsigned NumElts =
5223 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5224 Args[1] = Builder.CreateBitCast(
5225 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5226 Args[2] = Builder.CreateBitCast(
5227 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5228
5229 NewCall = Builder.CreateCall(NewFn, Args);
5230 break;
5231 }
5232
5233 case Intrinsic::thread_pointer: {
5234 NewCall = Builder.CreateCall(NewFn, {});
5235 break;
5236 }
5237
5238 case Intrinsic::memcpy:
5239 case Intrinsic::memmove:
5240 case Intrinsic::memset: {
5241 // We have to make sure that the call signature is what we're expecting.
5242 // We only want to change the old signatures by removing the alignment arg:
5243 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5244 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5245 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5246 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5247 // Note: i8*'s in the above can be any pointer type
5248 if (CI->arg_size() != 5) {
5249 DefaultCase();
5250 return;
5251 }
5252 // Remove alignment argument (3), and add alignment attributes to the
5253 // dest/src pointers.
5254 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5255 CI->getArgOperand(2), CI->getArgOperand(4)};
5256 NewCall = Builder.CreateCall(NewFn, Args);
5257 AttributeList OldAttrs = CI->getAttributes();
5258 AttributeList NewAttrs = AttributeList::get(
5259 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5260 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5261 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5262 NewCall->setAttributes(NewAttrs);
5263 auto *MemCI = cast<MemIntrinsic>(NewCall);
5264 // All mem intrinsics support dest alignment.
5265 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5266 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5267 // Memcpy/Memmove also support source alignment.
5268 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5269 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5270 break;
5271 }
5272
5273 case Intrinsic::masked_load:
5274 case Intrinsic::masked_gather:
5275 case Intrinsic::masked_store:
5276 case Intrinsic::masked_scatter: {
5277 if (CI->arg_size() != 4) {
5278 DefaultCase();
5279 return;
5280 }
5281
5282 auto GetMaybeAlign = [](Value *Op) {
5283 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5284 uint64_t Val = CI->getZExtValue();
5285 if (Val == 0)
5286 return MaybeAlign();
5287 if (isPowerOf2_64(Val))
5288 return MaybeAlign(Val);
5289 }
5290 reportFatalUsageError("Invalid alignment argument");
5291 };
5292 auto GetAlign = [&](Value *Op) {
5293 MaybeAlign Align = GetMaybeAlign(Op);
5294 if (Align)
5295 return *Align;
5296 reportFatalUsageError("Invalid zero alignment argument");
5297 };
5298
5299 const DataLayout &DL = CI->getDataLayout();
5300 switch (NewFn->getIntrinsicID()) {
5301 case Intrinsic::masked_load:
5302 NewCall = Builder.CreateMaskedLoad(
5303 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5304 CI->getArgOperand(2), CI->getArgOperand(3));
5305 break;
5306 case Intrinsic::masked_gather:
5307 NewCall = Builder.CreateMaskedGather(
5308 CI->getType(), CI->getArgOperand(0),
5309 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5310 CI->getType()->getScalarType()),
5311 CI->getArgOperand(2), CI->getArgOperand(3));
5312 break;
5313 case Intrinsic::masked_store:
5314 NewCall = Builder.CreateMaskedStore(
5315 CI->getArgOperand(0), CI->getArgOperand(1),
5316 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5317 break;
5318 case Intrinsic::masked_scatter:
5319 NewCall = Builder.CreateMaskedScatter(
5320 CI->getArgOperand(0), CI->getArgOperand(1),
5321 DL.getValueOrABITypeAlignment(
5322 GetMaybeAlign(CI->getArgOperand(2)),
5323 CI->getArgOperand(0)->getType()->getScalarType()),
5324 CI->getArgOperand(3));
5325 break;
5326 default:
5327 llvm_unreachable("Unexpected intrinsic ID");
5328 }
5329 // Previous metadata is still valid.
5330 NewCall->copyMetadata(*CI);
5331 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5332 break;
5333 }
5334
5335 case Intrinsic::lifetime_start:
5336 case Intrinsic::lifetime_end: {
5337 if (CI->arg_size() != 2) {
5338 DefaultCase();
5339 return;
5340 }
5341
5342 Value *Ptr = CI->getArgOperand(1);
5343 // Try to strip pointer casts, such that the lifetime works on an alloca.
5344 Ptr = Ptr->stripPointerCasts();
5345 if (isa<AllocaInst>(Ptr)) {
5346 // Don't use NewFn, as we might have looked through an addrspacecast.
5347 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5348 NewCall = Builder.CreateLifetimeStart(Ptr);
5349 else
5350 NewCall = Builder.CreateLifetimeEnd(Ptr);
5351 break;
5352 }
5353
5354 // Otherwise remove the lifetime marker.
5355 CI->eraseFromParent();
5356 return;
5357 }
5358
5359 case Intrinsic::x86_avx512_vpdpbusd_128:
5360 case Intrinsic::x86_avx512_vpdpbusd_256:
5361 case Intrinsic::x86_avx512_vpdpbusd_512:
5362 case Intrinsic::x86_avx512_vpdpbusds_128:
5363 case Intrinsic::x86_avx512_vpdpbusds_256:
5364 case Intrinsic::x86_avx512_vpdpbusds_512:
5365 case Intrinsic::x86_avx2_vpdpbssd_128:
5366 case Intrinsic::x86_avx2_vpdpbssd_256:
5367 case Intrinsic::x86_avx10_vpdpbssd_512:
5368 case Intrinsic::x86_avx2_vpdpbssds_128:
5369 case Intrinsic::x86_avx2_vpdpbssds_256:
5370 case Intrinsic::x86_avx10_vpdpbssds_512:
5371 case Intrinsic::x86_avx2_vpdpbsud_128:
5372 case Intrinsic::x86_avx2_vpdpbsud_256:
5373 case Intrinsic::x86_avx10_vpdpbsud_512:
5374 case Intrinsic::x86_avx2_vpdpbsuds_128:
5375 case Intrinsic::x86_avx2_vpdpbsuds_256:
5376 case Intrinsic::x86_avx10_vpdpbsuds_512:
5377 case Intrinsic::x86_avx2_vpdpbuud_128:
5378 case Intrinsic::x86_avx2_vpdpbuud_256:
5379 case Intrinsic::x86_avx10_vpdpbuud_512:
5380 case Intrinsic::x86_avx2_vpdpbuuds_128:
5381 case Intrinsic::x86_avx2_vpdpbuuds_256:
5382 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5383 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5384 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5385 CI->getArgOperand(2)};
5386 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5387 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5388 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5389
5390 NewCall = Builder.CreateCall(NewFn, Args);
5391 break;
5392 }
5393 }
5394 assert(NewCall && "Should have either set this variable or returned through "
5395 "the default case");
5396 NewCall->takeName(CI);
5397 CI->replaceAllUsesWith(NewCall);
5398 CI->eraseFromParent();
5399}
5400
5401void llvm::UpgradeCallsToIntrinsic(Function *F) {
5402 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5403
5404 // Check if this function should be upgraded and get the replacement function
5405 // if there is one.
5406 Function *NewFn;
5407 if (UpgradeIntrinsicFunction(F, NewFn)) {
5408 // Replace all users of the old function with the new function or new
5409 // instructions. This is not a range loop because the call is deleted.
5410 for (User *U : make_early_inc_range(F->users()))
5411 if (CallBase *CB = dyn_cast<CallBase>(U))
5412 UpgradeIntrinsicCall(CB, NewFn);
5413
5414 // Remove old function, no longer used, from the module.
5415 if (F != NewFn)
5416 F->eraseFromParent();
5417 }
5418}
5419
5420MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5421 const unsigned NumOperands = MD.getNumOperands();
5422 if (NumOperands == 0)
5423 return &MD; // Invalid, punt to a verifier error.
5424
5425 // Check if the tag uses struct-path aware TBAA format.
5426 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5427 return &MD;
5428
5429 auto &Context = MD.getContext();
5430 if (NumOperands == 3) {
5431 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5432 MDNode *ScalarType = MDNode::get(Context, Elts);
5433 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5434 Metadata *Elts2[] = {ScalarType, ScalarType,
5435 ConstantAsMetadata::get(
5436 Constant::getNullValue(Type::getInt64Ty(Context))),
5437 MD.getOperand(2)};
5438 return MDNode::get(Context, Elts2);
5439 }
5440 // Create a MDNode <MD, MD, offset 0>
5441 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5442 Type::getInt64Ty(Context)))};
5443 return MDNode::get(Context, Elts);
5444}
5445
5446Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5447 Instruction *&Temp) {
5448 if (Opc != Instruction::BitCast)
5449 return nullptr;
5450
5451 Temp = nullptr;
5452 Type *SrcTy = V->getType();
5453 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5454 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5455 LLVMContext &Context = V->getContext();
5456
5457 // We have no information about target data layout, so we assume that
5458 // the maximum pointer size is 64bit.
5459 Type *MidTy = Type::getInt64Ty(Context);
5460 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5461
5462 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5463 }
5464
5465 return nullptr;
5466}
5467
5468Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5469 if (Opc != Instruction::BitCast)
5470 return nullptr;
5471
5472 Type *SrcTy = C->getType();
5473 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5474 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5475 LLVMContext &Context = C->getContext();
5476
5477 // We have no information about target data layout, so we assume that
5478 // the maximum pointer size is 64bit.
5479 Type *MidTy = Type::getInt64Ty(Context);
5480
5481 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5482 DestTy);
5483 }
5484
5485 return nullptr;
5486}
5487
5488/// Check the debug info version number, if it is out-dated, drop the debug
5489/// info. Return true if module is modified.
5490bool llvm::UpgradeDebugInfo(Module &M) {
5491 if (DisableAutoUpgradeDebugInfo)
5492 return false;
5493
5494 llvm::TimeTraceScope timeScope("Upgrade debug info");
5495 // We need to get metadata before the module is verified (i.e., getModuleFlag
5496 // makes assumptions that we haven't verified yet). Carefully extract the flag
5497 // from the metadata.
5498 unsigned Version = 0;
5499 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5500 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5501 if (Flag->getNumOperands() < 3)
5502 return false;
5503 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5504 return K->getString() == "Debug Info Version";
5505 return false;
5506 });
5507 if (OpIt != ModFlags->op_end()) {
5508 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5509 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5510 Version = CI->getZExtValue();
5511 }
5512 }
5513
5514 if (Version == DEBUG_METADATA_VERSION) {
5515 bool BrokenDebugInfo = false;
5516 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5517 report_fatal_error("Broken module found, compilation aborted!");
5518 if (!BrokenDebugInfo)
5519 // Everything is ok.
5520 return false;
5521 else {
5522 // Diagnose malformed debug info.
5523 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5524 M.getContext().diagnose(Diag);
5525 }
5526 }
5527 bool Modified = StripDebugInfo(M);
5528 if (Modified && Version != DEBUG_METADATA_VERSION) {
5529 // Diagnose a version mismatch.
5530 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5531 M.getContext().diagnose(DiagVersion);
5532 }
5533 return Modified;
5534}
5535
5536static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5537 GlobalValue *GV, const Metadata *V) {
5538 Function *F = cast<Function>(GV);
5539
5540 constexpr StringLiteral DefaultValue = "1";
5541 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5542 unsigned Length = 0;
5543
5544 if (F->hasFnAttribute(Attr)) {
5545 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5546 // parse these elements placing them into Vect3
5547 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5548 for (; Length < 3 && !S.empty(); Length++) {
5549 auto [Part, Rest] = S.split(',');
5550 Vect3[Length] = Part.trim();
5551 S = Rest;
5552 }
5553 }
5554
5555 const unsigned Dim = DimC - 'x';
5556 assert(Dim < 3 && "Unexpected dim char");
5557
5558 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5559
5560 // local variable required for StringRef in Vect3 to point to.
5561 const std::string VStr = llvm::utostr(VInt);
5562 Vect3[Dim] = VStr;
5563 Length = std::max(Length, Dim + 1);
5564
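 // For example, updating an existing "maxntid" value of "32,1" with z = 4
 // yields "32,1,4".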
5565 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5566 F->addFnAttr(Attr, NewAttr);
5567}
5568
5569static inline bool isXYZ(StringRef S) {
5570 return S == "x" || S == "y" || S == "z";
5571}
5572
5573static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5574 const Metadata *V) {
5575 if (K == "kernel") {
5576 if (!mdconst::extract<ConstantInt>(V)->isZero())
5577 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5578 return true;
5579 }
5580 if (K == "align") {
5581 // V is a bitfield specifying two 16-bit values. The alignment value is
5582 // specified in the low 16 bits, and the index is specified in the high
5583 // bits. For the index, 0 indicates the return value while higher values
5584 // correspond to each parameter (idx = param + 1).
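 // For example, a value of 0x00010004 requests stack alignment 4 on the
 // first parameter (attribute index 1).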
5585 const uint64_t AlignIdxValuePair =
5586 mdconst::extract<ConstantInt>(V)->getZExtValue();
5587 const unsigned Idx = (AlignIdxValuePair >> 16);
5588 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5589 cast<Function>(GV)->addAttributeAtIndex(
5590 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5591 return true;
5592 }
5593 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5594 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5595 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5596 return true;
5597 }
5598 if (K == "minctasm") {
5599 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5600 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5601 return true;
5602 }
5603 if (K == "maxnreg") {
5604 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5605 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5606 return true;
5607 }
5608 if (K.consume_front("maxntid") && isXYZ(K)) {
5609 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5610 return true;
5611 }
5612 if (K.consume_front("reqntid") && isXYZ(K)) {
5613 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5614 return true;
5615 }
5616 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5617 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5618 return true;
5619 }
5620 if (K == "grid_constant") {
5621 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5622 for (const auto &Op : cast<MDNode>(V)->operands()) {
5623 // For some reason, the index is 1-based in the metadata. Good thing we're
5624 // able to auto-upgrade it!
5625 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5626 cast<Function>(GV)->addParamAttr(Index, Attr);
5627 }
5628 return true;
5629 }
5630
5631 return false;
5632}
5633
5634void llvm::UpgradeNVVMAnnotations(Module &M) {
5635 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5636 if (!NamedMD)
5637 return;
5638
5639 SmallVector<MDNode *, 8> NewNodes;
5640 SmallPtrSet<const MDNode *, 8> SeenNodes;
5641 for (MDNode *MD : NamedMD->operands()) {
5642 if (!SeenNodes.insert(MD).second)
5643 continue;
5644
5645 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5646 if (!GV)
5647 continue;
5648
5649 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5650
5651 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5652 // Each nvvm.annotations metadata entry will be of the following form:
5653 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5654 // start index = 1, to skip the global variable key
5655 // increment = 2, to skip the value for each property-value pair
5656 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5657 MDString *K = cast<MDString>(MD->getOperand(j));
5658 const MDOperand &V = MD->getOperand(j + 1);
5659 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5660 if (!Upgraded)
5661 NewOperands.append({K, V});
5662 }
5663
5664 if (NewOperands.size() > 1)
5665 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5666 }
5667
5668 NamedMD->clearOperands();
5669 for (MDNode *N : NewNodes)
5670 NamedMD->addOperand(N);
5671}
5672
5673/// This checks for the objc retain release marker which should be upgraded.
5674/// It returns true if the module is modified.
5675static bool upgradeRetainReleaseMarker(Module &M) {
5676 bool Changed = false;
5677 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5678 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5679 if (ModRetainReleaseMarker) {
5680 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5681 if (Op) {
5682 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5683 if (ID) {
5684 SmallVector<StringRef, 4> ValueComp;
5685 ID->getString().split(ValueComp, "#");
5686 if (ValueComp.size() == 2) {
5687 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5688 ID = MDString::get(M.getContext(), NewValue);
5689 }
5690 M.addModuleFlag(Module::Error, MarkerKey, ID);
5691 M.eraseNamedMetadata(ModRetainReleaseMarker);
5692 Changed = true;
5693 }
5694 }
5695 }
5696 return Changed;
5697}
5698
5699void llvm::UpgradeARCRuntime(Module &M) {
5700 // This lambda converts normal function calls to ARC runtime functions to
5701 // intrinsic calls.
5702 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5703 llvm::Intrinsic::ID IntrinsicFunc) {
5704 Function *Fn = M.getFunction(OldFunc);
5705
5706 if (!Fn)
5707 return;
5708
5709 Function *NewFn =
5710 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5711
5712 for (User *U : make_early_inc_range(Fn->users())) {
5713 CallInst *CI = dyn_cast<CallInst>(U);
5714 if (!CI || CI->getCalledFunction() != Fn)
5715 continue;
5716
5717 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5718 FunctionType *NewFuncTy = NewFn->getFunctionType();
5719 SmallVector<Value *, 2> Args;
5720
5721 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5722 // value to the return type of the old function.
5723 if (NewFuncTy->getReturnType() != CI->getType() &&
5724 !CastInst::castIsValid(Instruction::BitCast, CI,
5725 NewFuncTy->getReturnType()))
5726 continue;
5727
5728 bool InvalidCast = false;
5729
5730 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5731 Value *Arg = CI->getArgOperand(I);
5732
5733 // Bitcast argument to the parameter type of the new function if it's
5734 // not a variadic argument.
5735 if (I < NewFuncTy->getNumParams()) {
5736 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5737 // to the parameter type of the new function.
5738 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5739 NewFuncTy->getParamType(I))) {
5740 InvalidCast = true;
5741 break;
5742 }
5743 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5744 }
5745 Args.push_back(Arg);
5746 }
5747
5748 if (InvalidCast)
5749 continue;
5750
5751 // Create a call instruction that calls the new function.
5752 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5753 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5754 NewCall->takeName(CI);
5755
5756 // Bitcast the return value back to the type of the old call.
5757 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5758
5759 if (!CI->use_empty())
5760 CI->replaceAllUsesWith(NewRetVal);
5761 CI->eraseFromParent();
5762 }
5763
5764 if (Fn->use_empty())
5765 Fn->eraseFromParent();
5766 };
5767
5768 // Unconditionally convert a call to "clang.arc.use" to a call to
5769 // "llvm.objc.clang.arc.use".
5770 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5771
5772 // Upgrade the retain release marker. If there is no need to upgrade
5773 // the marker, that means either the module is already new enough to contain
5774 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
5775 if (!upgradeRetainReleaseMarker(M))
5776 return;
5777
5778 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5779 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5780 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5781 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5782 {"objc_autoreleaseReturnValue",
5783 llvm::Intrinsic::objc_autoreleaseReturnValue},
5784 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5785 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5786 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5787 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5788 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5789 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5790 {"objc_release", llvm::Intrinsic::objc_release},
5791 {"objc_retain", llvm::Intrinsic::objc_retain},
5792 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5793 {"objc_retainAutoreleaseReturnValue",
5794 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5795 {"objc_retainAutoreleasedReturnValue",
5796 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5797 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5798 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5799 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5800 {"objc_unsafeClaimAutoreleasedReturnValue",
5801 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5802 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5803 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5804 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5805 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5806 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5807 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5808 {"objc_arc_annotation_topdown_bbstart",
5809 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5810 {"objc_arc_annotation_topdown_bbend",
5811 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5812 {"objc_arc_annotation_bottomup_bbstart",
5813 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5814 {"objc_arc_annotation_bottomup_bbend",
5815 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5816
5817 for (auto &I : RuntimeFuncs)
5818 UpgradeToIntrinsic(I.first, I.second);
5819}
5820
5821bool llvm::UpgradeModuleFlags(Module &M) {
5822 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5823 if (!ModFlags)
5824 return false;
5825
5826 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5827 bool HasSwiftVersionFlag = false;
5828 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5829 uint32_t SwiftABIVersion;
5830 auto Int8Ty = Type::getInt8Ty(M.getContext());
5831 auto Int32Ty = Type::getInt32Ty(M.getContext());
5832
5833 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5834 MDNode *Op = ModFlags->getOperand(I);
5835 if (Op->getNumOperands() != 3)
5836 continue;
5837 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5838 if (!ID)
5839 continue;
5840 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5841 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5842 Type::getInt32Ty(M.getContext()), B)),
5843 MDString::get(M.getContext(), ID->getString()),
5844 Op->getOperand(2)};
5845 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5846 Changed = true;
5847 };
5848
5849 if (ID->getString() == "Objective-C Image Info Version")
5850 HasObjCFlag = true;
5851 if (ID->getString() == "Objective-C Class Properties")
5852 HasClassProperties = true;
5853 // Upgrade PIC from Error/Max to Min.
5854 if (ID->getString() == "PIC Level") {
5855 if (auto *Behavior =
5856 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5857 uint64_t V = Behavior->getLimitedValue();
5858 if (V == Module::Error || V == Module::Max)
5859 SetBehavior(Module::Min);
5860 }
5861 }
5862 // Upgrade "PIE Level" from Error to Max.
5863 if (ID->getString() == "PIE Level")
5864 if (auto *Behavior =
5865 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5866 if (Behavior->getLimitedValue() == Module::Error)
5867 SetBehavior(Module::Max);
5868
5869 // Upgrade branch protection and return address signing module flags. The
5870 // module flag behavior for these fields was Error and is now Min.
5871 if (ID->getString() == "branch-target-enforcement" ||
5872 ID->getString().starts_with("sign-return-address")) {
5873 if (auto *Behavior =
5874 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5875 if (Behavior->getLimitedValue() == Module::Error) {
5876 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5877 Metadata *Ops[3] = {
5878 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5879 Op->getOperand(1), Op->getOperand(2)};
5880 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5881 Changed = true;
5882 }
5883 }
5884 }
5885
5886 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5887 // section name so that llvm-lto will not complain about mismatching
5888 // module flags that are functionally the same.
5889 if (ID->getString() == "Objective-C Image Info Section") {
5890 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5891 SmallVector<StringRef, 4> ValueComp;
5892 Value->getString().split(ValueComp, " ");
5893 if (ValueComp.size() != 1) {
5894 std::string NewValue;
5895 for (auto &S : ValueComp)
5896 NewValue += S.str();
5897 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5898 MDString::get(M.getContext(), NewValue)};
5899 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5900 Changed = true;
5901 }
5902 }
5903 }
5904
5905 // The IR upgrader turns the i32 "Objective-C Garbage Collection" flag into
5906 // an i8 value. If the higher bits are set, it adds a new module flag for Swift info.
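 // For example, a packed value of 0x01020700 yields Swift major version 1,
 // minor version 2, ABI version 7, and a GC value of 0.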
5907 if (ID->getString() == "Objective-C Garbage Collection") {
5908 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5909 if (Md) {
5910 assert(Md->getValue() && "Expected non-empty metadata");
5911 auto Type = Md->getValue()->getType();
5912 if (Type == Int8Ty)
5913 continue;
5914 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5915 if ((Val & 0xff) != Val) {
5916 HasSwiftVersionFlag = true;
5917 SwiftABIVersion = (Val & 0xff00) >> 8;
5918 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5919 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5920 }
5921 Metadata *Ops[3] = {
5922            Op->getOperand(0),
5923 Op->getOperand(1),
5924            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
5925 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5926 Changed = true;
5927 }
5928 }
5929
5930 if (ID->getString() == "amdgpu_code_object_version") {
5931 Metadata *Ops[3] = {
5932 Op->getOperand(0),
5933 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5934 Op->getOperand(2)};
5935 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5936 Changed = true;
5937 }
5938 }
5939
5940 // "Objective-C Class Properties" is recently added for Objective-C. We
5941 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5942 // flag of value 0, so we can correclty downgrade this flag when trying to
5943 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5944 // this module flag.
5945 if (HasObjCFlag && !HasClassProperties) {
5946 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5947 (uint32_t)0);
5948 Changed = true;
5949 }
5950
5951 if (HasSwiftVersionFlag) {
5952 M.addModuleFlag(Module::Error, "Swift ABI Version",
5953 SwiftABIVersion);
5954 M.addModuleFlag(Module::Error, "Swift Major Version",
5955 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5956 M.addModuleFlag(Module::Error, "Swift Minor Version",
5957 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5958 Changed = true;
5959 }
5960
5961 return Changed;
5962}
5963
5964void llvm::UpgradeSectionAttributes(Module &M) {
5965 auto TrimSpaces = [](StringRef Section) -> std::string {
5966 SmallVector<StringRef, 5> Components;
5967 Section.split(Components, ',');
5968
5969 SmallString<32> Buffer;
5970 raw_svector_ostream OS(Buffer);
5971
5972 for (auto Component : Components)
5973 OS << ',' << Component.trim();
5974
5975 return std::string(OS.str().substr(1));
5976 };
5977
5978 for (auto &GV : M.globals()) {
5979 if (!GV.hasSection())
5980 continue;
5981
5982 StringRef Section = GV.getSection();
5983
5984 if (!Section.starts_with("__DATA, __objc_catlist"))
5985 continue;
5986
5987 // __DATA, __objc_catlist, regular, no_dead_strip
5988 // __DATA,__objc_catlist,regular,no_dead_strip
5989 GV.setSection(TrimSpaces(Section));
5990 }
5991}
5992
5993namespace {
5994// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5995// callsites within a function that did not also have the strictfp attribute.
5996// Since 10.0, if strict FP semantics are needed within a function, the
5997// function must have the strictfp attribute and all calls within the function
5998// must also have the strictfp attribute. This latter restriction is
5999// necessary to prevent unwanted libcall simplification when a function is
6000// being cloned (such as for inlining).
6001//
6002// The "dangling" strictfp attribute usage was only used to prevent constant
6003// folding and other libcall simplification. The nobuiltin attribute on the
6004// callsite has the same effect.
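// For example, in a caller that does not itself carry strictfp, a callsite
//   %r = call double @sin(double %x) strictfp
// is rewritten below to
//   %r = call double @sin(double %x) nobuiltin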
6005struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6006 StrictFPUpgradeVisitor() = default;
6007
6008 void visitCallBase(CallBase &Call) {
6009 if (!Call.isStrictFP())
6010 return;
6011    if (isa<ConstrainedFPIntrinsic>(&Call))
6012 return;
6013 // If we get here, the caller doesn't have the strictfp attribute
6014 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6015 Call.removeFnAttr(Attribute::StrictFP);
6016 Call.addFnAttr(Attribute::NoBuiltin);
6017 }
6018};
6019
6020/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
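/// For example, every floating-point atomicrmw in a function carrying
/// "amdgpu-unsafe-fp-atomics"="true" receives the three annotations set below.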
6021struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6022 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6023 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6024
6025 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6026 if (!RMW.isFloatingPointOperation())
6027 return;
6028
6029 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6030 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6031 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6032 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6033 }
6034};
6035} // namespace
6036
6037void llvm::UpgradeFunctionAttributes(Function &F) {
6038 // If a function definition doesn't have the strictfp attribute,
6039 // convert any callsite strictfp attributes to nobuiltin.
6040 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6041 StrictFPUpgradeVisitor SFPV;
6042 SFPV.visit(F);
6043 }
6044
6045 // Remove all incompatible attributes from the function.
6046 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6047 F.getReturnType(), F.getAttributes().getRetAttrs()));
6048 for (auto &Arg : F.args())
6049 Arg.removeAttrs(
6050 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6051
6052 // Older versions of LLVM treated an "implicit-section-name" attribute
6053 // similarly to directly setting the section on a Function.
6054 if (Attribute A = F.getFnAttribute("implicit-section-name");
6055 A.isValid() && A.isStringAttribute()) {
6056 F.setSection(A.getValueAsString());
6057 F.removeFnAttr("implicit-section-name");
6058 }
6059
6060 if (!F.empty()) {
6061 // For some reason this is called twice, and the first time is before any
6062 // instructions are loaded into the body.
6063
6064 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6065 A.isValid()) {
6066
6067 if (A.getValueAsBool()) {
6068 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6069 Visitor.visit(F);
6070 }
6071
6072 // We will leave behind dead attribute uses on external declarations, but
6073 // clang never added these to declarations anyway.
6074 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6075 }
6076 }
6077}
6078
6079// Set the function attribute if it is not already present.
6080static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6081                                    StringRef Value) {
6082 if (!F.hasFnAttribute(FnAttrName))
6083 F.addFnAttr(FnAttrName, Value);
6084}
6085
6086// Set the function attribute when it is not present and Set is true.
6087// If the existing attribute value is "false", remove the attribute.
6088// If it is "true", reset it to a valueless attribute.
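// For example, "guarded-control-stack"="true" becomes the valueless
// "guarded-control-stack", while "guarded-control-stack"="false" is removed.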
6089static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6090 if (!F.hasFnAttribute(FnAttrName)) {
6091 if (Set)
6092 F.addFnAttr(FnAttrName);
6093 } else {
6094 auto A = F.getFnAttribute(FnAttrName);
6095 if ("false" == A.getValueAsString())
6096 F.removeFnAttr(FnAttrName);
6097 else if ("true" == A.getValueAsString()) {
6098 F.removeFnAttr(FnAttrName);
6099 F.addFnAttr(FnAttrName);
6100 }
6101 }
6102}
6103
6104void llvm::copyModuleAttrToFunctions(Module &M) {
6105 Triple T(M.getTargetTriple());
6106 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6107 return;
6108
6109 uint64_t BTEValue = 0;
6110 uint64_t BPPLRValue = 0;
6111 uint64_t GCSValue = 0;
6112 uint64_t SRAValue = 0;
6113 uint64_t SRAALLValue = 0;
6114 uint64_t SRABKeyValue = 0;
6115
6116 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6117 if (ModFlags) {
6118 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6119 MDNode *Op = ModFlags->getOperand(I);
6120 if (Op->getNumOperands() != 3)
6121 continue;
6122
6123 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6124 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6125 if (!ID || !CI)
6126 continue;
6127
6128 StringRef IDStr = ID->getString();
6129 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6130 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6131 : IDStr == "guarded-control-stack" ? &GCSValue
6132 : IDStr == "sign-return-address" ? &SRAValue
6133 : IDStr == "sign-return-address-all" ? &SRAALLValue
6134 : IDStr == "sign-return-address-with-bkey"
6135 ? &SRABKeyValue
6136 : nullptr;
6137 if (!ValPtr)
6138 continue;
6139
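      // A value of 2 is the sentinel written by the setModuleFlag calls at the
      // bottom of this function; it marks a module that has already been
      // upgraded, so there is nothing to copy.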
6140 *ValPtr = CI->getZExtValue();
6141 if (*ValPtr == 2)
6142 return;
6143 }
6144 }
6145
6146 bool BTE = BTEValue == 1;
6147 bool BPPLR = BPPLRValue == 1;
6148 bool GCS = GCSValue == 1;
6149 bool SRA = SRAValue == 1;
6150
6151 StringRef SignTypeValue = "non-leaf";
6152 if (SRA && SRAALLValue == 1)
6153 SignTypeValue = "all";
6154
6155 StringRef SignKeyValue = "a_key";
6156 if (SRA && SRABKeyValue == 1)
6157 SignKeyValue = "b_key";
6158
6159 for (Function &F : M.getFunctionList()) {
6160 if (F.isDeclaration())
6161 continue;
6162
6163 if (SRA) {
6164 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6165 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6166 } else {
6167 if (auto A = F.getFnAttribute("sign-return-address");
6168 A.isValid() && "none" == A.getValueAsString()) {
6169 F.removeFnAttr("sign-return-address");
6170 F.removeFnAttr("sign-return-address-key");
6171 }
6172 }
6173 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6174 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6175 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6176 }
6177
6178 if (BTE)
6179 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6180 if (BPPLR)
6181 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6182 if (GCS)
6183 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6184 if (SRA) {
6185 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6186 if (SRAALLValue == 1)
6187 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6188 if (SRABKeyValue == 1)
6189 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6190 }
6191}
6192
6193static bool isOldLoopArgument(Metadata *MD) {
6194 auto *T = dyn_cast_or_null<MDTuple>(MD);
6195 if (!T)
6196 return false;
6197 if (T->getNumOperands() < 1)
6198 return false;
6199 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6200 if (!S)
6201 return false;
6202 return S->getString().starts_with("llvm.vectorizer.");
6203}
6204
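// For example, the renames performed by the code below:
//   "llvm.vectorizer.width"  -->  "llvm.loop.vectorize.width"
//   "llvm.vectorizer.unroll" -->  "llvm.loop.interleave.count"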
6205static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6206 StringRef OldPrefix = "llvm.vectorizer.";
6207 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6208
6209 if (OldTag == "llvm.vectorizer.unroll")
6210 return MDString::get(C, "llvm.loop.interleave.count");
6211
6212 return MDString::get(
6213 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6214 .str());
6215}
6216
6217static Metadata *upgradeLoopArgument(Metadata *MD) {
6218 auto *T = dyn_cast_or_null<MDTuple>(MD);
6219 if (!T)
6220 return MD;
6221 if (T->getNumOperands() < 1)
6222 return MD;
6223 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6224 if (!OldTag)
6225 return MD;
6226 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6227 return MD;
6228
6229 // This has an old tag. Upgrade it.
6230  SmallVector<Metadata *, 4> Ops;
6231 Ops.reserve(T->getNumOperands());
6232 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6233 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6234 Ops.push_back(T->getOperand(I));
6235
6236 return MDTuple::get(T->getContext(), Ops);
6237}
6238
6239MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6240 auto *T = dyn_cast<MDTuple>(&N);
6241 if (!T)
6242 return &N;
6243
6244 if (none_of(T->operands(), isOldLoopArgument))
6245 return &N;
6246
6247  SmallVector<Metadata *, 4> Ops;
6248 Ops.reserve(T->getNumOperands());
6249 for (Metadata *MD : T->operands())
6250 Ops.push_back(upgradeLoopArgument(MD));
6251
6252 return MDTuple::get(T->getContext(), Ops);
6253}
6254
6255std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6256 Triple T(TT);
6257 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6258 // the address space of globals to 1. This does not apply to SPIRV Logical.
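  // For example, an empty layout becomes "G1" and "e-i64:64" becomes
  // "e-i64:64-G1".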
6259 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6260 !DL.contains("-G") && !DL.starts_with("G")) {
6261 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6262 }
6263
6264 if (T.isLoongArch64() || T.isRISCV64()) {
6265 // Make i32 a native type for 64-bit LoongArch and RISC-V.
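    // For example, "...-i128:128-n64-S128" becomes "...-i128:128-n32:64-S128".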
6266 auto I = DL.find("-n64-");
6267 if (I != StringRef::npos)
6268 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6269 return DL.str();
6270 }
6271
6272 // AMDGPU data layout upgrades.
6273 std::string Res = DL.str();
6274 if (T.isAMDGPU()) {
6275 // Define address spaces for constants.
6276 if (!DL.contains("-G") && !DL.starts_with("G"))
6277 Res.append(Res.empty() ? "G1" : "-G1");
6278
6279 // AMDGCN data layout upgrades.
6280 if (T.isAMDGCN()) {
6281
6282 // Add missing non-integral declarations.
6283 // This goes before adding new address spaces to prevent incoherent string
6284 // values.
6285 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6286 Res.append("-ni:7:8:9");
6287 // Update ni:7 to ni:7:8:9.
6288 if (DL.ends_with("ni:7"))
6289 Res.append(":8:9");
6290 if (DL.ends_with("ni:7:8"))
6291 Res.append(":9");
6292
6293 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6294 // resources). An empty data layout has already been upgraded to G1 by now.
6295 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6296 Res.append("-p7:160:256:256:32");
6297 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6298 Res.append("-p8:128:128:128:48");
6299 constexpr StringRef OldP8("-p8:128:128-");
6300 if (DL.contains(OldP8))
6301 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6302 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6303 Res.append("-p9:192:256:256:32");
6304 }
6305
6306 // Upgrade the ELF mangling mode.
6307 if (!DL.contains("m:e"))
6308 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6309
6310 return Res;
6311 }
6312
6313 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6314 // If the datalayout matches the expected format, add pointer size address
6315 // spaces to the datalayout.
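    // For example, "e-m:e-i64:64-..." becomes
    // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-...".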
6316 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6317 if (!DL.contains(AddrSpaces)) {
6318      SmallVector<StringRef, 4> Groups;
6319 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6320 if (R.match(Res, &Groups))
6321 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6322 }
6323 };
6324
6325 // AArch64 data layout upgrades.
6326 if (T.isAArch64()) {
6327 // Add "-Fn32"
6328 if (!DL.empty() && !DL.contains("-Fn32"))
6329 Res.append("-Fn32");
6330 AddPtr32Ptr64AddrSpaces();
6331 return Res;
6332 }
6333
6334 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6335 T.isWasm()) {
6336 // Mips64 with o32 ABI did not add "-i128:128".
6337 // Add "-i128:128"
6338 std::string I64 = "-i64:64";
6339 std::string I128 = "-i128:128";
6340 if (!StringRef(Res).contains(I128)) {
6341 size_t Pos = Res.find(I64);
6342 if (Pos != size_t(-1))
6343 Res.insert(Pos + I64.size(), I128);
6344 }
6345 return Res;
6346 }
6347
6348 if (!T.isX86())
6349 return Res;
6350
6351 AddPtr32Ptr64AddrSpaces();
6352
6353 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6354 // for i128 operations prior to this being reflected in the data layout, and
6355 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6356 // boundaries, so although this is a breaking change, the upgrade is expected
6357 // to fix more IR than it breaks.
6358 // Intel MCU is an exception and uses 4-byte-alignment.
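  // For example, "e-m:e-i64:64-f80:128-n8:16:32:64-S128" becomes
  // "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128".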
6359 if (!T.isOSIAMCU()) {
6360 std::string I128 = "-i128:128";
6361 if (StringRef Ref = Res; !Ref.contains(I128)) {
6362      SmallVector<StringRef, 4> Groups;
6363 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6364 if (R.match(Res, &Groups))
6365 Res = (Groups[1] + I128 + Groups[3]).str();
6366 }
6367 }
6368
6369 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6370 // Raising the alignment is safe because Clang did not produce f80 values in
6371 // the MSVC environment before this upgrade was added.
6372 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6373 StringRef Ref = Res;
6374 auto I = Ref.find("-f80:32-");
6375 if (I != StringRef::npos)
6376 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6377 }
6378
6379 return Res;
6380}
6381
6382void llvm::UpgradeAttributes(AttrBuilder &B) {
6383 StringRef FramePointer;
6384 Attribute A = B.getAttribute("no-frame-pointer-elim");
6385 if (A.isValid()) {
6386 // The value can be "true" or "false".
6387 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6388 B.removeAttribute("no-frame-pointer-elim");
6389 }
6390 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6391 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6392 if (FramePointer != "all")
6393 FramePointer = "non-leaf";
6394 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6395 }
6396 if (!FramePointer.empty())
6397 B.addAttribute("frame-pointer", FramePointer);
6398
6399 A = B.getAttribute("null-pointer-is-valid");
6400 if (A.isValid()) {
6401 // The value can be "true" or "false".
6402 bool NullPointerIsValid = A.getValueAsString() == "true";
6403 B.removeAttribute("null-pointer-is-valid");
6404 if (NullPointerIsValid)
6405 B.addAttribute(Attribute::NullPointerIsValid);
6406 }
6407}
6408
6409void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6410 // clang.arc.attachedcall bundles are now required to have an operand.
6411 // If they don't, it's okay to drop them entirely: when there is an operand,
6412 // the "attachedcall" is meaningful and required, but without an operand,
6413 // it's just a marker NOP. Dropping it merely prevents an optimization.
6414 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6415 return OBD.getTag() == "clang.arc.attachedcall" &&
6416 OBD.inputs().empty();
6417 });
6418}