//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
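
// For example, rename() turns a declaration named "llvm.x86.sse41.ptestc"
// into "llvm.x86.sse41.ptestc.old", freeing the canonical name for the
// upgraded declaration that replaces it.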

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
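
// For example (illustrative IR): the deprecated form
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to ".old" and re-declared with the current argument types:
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)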

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
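
// For example (illustrative IR): the deprecated form
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is upgraded to the current form whose immediate is an i8:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)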

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
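
// For example (illustrative IR; argument lists elided): the deprecated
//   declare i8 @llvm.x86.avx512.mask.cmp.pd.128(...)
// returned the comparison result as a scalar bitmask, whereas the upgraded
// declaration returns a vector of i1:
//   declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(...)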

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check whether the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
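
// For example (illustrative IR): the deprecated form
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
// now takes its byte operands as i8 vectors:
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)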

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
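
// For example (illustrative IR): before bfloat existed as an IR type, the
// AVX512 BF16 conversions were declared with i16 storage, e.g.
//   declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>)
// which is upgraded to
//   declare <8 x bfloat> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>)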

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
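
// Similarly for the BF16 dot-product form, where the second and third
// operands (rather than the return type) moved to bfloat vectors
// (illustrative IR):
//   declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float>, <4 x i32>, <4 x i32>)
// becomes
//   declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float>, <8 x bfloat>, <8 x bfloat>)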

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started auto-upgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
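
// Note that by the time this predicate runs, the caller has already consumed
// both the "llvm." and "x86." prefixes, so e.g. "llvm.x86.avx.movnt.dq.256"
// is matched here as "avx." followed by "movnt.".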

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
             .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
             .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
             .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
             .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
             .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
             .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
             .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
             .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
             .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
             .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
             .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
             .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
             .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
             .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
             .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
             .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true
// iff the function needed upgrading.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
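
// For example (illustrative IR): the pre-12.0 polymorphic declaration
//   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float>, <8 x i8>, <8 x i8>)
// is re-declared over bfloat element types:
//   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float>, <4 x bfloat>, <4 x bfloat>)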

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    //     The last three parameters of the older version of these
    //     intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    //     The newer version reads as:
    //     arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    //     So, when the type of the [N-3]rd argument is "not i1", then
    //     it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
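
// For example, an old declaration of "llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d"
// is flagged for upgrade here when its first pointer operand is still in the
// shared address space (AS 3), or when its trailing flags end at the two i1
// values without the newer i32 cta_group flag.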

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}
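
// As with the TMA helper above, the upgrade only fires while the relevant
// pointer (result or first operand) is still in the plain shared address
// space (AS 3); upgraded declarations use the shared_cluster address space
// (AS 7, per the comment in shouldUpgradeNVPTXTMAG2SIntrinsics).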

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
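
// For example, "llvm.nvvm.fma.rn.bf16" maps to Intrinsic::nvvm_fma_rn_bf16;
// the caller below only performs the upgrade when the declaration's return
// type is not already bfloat, i.e. when it still uses the old i16-based
// storage types.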

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}
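
// For example, for "llvm.nvvm.ptr.shared.to.gen" the caller has already
// consumed "nvvm." and "ptr.", so Name is "shared.to.gen" here; "shared" is
// consumed above, leaving ".to.gen" for the caller to match.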

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that had an
    // explicit alignment parameter so that the alignment is instead embedded
    // as an attribute on the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
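
    // For example (illustrative IR): the five-operand form carried alignment
    // as an explicit i32 operand,
    //   declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
    // whereas the upgraded form takes only dest, src, len, and the volatile
    // flag, with alignment expressed as parameter attributes:
    //   declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)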
1429
1430 unsigned MaskedID =
1432 .StartsWith("masked.load", Intrinsic::masked_load)
1433 .StartsWith("masked.gather", Intrinsic::masked_gather)
1434 .StartsWith("masked.store", Intrinsic::masked_store)
1435 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1436 .Default(0);
1437 if (MaskedID && F->arg_size() == 4) {
1438 rename(F);
1439 if (MaskedID == Intrinsic::masked_load ||
1440 MaskedID == Intrinsic::masked_gather) {
1442 F->getParent(), MaskedID,
1443 {F->getReturnType(), F->getArg(0)->getType()});
1444 return true;
1445 }
1447 F->getParent(), MaskedID,
1448 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1449 return true;
1450 }
1451 break;
1452 }
1453 case 'n': {
1454 if (Name.consume_front("nvvm.")) {
1455 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1456 if (F->arg_size() == 1) {
1457 Intrinsic::ID IID =
1459 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1460 .Case("clz.i", Intrinsic::ctlz)
1461 .Case("popc.i", Intrinsic::ctpop)
1463 if (IID != Intrinsic::not_intrinsic) {
1464 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1465 {F->getReturnType()});
1466 return true;
1467 }
1468 }
1469
1470 // Check for nvvm intrinsics that need a return type adjustment.
1471 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1473 if (IID != Intrinsic::not_intrinsic) {
1474 NewFn = nullptr;
1475 return true;
1476 }
1477 }
1478
1479 // Upgrade Distributed Shared Memory Intrinsics
1481 if (IID != Intrinsic::not_intrinsic) {
1482 rename(F);
1483 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1484 return true;
1485 }
1486
1487 // Upgrade TMA copy G2S Intrinsics
1489 if (IID != Intrinsic::not_intrinsic) {
1490 rename(F);
1491 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1492 return true;
1493 }
1494
1495 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1496 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1497 //
1498 // TODO: We could add lohi.i2d.
1499 bool Expand = false;
1500 if (Name.consume_front("abs."))
1501 // nvvm.abs.{i,ii}
1502 Expand =
1503 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1504 else if (Name.consume_front("fabs."))
1505 // nvvm.fabs.{f,ftz.f,d}
1506 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1507 else if (Name.consume_front("max.") || Name.consume_front("min."))
1508 // nvvm.{min,max}.{i,ii,ui,ull}
1509 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1510 Name == "ui" || Name == "ull";
1511 else if (Name.consume_front("atomic.load."))
1512 // nvvm.atomic.load.add.{f32,f64}.p
1513 // nvvm.atomic.load.{inc,dec}.32.p
1514 Expand = StringSwitch<bool>(Name)
1515 .StartsWith("add.f32.p", true)
1516 .StartsWith("add.f64.p", true)
1517 .StartsWith("inc.32.p", true)
1518 .StartsWith("dec.32.p", true)
1519 .Default(false);
1520 else if (Name.consume_front("bitcast."))
1521 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1522 Expand =
1523 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1524 else if (Name.consume_front("rotate."))
1525 // nvvm.rotate.{b32,b64,right.b64}
1526 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1527 else if (Name.consume_front("ptr.gen.to."))
1528 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1529 Expand = consumeNVVMPtrAddrSpace(Name);
1530 else if (Name.consume_front("ptr."))
1531 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1532 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1533 else if (Name.consume_front("ldg.global."))
1534 // nvvm.ldg.global.{i,p,f}
1535 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1536 Name.starts_with("p."));
1537 else
1538 Expand = StringSwitch<bool>(Name)
1539 .Case("barrier0", true)
1540 .Case("barrier.n", true)
1541 .Case("barrier.sync.cnt", true)
1542 .Case("barrier.sync", true)
1543 .Case("barrier", true)
1544 .Case("bar.sync", true)
1545 .Case("clz.ll", true)
1546 .Case("popc.ll", true)
1547 .Case("h2f", true)
1548 .Case("swap.lo.hi.b64", true)
1549 .Case("tanh.approx.f32", true)
1550 .Default(false);
1551
1552 if (Expand) {
1553 NewFn = nullptr;
1554 return true;
1555 }
1556 break; // No other 'nvvm.*'.
1557 }
1558 break;
1559 }
1560 case 'o':
1561 if (Name.starts_with("objectsize.")) {
1562 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1563 if (F->arg_size() == 2 || F->arg_size() == 3) {
1564 rename(F);
1565 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1566 Intrinsic::objectsize, Tys);
1567 return true;
1568 }
1569 }
1570 break;
1571
1572 case 'p':
1573 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1574 rename(F);
1576 F->getParent(), Intrinsic::ptr_annotation,
1577 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1578 return true;
1579 }
1580 break;
1581
1582 case 'r': {
1583 if (Name.consume_front("riscv.")) {
1586 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1587 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1588 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1589 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1592 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1593 rename(F);
1594 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1595 return true;
1596 }
1597 break; // No other applicable upgrades.
1598 }
1599
1601 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1602 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1605 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1606 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1607 rename(F);
1608 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1609 return true;
1610 }
1611 break; // No other applicable upgrades.
1612 }
1613
1615 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1616 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1617 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1618 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1619 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1620 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1621 .Default(Intrinsic::not_intrinsic);
1622 if (ID != Intrinsic::not_intrinsic) {
1623 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1624 rename(F);
1625 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1626 return true;
1627 }
1628 break; // No other applicable upgrades.
1629 }
1630 break; // No other 'riscv.*' intrinsics
1631 }
1632 } break;
1633
1634 case 's':
1635 if (Name == "stackprotectorcheck") {
1636 NewFn = nullptr;
1637 return true;
1638 }
1639 break;
1640
1641 case 't':
1642 if (Name == "thread.pointer") {
1643 NewFn = Intrinsic::getOrInsertDeclaration(
1644 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1645 return true;
1646 }
1647 break;
1648
1649 case 'v': {
1650 if (Name == "var.annotation" && F->arg_size() == 4) {
1651 rename(F);
1652 NewFn = Intrinsic::getOrInsertDeclaration(
1653 F->getParent(), Intrinsic::var_annotation,
1654 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1655 return true;
1656 }
1657 break;
1658 }
1659
1660 case 'w':
1661 if (Name.consume_front("wasm.")) {
1664 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1665 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1666 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1667 .Default(Intrinsic::not_intrinsic);
1668 if (ID != Intrinsic::not_intrinsic) {
1669 rename(F);
1670 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1671 F->getReturnType());
1672 return true;
1673 }
1674
1675 if (Name.consume_front("dot.i8x16.i7x16.")) {
1677 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1678 .Case("add.signed",
1679 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1680 .Default(Intrinsic::not_intrinsic);
1681 if (ID != Intrinsic::not_intrinsic) {
1682 rename(F);
1683 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1684 return true;
1685 }
1686 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1687 }
1688 break; // No other 'wasm.*'.
1689 }
1690 break;
1691
1692 case 'x':
1693 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1694 return true;
1695 }
1696
1697 auto *ST = dyn_cast<StructType>(F->getReturnType());
1698 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1699 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1700 // Replace return type with literal non-packed struct. Only do this for
1701 // intrinsics declared to return a struct, not for intrinsics with
1702 // overloaded return type, in which case the exact struct type will be
1703 // mangled into the name.
1704 SmallVector<Intrinsic::IITDescriptor> Desc;
1705 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1706 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1707 auto *FT = F->getFunctionType();
1708 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1709 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1710 std::string Name = F->getName().str();
1711 rename(F);
1712 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1713 Name, F->getParent());
1714
1715 // The new function may also need remangling.
1716 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1717 NewFn = *Result;
1718 return true;
1719 }
1720 }
1721
1722 // Remangle our intrinsic since we upgrade the mangling
1723 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1724 if (Result != std::nullopt) {
1725 NewFn = *Result;
1726 return true;
1727 }
1728
1729 // This may not belong here. This function is effectively being overloaded
1730 // to both detect an intrinsic which needs upgrading, and to provide the
1731 // upgraded form of the intrinsic. We should perhaps have two separate
1732 // functions for this.
1733 return false;
1734}
1735
1736 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1737 bool CanUpgradeDebugIntrinsicsToRecords) {
1738 NewFn = nullptr;
1739 bool Upgraded =
1740 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1741
1742 // Upgrade intrinsic attributes. This does not change the function.
1743 if (NewFn)
1744 F = NewFn;
1745 if (Intrinsic::ID id = F->getIntrinsicID()) {
1746 // Only do this if the intrinsic signature is valid.
1747 SmallVector<Type *> OverloadTys;
1748 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1749 F->setAttributes(
1750 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1751 }
1752 return Upgraded;
1753}
1754
1755 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1756 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1757 GV->getName() == "llvm.global_dtors")) ||
1758 !GV->hasInitializer())
1759 return nullptr;
1760 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1761 if (!ATy)
1762 return nullptr;
1763 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1764 if (!STy || STy->getNumElements() != 2)
1765 return nullptr;
1766
1767 LLVMContext &C = GV->getContext();
1768 IRBuilder<> IRB(C);
1769 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1770 IRB.getPtrTy());
1771 Constant *Init = GV->getInitializer();
1772 unsigned N = Init->getNumOperands();
1773 std::vector<Constant *> NewCtors(N);
1774 for (unsigned i = 0; i != N; ++i) {
1775 auto Ctor = cast<Constant>(Init->getOperand(i));
1776 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1777 Ctor->getAggregateElement(1),
1778 Constant::getNullValue(IRB.getPtrTy()));
1779 }
1780 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1781
1782 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1783 NewInit, GV->getName());
1784}
1785
1786// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1787// to byte shuffles.
1788 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1789 unsigned Shift) {
1790 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1791 unsigned NumElts = ResultTy->getNumElements() * 8;
1792
1793 // Bitcast from a 64-bit element type to a byte element type.
1794 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1795 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1796
1797 // We'll be shuffling in zeroes.
1798 Value *Res = Constant::getNullValue(VecTy);
1799
1800 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1801 // we'll just return the zero vector.
1802 if (Shift < 16) {
1803 int Idxs[64];
1804 // 256/512-bit version is split into 2/4 16-byte lanes.
1805 for (unsigned l = 0; l != NumElts; l += 16)
1806 for (unsigned i = 0; i != 16; ++i) {
1807 unsigned Idx = NumElts + i - Shift;
1808 if (Idx < NumElts)
1809 Idx -= NumElts - 16; // end of lane, switch operand.
1810 Idxs[l + i] = Idx + l;
1811 }
1812
1813 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1814 }
1815
1816 // Bitcast back to a 64-bit element type.
1817 return Builder.CreateBitCast(Res, ResultTy, "cast");
1818}
1819
1820// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1821// to byte shuffles.
1822 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1823 unsigned Shift) {
1824 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1825 unsigned NumElts = ResultTy->getNumElements() * 8;
1826
1827 // Bitcast from a 64-bit element type to a byte element type.
1828 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1829 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1830
1831 // We'll be shuffling in zeroes.
1832 Value *Res = Constant::getNullValue(VecTy);
1833
1834 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1835 // we'll just return the zero vector.
1836 if (Shift < 16) {
1837 int Idxs[64];
1838 // 256/512-bit version is split into 2/4 16-byte lanes.
1839 for (unsigned l = 0; l != NumElts; l += 16)
1840 for (unsigned i = 0; i != 16; ++i) {
1841 unsigned Idx = i + Shift;
1842 if (Idx >= 16)
1843 Idx += NumElts - 16; // end of lane, switch operand.
1844 Idxs[l + i] = Idx + l;
1845 }
1846
1847 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1848 }
1849
1850 // Bitcast back to a 64-bit element type.
1851 return Builder.CreateBitCast(Res, ResultTy, "cast");
1852}
1853
1854static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1855 unsigned NumElts) {
1856 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1857 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1858 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1859 Mask = Builder.CreateBitCast(Mask, MaskTy);
1860
1861 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1862 // i8 and we need to extract down to the right number of elements.
1863 if (NumElts <= 4) {
1864 int Indices[4];
1865 for (unsigned i = 0; i != NumElts; ++i)
1866 Indices[i] = i;
1867 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1868 "extract");
1869 }
1870
1871 return Mask;
1872}
1873
1874static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1875 Value *Op1) {
1876 // If the mask is all ones just emit the first operation.
1877 if (const auto *C = dyn_cast<Constant>(Mask))
1878 if (C->isAllOnesValue())
1879 return Op0;
1880
1881 Mask = getX86MaskVec(Builder, Mask,
1882 cast<FixedVectorType>(Op0->getType())->getNumElements());
1883 return Builder.CreateSelect(Mask, Op0, Op1);
1884}
1885
1886static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1887 Value *Op1) {
1888 // If the mask is all ones just emit the first operation.
1889 if (const auto *C = dyn_cast<Constant>(Mask))
1890 if (C->isAllOnesValue())
1891 return Op0;
1892
1893 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1894 Mask->getType()->getIntegerBitWidth());
1895 Mask = Builder.CreateBitCast(Mask, MaskTy);
1896 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1897 return Builder.CreateSelect(Mask, Op0, Op1);
1898}
1899
1900// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1901// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1902// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1903 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1904 Value *Op1, Value *Shift,
1905 Value *Passthru, Value *Mask,
1906 bool IsVALIGN) {
1907 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1908
1909 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1910 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1911 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1912 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1913
1914 // Mask the immediate for VALIGN.
1915 if (IsVALIGN)
1916 ShiftVal &= (NumElts - 1);
1917
1918 // If palignr is shifting the pair of vectors more than the size of two
1919 // lanes, emit zero.
1920 if (ShiftVal >= 32)
1922
1923 // If palignr is shifting the pair of input vectors more than one lane,
1924 // but less than two lanes, convert to shifting in zeroes.
1925 if (ShiftVal > 16) {
1926 ShiftVal -= 16;
1927 Op1 = Op0;
1928 Op0 = llvm::Constant::getNullValue(Op0->getType());
1929 }
1930
1931 int Indices[64];
1932 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1933 for (unsigned l = 0; l < NumElts; l += 16) {
1934 for (unsigned i = 0; i != 16; ++i) {
1935 unsigned Idx = ShiftVal + i;
1936 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1937 Idx += NumElts - 16; // End of lane, switch operand.
1938 Indices[l + i] = Idx + l;
1939 }
1940 }
1941
1942 Value *Align = Builder.CreateShuffleVector(
1943 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1944
1945 return emitX86Select(Builder, Mask, Align, Passthru);
1946}
1947
1948 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1949 bool ZeroMask, bool IndexForm) {
1950 Type *Ty = CI.getType();
1951 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1952 unsigned EltWidth = Ty->getScalarSizeInBits();
1953 bool IsFloat = Ty->isFPOrFPVectorTy();
1954 Intrinsic::ID IID;
1955 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1956 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1957 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1958 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1959 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1960 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1961 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1962 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1963 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1964 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1965 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1966 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1967 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1968 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1969 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1970 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1971 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1972 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1973 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1974 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1975 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1976 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1977 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1978 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1979 else if (VecWidth == 128 && EltWidth == 16)
1980 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1981 else if (VecWidth == 256 && EltWidth == 16)
1982 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1983 else if (VecWidth == 512 && EltWidth == 16)
1984 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1985 else if (VecWidth == 128 && EltWidth == 8)
1986 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1987 else if (VecWidth == 256 && EltWidth == 8)
1988 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1989 else if (VecWidth == 512 && EltWidth == 8)
1990 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1991 else
1992 llvm_unreachable("Unexpected intrinsic");
1993
1994 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1995 CI.getArgOperand(2) };
1996
1997 // If this isn't index form we need to swap operand 0 and 1.
1998 if (!IndexForm)
1999 std::swap(Args[0], Args[1]);
2000
2001 Value *V = Builder.CreateIntrinsic(IID, Args);
2002 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2003 : Builder.CreateBitCast(CI.getArgOperand(1),
2004 Ty);
2005 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2006}
2007
2008 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2009 Intrinsic::ID IID) {
2010 Type *Ty = CI.getType();
2011 Value *Op0 = CI.getOperand(0);
2012 Value *Op1 = CI.getOperand(1);
2013 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2014
2015 if (CI.arg_size() == 4) { // For masked intrinsics.
2016 Value *VecSrc = CI.getOperand(2);
2017 Value *Mask = CI.getOperand(3);
2018 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2019 }
2020 return Res;
2021}
2022
2023 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2024 bool IsRotateRight) {
2025 Type *Ty = CI.getType();
2026 Value *Src = CI.getArgOperand(0);
2027 Value *Amt = CI.getArgOperand(1);
2028
2029 // The amount may be a scalar immediate, in which case we create a splat vector.
2030 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
2031 // we only care about the lowest log2 bits anyway.
2032 if (Amt->getType() != Ty) {
2033 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2034 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2035 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2036 }
2037
2038 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2039 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2040
2041 if (CI.arg_size() == 4) { // For masked intrinsics.
2042 Value *VecSrc = CI.getOperand(2);
2043 Value *Mask = CI.getOperand(3);
2044 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2045 }
2046 return Res;
2047}
2048
2049static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2050 bool IsSigned) {
2051 Type *Ty = CI.getType();
2052 Value *LHS = CI.getArgOperand(0);
2053 Value *RHS = CI.getArgOperand(1);
2054
2055 CmpInst::Predicate Pred;
2056 switch (Imm) {
2057 case 0x0:
2058 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2059 break;
2060 case 0x1:
2061 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2062 break;
2063 case 0x2:
2064 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2065 break;
2066 case 0x3:
2067 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2068 break;
2069 case 0x4:
2070 Pred = ICmpInst::ICMP_EQ;
2071 break;
2072 case 0x5:
2073 Pred = ICmpInst::ICMP_NE;
2074 break;
2075 case 0x6:
2076 return Constant::getNullValue(Ty); // FALSE
2077 case 0x7:
2078 return Constant::getAllOnesValue(Ty); // TRUE
2079 default:
2080 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2081 }
2082
2083 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2084 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2085 return Ext;
2086}
2087
2088 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2089 bool IsShiftRight, bool ZeroMask) {
2090 Type *Ty = CI.getType();
2091 Value *Op0 = CI.getArgOperand(0);
2092 Value *Op1 = CI.getArgOperand(1);
2093 Value *Amt = CI.getArgOperand(2);
2094
2095 if (IsShiftRight)
2096 std::swap(Op0, Op1);
2097
2098 // The amount may be a scalar immediate, in which case we create a splat vector.
2099 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
2100 // we only care about the lowest log2 bits anyway.
2101 if (Amt->getType() != Ty) {
2102 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2103 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2104 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2105 }
2106
2107 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2108 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2109
2110 unsigned NumArgs = CI.arg_size();
2111 if (NumArgs >= 4) { // For masked intrinsics.
2112 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2113 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2114 CI.getArgOperand(0);
2115 Value *Mask = CI.getOperand(NumArgs - 1);
2116 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2117 }
2118 return Res;
2119}
2120
2121 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2122 Value *Mask, bool Aligned) {
2123 const Align Alignment =
2124 Aligned
2125 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2126 : Align(1);
2127
2128 // If the mask is all ones just emit a regular store.
2129 if (const auto *C = dyn_cast<Constant>(Mask))
2130 if (C->isAllOnesValue())
2131 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2132
2133 // Convert the mask from an integer type to a vector of i1.
2134 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2135 Mask = getX86MaskVec(Builder, Mask, NumElts);
2136 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2137}
2138
2139 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2140 Value *Passthru, Value *Mask, bool Aligned) {
2141 Type *ValTy = Passthru->getType();
2142 const Align Alignment =
2143 Aligned
2144 ? Align(
2145 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2146 8)
2147 : Align(1);
2148
2149 // If the mask is all ones just emit a regular load.
2150 if (const auto *C = dyn_cast<Constant>(Mask))
2151 if (C->isAllOnesValue())
2152 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2153
2154 // Convert the mask from an integer type to a vector of i1.
2155 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2156 Mask = getX86MaskVec(Builder, Mask, NumElts);
2157 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2158}
2159
2160static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2161 Type *Ty = CI.getType();
2162 Value *Op0 = CI.getArgOperand(0);
2163 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2164 {Op0, Builder.getInt1(false)});
2165 if (CI.arg_size() == 3)
2166 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2167 return Res;
2168}
2169
2170static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2171 Type *Ty = CI.getType();
2172
2173 // Arguments have a vXi32 type so cast to vXi64.
2174 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2175 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2176
2177 if (IsSigned) {
2178 // Shift left then arithmetic shift right.
2179 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2180 LHS = Builder.CreateShl(LHS, ShiftAmt);
2181 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2182 RHS = Builder.CreateShl(RHS, ShiftAmt);
2183 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2184 } else {
2185 // Clear the upper bits.
2186 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2187 LHS = Builder.CreateAnd(LHS, Mask);
2188 RHS = Builder.CreateAnd(RHS, Mask);
2189 }
2190
2191 Value *Res = Builder.CreateMul(LHS, RHS);
2192
2193 if (CI.arg_size() == 4)
2194 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2195
2196 return Res;
2197}
2198
2199 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2200 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2201 Value *Mask) {
2202 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2203 if (Mask) {
2204 const auto *C = dyn_cast<Constant>(Mask);
2205 if (!C || !C->isAllOnesValue())
2206 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2207 }
2208
2209 if (NumElts < 8) {
2210 int Indices[8];
2211 for (unsigned i = 0; i != NumElts; ++i)
2212 Indices[i] = i;
2213 for (unsigned i = NumElts; i != 8; ++i)
2214 Indices[i] = NumElts + i % NumElts;
2215 Vec = Builder.CreateShuffleVector(Vec,
2216 Constant::getNullValue(Vec->getType()),
2217 Indices);
2218 }
2219 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2220}
2221
2222 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2223 unsigned CC, bool Signed) {
2224 Value *Op0 = CI.getArgOperand(0);
2225 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2226
2227 Value *Cmp;
2228 if (CC == 3) {
2229 Cmp = Constant::getNullValue(
2230 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2231 } else if (CC == 7) {
2232 Cmp = Constant::getAllOnesValue(
2233 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2234 } else {
2235 ICmpInst::Predicate Pred;
2236 switch (CC) {
2237 default: llvm_unreachable("Unknown condition code");
2238 case 0: Pred = ICmpInst::ICMP_EQ; break;
2239 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2240 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2241 case 4: Pred = ICmpInst::ICMP_NE; break;
2242 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2243 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2244 }
2245 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2246 }
2247
2248 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2249
2250 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2251}
2252
2253// Replace a masked intrinsic with an older unmasked intrinsic.
2254 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2255 Intrinsic::ID IID) {
2256 Value *Rep =
2257 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2258 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2259}
2260
2261 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2262 Value* A = CI.getArgOperand(0);
2263 Value* B = CI.getArgOperand(1);
2264 Value* Src = CI.getArgOperand(2);
2265 Value* Mask = CI.getArgOperand(3);
2266
2267 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2268 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2269 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2270 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2271 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2272 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2273}
2274
2275 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2276 Value* Op = CI.getArgOperand(0);
2277 Type* ReturnOp = CI.getType();
2278 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2279 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2280 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2281}
2282
2283// Replace intrinsic with unmasked version and a select.
2284 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2285 CallBase &CI, Value *&Rep) {
2286 Name = Name.substr(12); // Remove avx512.mask.
2287
2288 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2289 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2290 Intrinsic::ID IID;
2291 if (Name.starts_with("max.p")) {
2292 if (VecWidth == 128 && EltWidth == 32)
2293 IID = Intrinsic::x86_sse_max_ps;
2294 else if (VecWidth == 128 && EltWidth == 64)
2295 IID = Intrinsic::x86_sse2_max_pd;
2296 else if (VecWidth == 256 && EltWidth == 32)
2297 IID = Intrinsic::x86_avx_max_ps_256;
2298 else if (VecWidth == 256 && EltWidth == 64)
2299 IID = Intrinsic::x86_avx_max_pd_256;
2300 else
2301 llvm_unreachable("Unexpected intrinsic");
2302 } else if (Name.starts_with("min.p")) {
2303 if (VecWidth == 128 && EltWidth == 32)
2304 IID = Intrinsic::x86_sse_min_ps;
2305 else if (VecWidth == 128 && EltWidth == 64)
2306 IID = Intrinsic::x86_sse2_min_pd;
2307 else if (VecWidth == 256 && EltWidth == 32)
2308 IID = Intrinsic::x86_avx_min_ps_256;
2309 else if (VecWidth == 256 && EltWidth == 64)
2310 IID = Intrinsic::x86_avx_min_pd_256;
2311 else
2312 llvm_unreachable("Unexpected intrinsic");
2313 } else if (Name.starts_with("pshuf.b.")) {
2314 if (VecWidth == 128)
2315 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2316 else if (VecWidth == 256)
2317 IID = Intrinsic::x86_avx2_pshuf_b;
2318 else if (VecWidth == 512)
2319 IID = Intrinsic::x86_avx512_pshuf_b_512;
2320 else
2321 llvm_unreachable("Unexpected intrinsic");
2322 } else if (Name.starts_with("pmul.hr.sw.")) {
2323 if (VecWidth == 128)
2324 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2325 else if (VecWidth == 256)
2326 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2327 else if (VecWidth == 512)
2328 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2329 else
2330 llvm_unreachable("Unexpected intrinsic");
2331 } else if (Name.starts_with("pmulh.w.")) {
2332 if (VecWidth == 128)
2333 IID = Intrinsic::x86_sse2_pmulh_w;
2334 else if (VecWidth == 256)
2335 IID = Intrinsic::x86_avx2_pmulh_w;
2336 else if (VecWidth == 512)
2337 IID = Intrinsic::x86_avx512_pmulh_w_512;
2338 else
2339 llvm_unreachable("Unexpected intrinsic");
2340 } else if (Name.starts_with("pmulhu.w.")) {
2341 if (VecWidth == 128)
2342 IID = Intrinsic::x86_sse2_pmulhu_w;
2343 else if (VecWidth == 256)
2344 IID = Intrinsic::x86_avx2_pmulhu_w;
2345 else if (VecWidth == 512)
2346 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2347 else
2348 llvm_unreachable("Unexpected intrinsic");
2349 } else if (Name.starts_with("pmaddw.d.")) {
2350 if (VecWidth == 128)
2351 IID = Intrinsic::x86_sse2_pmadd_wd;
2352 else if (VecWidth == 256)
2353 IID = Intrinsic::x86_avx2_pmadd_wd;
2354 else if (VecWidth == 512)
2355 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2356 else
2357 llvm_unreachable("Unexpected intrinsic");
2358 } else if (Name.starts_with("pmaddubs.w.")) {
2359 if (VecWidth == 128)
2360 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2361 else if (VecWidth == 256)
2362 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2363 else if (VecWidth == 512)
2364 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2365 else
2366 llvm_unreachable("Unexpected intrinsic");
2367 } else if (Name.starts_with("packsswb.")) {
2368 if (VecWidth == 128)
2369 IID = Intrinsic::x86_sse2_packsswb_128;
2370 else if (VecWidth == 256)
2371 IID = Intrinsic::x86_avx2_packsswb;
2372 else if (VecWidth == 512)
2373 IID = Intrinsic::x86_avx512_packsswb_512;
2374 else
2375 llvm_unreachable("Unexpected intrinsic");
2376 } else if (Name.starts_with("packssdw.")) {
2377 if (VecWidth == 128)
2378 IID = Intrinsic::x86_sse2_packssdw_128;
2379 else if (VecWidth == 256)
2380 IID = Intrinsic::x86_avx2_packssdw;
2381 else if (VecWidth == 512)
2382 IID = Intrinsic::x86_avx512_packssdw_512;
2383 else
2384 llvm_unreachable("Unexpected intrinsic");
2385 } else if (Name.starts_with("packuswb.")) {
2386 if (VecWidth == 128)
2387 IID = Intrinsic::x86_sse2_packuswb_128;
2388 else if (VecWidth == 256)
2389 IID = Intrinsic::x86_avx2_packuswb;
2390 else if (VecWidth == 512)
2391 IID = Intrinsic::x86_avx512_packuswb_512;
2392 else
2393 llvm_unreachable("Unexpected intrinsic");
2394 } else if (Name.starts_with("packusdw.")) {
2395 if (VecWidth == 128)
2396 IID = Intrinsic::x86_sse41_packusdw;
2397 else if (VecWidth == 256)
2398 IID = Intrinsic::x86_avx2_packusdw;
2399 else if (VecWidth == 512)
2400 IID = Intrinsic::x86_avx512_packusdw_512;
2401 else
2402 llvm_unreachable("Unexpected intrinsic");
2403 } else if (Name.starts_with("vpermilvar.")) {
2404 if (VecWidth == 128 && EltWidth == 32)
2405 IID = Intrinsic::x86_avx_vpermilvar_ps;
2406 else if (VecWidth == 128 && EltWidth == 64)
2407 IID = Intrinsic::x86_avx_vpermilvar_pd;
2408 else if (VecWidth == 256 && EltWidth == 32)
2409 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2410 else if (VecWidth == 256 && EltWidth == 64)
2411 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2412 else if (VecWidth == 512 && EltWidth == 32)
2413 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2414 else if (VecWidth == 512 && EltWidth == 64)
2415 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2416 else
2417 llvm_unreachable("Unexpected intrinsic");
2418 } else if (Name == "cvtpd2dq.256") {
2419 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2420 } else if (Name == "cvtpd2ps.256") {
2421 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2422 } else if (Name == "cvttpd2dq.256") {
2423 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2424 } else if (Name == "cvttps2dq.128") {
2425 IID = Intrinsic::x86_sse2_cvttps2dq;
2426 } else if (Name == "cvttps2dq.256") {
2427 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2428 } else if (Name.starts_with("permvar.")) {
2429 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2430 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2431 IID = Intrinsic::x86_avx2_permps;
2432 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2433 IID = Intrinsic::x86_avx2_permd;
2434 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2435 IID = Intrinsic::x86_avx512_permvar_df_256;
2436 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2437 IID = Intrinsic::x86_avx512_permvar_di_256;
2438 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2439 IID = Intrinsic::x86_avx512_permvar_sf_512;
2440 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2441 IID = Intrinsic::x86_avx512_permvar_si_512;
2442 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2443 IID = Intrinsic::x86_avx512_permvar_df_512;
2444 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2445 IID = Intrinsic::x86_avx512_permvar_di_512;
2446 else if (VecWidth == 128 && EltWidth == 16)
2447 IID = Intrinsic::x86_avx512_permvar_hi_128;
2448 else if (VecWidth == 256 && EltWidth == 16)
2449 IID = Intrinsic::x86_avx512_permvar_hi_256;
2450 else if (VecWidth == 512 && EltWidth == 16)
2451 IID = Intrinsic::x86_avx512_permvar_hi_512;
2452 else if (VecWidth == 128 && EltWidth == 8)
2453 IID = Intrinsic::x86_avx512_permvar_qi_128;
2454 else if (VecWidth == 256 && EltWidth == 8)
2455 IID = Intrinsic::x86_avx512_permvar_qi_256;
2456 else if (VecWidth == 512 && EltWidth == 8)
2457 IID = Intrinsic::x86_avx512_permvar_qi_512;
2458 else
2459 llvm_unreachable("Unexpected intrinsic");
2460 } else if (Name.starts_with("dbpsadbw.")) {
2461 if (VecWidth == 128)
2462 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2463 else if (VecWidth == 256)
2464 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2465 else if (VecWidth == 512)
2466 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2467 else
2468 llvm_unreachable("Unexpected intrinsic");
2469 } else if (Name.starts_with("pmultishift.qb.")) {
2470 if (VecWidth == 128)
2471 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2472 else if (VecWidth == 256)
2473 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2474 else if (VecWidth == 512)
2475 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2476 else
2477 llvm_unreachable("Unexpected intrinsic");
2478 } else if (Name.starts_with("conflict.")) {
2479 if (Name[9] == 'd' && VecWidth == 128)
2480 IID = Intrinsic::x86_avx512_conflict_d_128;
2481 else if (Name[9] == 'd' && VecWidth == 256)
2482 IID = Intrinsic::x86_avx512_conflict_d_256;
2483 else if (Name[9] == 'd' && VecWidth == 512)
2484 IID = Intrinsic::x86_avx512_conflict_d_512;
2485 else if (Name[9] == 'q' && VecWidth == 128)
2486 IID = Intrinsic::x86_avx512_conflict_q_128;
2487 else if (Name[9] == 'q' && VecWidth == 256)
2488 IID = Intrinsic::x86_avx512_conflict_q_256;
2489 else if (Name[9] == 'q' && VecWidth == 512)
2490 IID = Intrinsic::x86_avx512_conflict_q_512;
2491 else
2492 llvm_unreachable("Unexpected intrinsic");
2493 } else if (Name.starts_with("pavg.")) {
2494 if (Name[5] == 'b' && VecWidth == 128)
2495 IID = Intrinsic::x86_sse2_pavg_b;
2496 else if (Name[5] == 'b' && VecWidth == 256)
2497 IID = Intrinsic::x86_avx2_pavg_b;
2498 else if (Name[5] == 'b' && VecWidth == 512)
2499 IID = Intrinsic::x86_avx512_pavg_b_512;
2500 else if (Name[5] == 'w' && VecWidth == 128)
2501 IID = Intrinsic::x86_sse2_pavg_w;
2502 else if (Name[5] == 'w' && VecWidth == 256)
2503 IID = Intrinsic::x86_avx2_pavg_w;
2504 else if (Name[5] == 'w' && VecWidth == 512)
2505 IID = Intrinsic::x86_avx512_pavg_w_512;
2506 else
2507 llvm_unreachable("Unexpected intrinsic");
2508 } else
2509 return false;
2510
2511 SmallVector<Value *, 4> Args(CI.args());
2512 Args.pop_back();
2513 Args.pop_back();
2514 Rep = Builder.CreateIntrinsic(IID, Args);
2515 unsigned NumArgs = CI.arg_size();
2516 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2517 CI.getArgOperand(NumArgs - 2));
2518 return true;
2519}
2520
2521 /// Upgrade the comment in a call to inline asm that represents an ObjC
2522 /// retain/release marker.
2523void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2524 size_t Pos;
2525 if (AsmStr->find("mov\tfp") == 0 &&
2526 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2527 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2528 AsmStr->replace(Pos, 1, ";");
2529 }
2530}
2531
2532 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2533 Function *F, IRBuilder<> &Builder) {
2534 Value *Rep = nullptr;
2535
2536 if (Name == "abs.i" || Name == "abs.ll") {
2537 Value *Arg = CI->getArgOperand(0);
2538 Value *Neg = Builder.CreateNeg(Arg, "neg");
2539 Value *Cmp = Builder.CreateICmpSGE(
2540 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2541 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2542 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2543 Type *Ty = (Name == "abs.bf16")
2544 ? Builder.getBFloatTy()
2545 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2546 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2547 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2548 Rep = Builder.CreateBitCast(Abs, CI->getType());
2549 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2550 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2551 : Intrinsic::nvvm_fabs;
2552 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2553 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2554 Name.starts_with("atomic.load.add.f64.p")) {
2555 Value *Ptr = CI->getArgOperand(0);
2556 Value *Val = CI->getArgOperand(1);
2557 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2558 AtomicOrdering::SequentiallyConsistent);
2559 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2560 Name.starts_with("atomic.load.dec.32.p")) {
2561 Value *Ptr = CI->getArgOperand(0);
2562 Value *Val = CI->getArgOperand(1);
2563 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2564 : AtomicRMWInst::UDecWrap;
2565 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2566 AtomicOrdering::SequentiallyConsistent);
2567 } else if (Name.consume_front("max.") &&
2568 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2569 Name == "ui" || Name == "ull")) {
2570 Value *Arg0 = CI->getArgOperand(0);
2571 Value *Arg1 = CI->getArgOperand(1);
2572 Value *Cmp = Name.starts_with("u")
2573 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2574 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2575 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2576 } else if (Name.consume_front("min.") &&
2577 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2578 Name == "ui" || Name == "ull")) {
2579 Value *Arg0 = CI->getArgOperand(0);
2580 Value *Arg1 = CI->getArgOperand(1);
2581 Value *Cmp = Name.starts_with("u")
2582 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2583 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2584 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2585 } else if (Name == "clz.ll") {
2586 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2587 Value *Arg = CI->getArgOperand(0);
2588 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2589 {Arg, Builder.getFalse()},
2590 /*FMFSource=*/nullptr, "ctlz");
2591 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2592 } else if (Name == "popc.ll") {
2593 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2594 // i64.
2595 Value *Arg = CI->getArgOperand(0);
2596 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2597 Arg, /*FMFSource=*/nullptr, "ctpop");
2598 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2599 } else if (Name == "h2f") {
2600 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2601 {Builder.getFloatTy()}, CI->getArgOperand(0),
2602 /*FMFSource=*/nullptr, "h2f");
2603 } else if (Name.consume_front("bitcast.") &&
2604 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2605 Name == "d2ll")) {
2606 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2607 } else if (Name == "rotate.b32") {
2608 Value *Arg = CI->getOperand(0);
2609 Value *ShiftAmt = CI->getOperand(1);
2610 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2611 {Arg, Arg, ShiftAmt});
2612 } else if (Name == "rotate.b64") {
2613 Type *Int64Ty = Builder.getInt64Ty();
2614 Value *Arg = CI->getOperand(0);
2615 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2616 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2617 {Arg, Arg, ZExtShiftAmt});
2618 } else if (Name == "rotate.right.b64") {
2619 Type *Int64Ty = Builder.getInt64Ty();
2620 Value *Arg = CI->getOperand(0);
2621 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2622 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2623 {Arg, Arg, ZExtShiftAmt});
2624 } else if (Name == "swap.lo.hi.b64") {
2625 Type *Int64Ty = Builder.getInt64Ty();
2626 Value *Arg = CI->getOperand(0);
2627 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2628 {Arg, Arg, Builder.getInt64(32)});
2629 } else if ((Name.consume_front("ptr.gen.to.") &&
2630 consumeNVVMPtrAddrSpace(Name)) ||
2631 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2632 Name.starts_with(".to.gen"))) {
2633 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2634 } else if (Name.consume_front("ldg.global")) {
2635 Value *Ptr = CI->getArgOperand(0);
2636 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2637 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2638 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2639 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2640 MDNode *MD = MDNode::get(Builder.getContext(), {});
2641 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2642 return LD;
2643 } else if (Name == "tanh.approx.f32") {
2644 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2645 FastMathFlags FMF;
2646 FMF.setApproxFunc();
2647 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2648 FMF);
2649 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2650 Value *Arg =
2651 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2652 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2653 {}, {Arg});
2654 } else if (Name == "barrier") {
2655 Rep = Builder.CreateIntrinsic(
2656 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2657 {CI->getArgOperand(0), CI->getArgOperand(1)});
2658 } else if (Name == "barrier.sync") {
2659 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2660 {CI->getArgOperand(0)});
2661 } else if (Name == "barrier.sync.cnt") {
2662 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2663 {CI->getArgOperand(0), CI->getArgOperand(1)});
2664 } else {
2665 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2666 if (IID != Intrinsic::not_intrinsic &&
2667 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2668 rename(F);
2669 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2670 SmallVector<Value *, 2> Args;
2671 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2672 Value *Arg = CI->getArgOperand(I);
2673 Type *OldType = Arg->getType();
2674 Type *NewType = NewFn->getArg(I)->getType();
2675 Args.push_back(
2676 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2677 ? Builder.CreateBitCast(Arg, NewType)
2678 : Arg);
2679 }
2680 Rep = Builder.CreateCall(NewFn, Args);
2681 if (F->getReturnType()->isIntegerTy())
2682 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2683 }
2684 }
2685
2686 return Rep;
2687}
2688
2689 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2690 IRBuilder<> &Builder) {
2691 LLVMContext &C = F->getContext();
2692 Value *Rep = nullptr;
2693
2694 if (Name.starts_with("sse4a.movnt.")) {
2695 SmallVector<Metadata *, 1> Elts;
2696 Elts.push_back(
2697 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2698 MDNode *Node = MDNode::get(C, Elts);
2699
2700 Value *Arg0 = CI->getArgOperand(0);
2701 Value *Arg1 = CI->getArgOperand(1);
2702
2703 // Nontemporal (unaligned) store of the 0'th element of the float/double
2704 // vector.
2705 Value *Extract =
2706 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2707
2708 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2709 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2710 } else if (Name.starts_with("avx.movnt.") ||
2711 Name.starts_with("avx512.storent.")) {
2712 SmallVector<Metadata *, 1> Elts;
2713 Elts.push_back(
2714 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2715 MDNode *Node = MDNode::get(C, Elts);
2716
2717 Value *Arg0 = CI->getArgOperand(0);
2718 Value *Arg1 = CI->getArgOperand(1);
2719
2720 StoreInst *SI = Builder.CreateAlignedStore(
2721 Arg1, Arg0,
2722 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2723 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2724 } else if (Name == "sse2.storel.dq") {
2725 Value *Arg0 = CI->getArgOperand(0);
2726 Value *Arg1 = CI->getArgOperand(1);
2727
2728 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2729 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2730 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2731 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2732 } else if (Name.starts_with("sse.storeu.") ||
2733 Name.starts_with("sse2.storeu.") ||
2734 Name.starts_with("avx.storeu.")) {
2735 Value *Arg0 = CI->getArgOperand(0);
2736 Value *Arg1 = CI->getArgOperand(1);
2737 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2738 } else if (Name == "avx512.mask.store.ss") {
2739 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2740 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2741 Mask, false);
2742 } else if (Name.starts_with("avx512.mask.store")) {
2743 // "avx512.mask.storeu." or "avx512.mask.store."
2744 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2745 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2746 CI->getArgOperand(2), Aligned);
2747 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2748 // Upgrade packed integer vector compare intrinsics to compare instructions.
2749 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2750 bool CmpEq = Name[9] == 'e';
2751 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2752 CI->getArgOperand(0), CI->getArgOperand(1));
2753 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2754 } else if (Name.starts_with("avx512.broadcastm")) {
2755 Type *ExtTy = Type::getInt32Ty(C);
2756 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2757 ExtTy = Type::getInt64Ty(C);
2758 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2759 ExtTy->getPrimitiveSizeInBits();
2760 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2761 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2762 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2763 Value *Vec = CI->getArgOperand(0);
2764 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2765 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2766 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2767 } else if (Name.starts_with("avx.sqrt.p") ||
2768 Name.starts_with("sse2.sqrt.p") ||
2769 Name.starts_with("sse.sqrt.p")) {
2770 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2771 {CI->getArgOperand(0)});
2772 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2773 if (CI->arg_size() == 4 &&
2774 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2775 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2776 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2777 : Intrinsic::x86_avx512_sqrt_pd_512;
2778
2779 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2780 Rep = Builder.CreateIntrinsic(IID, Args);
2781 } else {
2782 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2783 {CI->getArgOperand(0)});
2784 }
2785 Rep =
2786 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2787 } else if (Name.starts_with("avx512.ptestm") ||
2788 Name.starts_with("avx512.ptestnm")) {
2789 Value *Op0 = CI->getArgOperand(0);
2790 Value *Op1 = CI->getArgOperand(1);
2791 Value *Mask = CI->getArgOperand(2);
2792 Rep = Builder.CreateAnd(Op0, Op1);
2793 llvm::Type *Ty = Op0->getType();
2794 Value *Zero = llvm::Constant::getNullValue(Ty);
2795 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2796 ? ICmpInst::ICMP_NE
2797 : ICmpInst::ICMP_EQ;
2798 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2799 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2800 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2801 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2802 ->getNumElements();
2803 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2804 Rep =
2805 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2806 } else if (Name.starts_with("avx512.kunpck")) {
2807 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2808 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2809 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2810 int Indices[64];
2811 for (unsigned i = 0; i != NumElts; ++i)
2812 Indices[i] = i;
2813
2814 // First extract half of each vector. This gives better codegen than
2815 // doing it in a single shuffle.
2816 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2817 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2818 // Concat the vectors.
2819 // NOTE: Operands have to be swapped to match intrinsic definition.
2820 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2821 Rep = Builder.CreateBitCast(Rep, CI->getType());
2822 } else if (Name == "avx512.kand.w") {
2823 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2824 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2825 Rep = Builder.CreateAnd(LHS, RHS);
2826 Rep = Builder.CreateBitCast(Rep, CI->getType());
2827 } else if (Name == "avx512.kandn.w") {
2828 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2829 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2830 LHS = Builder.CreateNot(LHS);
2831 Rep = Builder.CreateAnd(LHS, RHS);
2832 Rep = Builder.CreateBitCast(Rep, CI->getType());
2833 } else if (Name == "avx512.kor.w") {
2834 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2835 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2836 Rep = Builder.CreateOr(LHS, RHS);
2837 Rep = Builder.CreateBitCast(Rep, CI->getType());
2838 } else if (Name == "avx512.kxor.w") {
2839 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2840 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2841 Rep = Builder.CreateXor(LHS, RHS);
2842 Rep = Builder.CreateBitCast(Rep, CI->getType());
2843 } else if (Name == "avx512.kxnor.w") {
2844 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2845 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2846 LHS = Builder.CreateNot(LHS);
2847 Rep = Builder.CreateXor(LHS, RHS);
2848 Rep = Builder.CreateBitCast(Rep, CI->getType());
2849 } else if (Name == "avx512.knot.w") {
2850 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2851 Rep = Builder.CreateNot(Rep);
2852 Rep = Builder.CreateBitCast(Rep, CI->getType());
2853 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2854 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2855 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2856 Rep = Builder.CreateOr(LHS, RHS);
2857 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2858 Value *C;
2859 if (Name[14] == 'c')
2860 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2861 else
2862 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2863 Rep = Builder.CreateICmpEQ(Rep, C);
2864 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2865 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2866 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2867 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2868 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2869 Type *I32Ty = Type::getInt32Ty(C);
2870 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2871 ConstantInt::get(I32Ty, 0));
2872 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2873 ConstantInt::get(I32Ty, 0));
2874 Value *EltOp;
2875 if (Name.contains(".add."))
2876 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2877 else if (Name.contains(".sub."))
2878 EltOp = Builder.CreateFSub(Elt0, Elt1);
2879 else if (Name.contains(".mul."))
2880 EltOp = Builder.CreateFMul(Elt0, Elt1);
2881 else
2882 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2883 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2884 ConstantInt::get(I32Ty, 0));
2885 } else if (Name.starts_with("avx512.mask.pcmp")) {
2886 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2887 bool CmpEq = Name[16] == 'e';
2888 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2889 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2890 Type *OpTy = CI->getArgOperand(0)->getType();
2891 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2892 Intrinsic::ID IID;
2893 switch (VecWidth) {
2894 default:
2895 llvm_unreachable("Unexpected intrinsic");
2896 case 128:
2897 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2898 break;
2899 case 256:
2900 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2901 break;
2902 case 512:
2903 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2904 break;
2905 }
2906
2907 Rep =
2908 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2909 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2910 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2911 Type *OpTy = CI->getArgOperand(0)->getType();
2912 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2913 unsigned EltWidth = OpTy->getScalarSizeInBits();
2914 Intrinsic::ID IID;
2915 if (VecWidth == 128 && EltWidth == 32)
2916 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2917 else if (VecWidth == 256 && EltWidth == 32)
2918 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2919 else if (VecWidth == 512 && EltWidth == 32)
2920 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2921 else if (VecWidth == 128 && EltWidth == 64)
2922 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2923 else if (VecWidth == 256 && EltWidth == 64)
2924 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2925 else if (VecWidth == 512 && EltWidth == 64)
2926 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2927 else
2928 llvm_unreachable("Unexpected intrinsic");
2929
2930 Rep =
2931 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2932 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2933 } else if (Name.starts_with("avx512.cmp.p")) {
2934 SmallVector<Value *, 4> Args(CI->args());
2935 Type *OpTy = Args[0]->getType();
2936 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2937 unsigned EltWidth = OpTy->getScalarSizeInBits();
2938 Intrinsic::ID IID;
2939 if (VecWidth == 128 && EltWidth == 32)
2940 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2941 else if (VecWidth == 256 && EltWidth == 32)
2942 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2943 else if (VecWidth == 512 && EltWidth == 32)
2944 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2945 else if (VecWidth == 128 && EltWidth == 64)
2946 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2947 else if (VecWidth == 256 && EltWidth == 64)
2948 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2949 else if (VecWidth == 512 && EltWidth == 64)
2950 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2951 else
2952 llvm_unreachable("Unexpected intrinsic");
2953
2954 Value *Mask = CI->getArgOperand(CI->arg_size() - 1);
2955 if (VecWidth == 512)
2956 std::swap(Mask, Args.back());
2957 Args.push_back(Mask);
2958
2959 Rep = Builder.CreateIntrinsic(IID, Args);
2960 } else if (Name.starts_with("avx512.mask.cmp.")) {
2961 // Integer compare intrinsics.
2962 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2963 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2964 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2965 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2966 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2967 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2968 Name.starts_with("avx512.cvtw2mask.") ||
2969 Name.starts_with("avx512.cvtd2mask.") ||
2970 Name.starts_with("avx512.cvtq2mask.")) {
2971 Value *Op = CI->getArgOperand(0);
2972 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2973 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2974 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2975 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2976 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2977 Name.starts_with("avx512.mask.pabs")) {
2978 Rep = upgradeAbs(Builder, *CI);
2979 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2980 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2981 Name.starts_with("avx512.mask.pmaxs")) {
2982 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2983 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2984 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2985 Name.starts_with("avx512.mask.pmaxu")) {
2986 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2987 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2988 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2989 Name.starts_with("avx512.mask.pmins")) {
2990 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2991 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2992 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2993 Name.starts_with("avx512.mask.pminu")) {
2994 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2995 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2996 Name == "avx512.pmulu.dq.512" ||
2997 Name.starts_with("avx512.mask.pmulu.dq.")) {
2998 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2999 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3000 Name == "avx512.pmul.dq.512" ||
3001 Name.starts_with("avx512.mask.pmul.dq.")) {
3002 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3003 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3004 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3005 Rep =
3006 Builder.CreateSIToFP(CI->getArgOperand(1),
3007 cast<VectorType>(CI->getType())->getElementType());
3008 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3009 } else if (Name == "avx512.cvtusi2sd") {
3010 Rep =
3011 Builder.CreateUIToFP(CI->getArgOperand(1),
3012 cast<VectorType>(CI->getType())->getElementType());
3013 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3014 } else if (Name == "sse2.cvtss2sd") {
3015 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3016 Rep = Builder.CreateFPExt(
3017 Rep, cast<VectorType>(CI->getType())->getElementType());
3018 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3019 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3020 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3021 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3022 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3023 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3024 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3025 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3026 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3027 Name == "avx512.mask.cvtqq2ps.256" ||
3028 Name == "avx512.mask.cvtqq2ps.512" ||
3029 Name == "avx512.mask.cvtuqq2ps.256" ||
3030 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3031 Name == "avx.cvt.ps2.pd.256" ||
3032 Name == "avx512.mask.cvtps2pd.128" ||
3033 Name == "avx512.mask.cvtps2pd.256") {
3034 auto *DstTy = cast<FixedVectorType>(CI->getType());
3035 Rep = CI->getArgOperand(0);
3036 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3037
3038 unsigned NumDstElts = DstTy->getNumElements();
3039 if (NumDstElts < SrcTy->getNumElements()) {
3040 assert(NumDstElts == 2 && "Unexpected vector size");
3041 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3042 }
3043
3044 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3045 bool IsUnsigned = Name.contains("cvtu");
3046 if (IsPS2PD)
3047 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3048 else if (CI->arg_size() == 4 &&
3049 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3050 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3051 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3052 : Intrinsic::x86_avx512_sitofp_round;
3053 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3054 {Rep, CI->getArgOperand(3)});
3055 } else {
3056 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3057 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3058 }
3059
3060 if (CI->arg_size() >= 3)
3061 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3062 CI->getArgOperand(1));
3063 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3064 Name.starts_with("vcvtph2ps.")) {
3065 auto *DstTy = cast<FixedVectorType>(CI->getType());
3066 Rep = CI->getArgOperand(0);
3067 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3068 unsigned NumDstElts = DstTy->getNumElements();
3069 if (NumDstElts != SrcTy->getNumElements()) {
3070 assert(NumDstElts == 4 && "Unexpected vector size");
3071 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3072 }
3073 Rep = Builder.CreateBitCast(
3074 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3075 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3076 if (CI->arg_size() >= 3)
3077 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3078 CI->getArgOperand(1));
3079 } else if (Name.starts_with("avx512.mask.load")) {
3080 // "avx512.mask.loadu." or "avx512.mask.load."
3081 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3082 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3083 CI->getArgOperand(2), Aligned);
3084 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3085 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3086 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3087 ResultTy->getNumElements());
3088
3089 Rep = Builder.CreateIntrinsic(
3090 Intrinsic::masked_expandload, ResultTy,
3091 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3092 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3093 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3094 Value *MaskVec =
3095 getX86MaskVec(Builder, CI->getArgOperand(2),
3096 cast<FixedVectorType>(ResultTy)->getNumElements());
3097
3098 Rep = Builder.CreateIntrinsic(
3099 Intrinsic::masked_compressstore, ResultTy,
3100 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3101 } else if (Name.starts_with("avx512.mask.compress.") ||
3102 Name.starts_with("avx512.mask.expand.")) {
3103 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3104
3105 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3106 ResultTy->getNumElements());
3107
3108 bool IsCompress = Name[12] == 'c';
3109 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3110 : Intrinsic::x86_avx512_mask_expand;
3111 Rep = Builder.CreateIntrinsic(
3112 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3113 } else if (Name.starts_with("xop.vpcom")) {
3114 bool IsSigned;
3115 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3116 Name.ends_with("uq"))
3117 IsSigned = false;
3118 else if (Name.ends_with("b") || Name.ends_with("w") ||
3119 Name.ends_with("d") || Name.ends_with("q"))
3120 IsSigned = true;
3121 else
3122 llvm_unreachable("Unknown suffix");
3123
3124 unsigned Imm;
3125 if (CI->arg_size() == 3) {
3126 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127 } else {
3128 Name = Name.substr(9); // strip off "xop.vpcom"
3129 if (Name.starts_with("lt"))
3130 Imm = 0;
3131 else if (Name.starts_with("le"))
3132 Imm = 1;
3133 else if (Name.starts_with("gt"))
3134 Imm = 2;
3135 else if (Name.starts_with("ge"))
3136 Imm = 3;
3137 else if (Name.starts_with("eq"))
3138 Imm = 4;
3139 else if (Name.starts_with("ne"))
3140 Imm = 5;
3141 else if (Name.starts_with("false"))
3142 Imm = 6;
3143 else if (Name.starts_with("true"))
3144 Imm = 7;
3145 else
3146 llvm_unreachable("Unknown condition");
3147 }
3148
3149 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3150 } else if (Name.starts_with("xop.vpcmov")) {
3151 Value *Sel = CI->getArgOperand(2);
3152 Value *NotSel = Builder.CreateNot(Sel);
3153 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3154 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3155 Rep = Builder.CreateOr(Sel0, Sel1);
3156 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3157 Name.starts_with("avx512.mask.prol")) {
3158 Rep = upgradeX86Rotate(Builder, *CI, false);
3159 } else if (Name.starts_with("avx512.pror") ||
3160 Name.starts_with("avx512.mask.pror")) {
3161 Rep = upgradeX86Rotate(Builder, *CI, true);
3162 } else if (Name.starts_with("avx512.vpshld.") ||
3163 Name.starts_with("avx512.mask.vpshld") ||
3164 Name.starts_with("avx512.maskz.vpshld")) {
3165 bool ZeroMask = Name[11] == 'z';
3166 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3167 } else if (Name.starts_with("avx512.vpshrd.") ||
3168 Name.starts_with("avx512.mask.vpshrd") ||
3169 Name.starts_with("avx512.maskz.vpshrd")) {
3170 bool ZeroMask = Name[11] == 'z';
3171 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3172 } else if (Name == "sse42.crc32.64.8") {
3173 Value *Trunc0 =
3174 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3175 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3176 {Trunc0, CI->getArgOperand(1)});
3177 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3178 } else if (Name.starts_with("avx.vbroadcast.s") ||
3179 Name.starts_with("avx512.vbroadcast.s")) {
3180 // Replace broadcasts with a series of insertelements.
3181 auto *VecTy = cast<FixedVectorType>(CI->getType());
3182 Type *EltTy = VecTy->getElementType();
3183 unsigned EltNum = VecTy->getNumElements();
3184 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3185 Type *I32Ty = Type::getInt32Ty(C);
3186 Rep = PoisonValue::get(VecTy);
3187 for (unsigned I = 0; I < EltNum; ++I)
3188 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3189 } else if (Name.starts_with("sse41.pmovsx") ||
3190 Name.starts_with("sse41.pmovzx") ||
3191 Name.starts_with("avx2.pmovsx") ||
3192 Name.starts_with("avx2.pmovzx") ||
3193 Name.starts_with("avx512.mask.pmovsx") ||
3194 Name.starts_with("avx512.mask.pmovzx")) {
3195 auto *DstTy = cast<FixedVectorType>(CI->getType());
3196 unsigned NumDstElts = DstTy->getNumElements();
3197
3198 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3199 SmallVector<int, 8> ShuffleMask(NumDstElts);
3200 for (unsigned i = 0; i != NumDstElts; ++i)
3201 ShuffleMask[i] = i;
3202
3203 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3204
3205 bool DoSext = Name.contains("pmovsx");
3206 Rep =
3207 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3208 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3209 if (CI->arg_size() == 3)
3210 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3211 CI->getArgOperand(1));
3212 } else if (Name == "avx512.mask.pmov.qd.256" ||
3213 Name == "avx512.mask.pmov.qd.512" ||
3214 Name == "avx512.mask.pmov.wb.256" ||
3215 Name == "avx512.mask.pmov.wb.512") {
3216 Type *Ty = CI->getArgOperand(1)->getType();
3217 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3218 Rep =
3219 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3220 } else if (Name.starts_with("avx.vbroadcastf128") ||
3221 Name == "avx2.vbroadcasti128") {
3222 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3223 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3224 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3225 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3226 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3227 if (NumSrcElts == 2)
3228 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3229 else
3230 Rep = Builder.CreateShuffleVector(Load,
3231 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3232 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3233 Name.starts_with("avx512.mask.shuf.f")) {
3234 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3235 Type *VT = CI->getType();
3236 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3237 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3238 unsigned ControlBitsMask = NumLanes - 1;
3239 unsigned NumControlBits = NumLanes / 2;
3240 SmallVector<int, 8> ShuffleMask;

3241
3242 for (unsigned l = 0; l != NumLanes; ++l) {
3243 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3244 // We actually need the other source.
3245 if (l >= NumLanes / 2)
3246 LaneMask += NumLanes;
3247 for (unsigned i = 0; i != NumElementsInLane; ++i)
3248 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3249 }
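      // e.g. a 512-bit shuf.i32x4 with Imm = 0xE4 keeps lanes {0,1} of the
      // first source and takes lanes {2,3} of the second, i.e. Idxs 0..7
      // followed by 24..31.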
3250 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3251 CI->getArgOperand(1), ShuffleMask);
3252 Rep =
3253 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3254 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3255 Name.starts_with("avx512.mask.broadcasti")) {
3256 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3257 ->getNumElements();
3258 unsigned NumDstElts =
3259 cast<FixedVectorType>(CI->getType())->getNumElements();
3260
3261 SmallVector<int, 8> ShuffleMask(NumDstElts);
3262 for (unsigned i = 0; i != NumDstElts; ++i)
3263 ShuffleMask[i] = i % NumSrcElts;
3264
3265 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3266 CI->getArgOperand(0), ShuffleMask);
3267 Rep =
3268 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3269 } else if (Name.starts_with("avx2.pbroadcast") ||
3270 Name.starts_with("avx2.vbroadcast") ||
3271 Name.starts_with("avx512.pbroadcast") ||
3272 Name.starts_with("avx512.mask.broadcast.s")) {
3273 // Replace vp?broadcasts with a vector shuffle.
3274 Value *Op = CI->getArgOperand(0);
3275 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3276 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
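      // An all-zeros shuffle mask splats element 0 across the destination
      // element count, which is exactly what the broadcast needs.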
3277 SmallVector<int, 8> M;
3278 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3279 Rep = Builder.CreateShuffleVector(Op, M);
3280
3281 if (CI->arg_size() == 3)
3282 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3283 CI->getArgOperand(1));
3284 } else if (Name.starts_with("sse2.padds.") ||
3285 Name.starts_with("avx2.padds.") ||
3286 Name.starts_with("avx512.padds.") ||
3287 Name.starts_with("avx512.mask.padds.")) {
3288 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3289 } else if (Name.starts_with("sse2.psubs.") ||
3290 Name.starts_with("avx2.psubs.") ||
3291 Name.starts_with("avx512.psubs.") ||
3292 Name.starts_with("avx512.mask.psubs.")) {
3293 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3294 } else if (Name.starts_with("sse2.paddus.") ||
3295 Name.starts_with("avx2.paddus.") ||
3296 Name.starts_with("avx512.mask.paddus.")) {
3297 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3298 } else if (Name.starts_with("sse2.psubus.") ||
3299 Name.starts_with("avx2.psubus.") ||
3300 Name.starts_with("avx512.mask.psubus.")) {
3301 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3302 } else if (Name.starts_with("avx512.mask.palignr.")) {
3303 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3304 CI->getArgOperand(1), CI->getArgOperand(2),
3305 CI->getArgOperand(3), CI->getArgOperand(4),
3306 false);
3307 } else if (Name.starts_with("avx512.mask.valign.")) {
3308 Rep = upgradeX86ALIGNIntrinsics(
3309 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3310 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3311 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3312 // 128/256-bit shift left specified in bits.
3313 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3314 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3315 Shift / 8); // Shift is in bits.
3316 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3317 // 128/256-bit shift right specified in bits.
3318 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3319 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3320 Shift / 8); // Shift is in bits.
3321 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3322 Name == "avx512.psll.dq.512") {
3323 // 128/256/512-bit shift left specified in bytes.
3324 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3325 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3326 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3327 Name == "avx512.psrl.dq.512") {
3328 // 128/256/512-bit shift right specified in bytes.
3329 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3330 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3331 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3332 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3333 Name.starts_with("avx2.pblendd.")) {
3334 Value *Op0 = CI->getArgOperand(0);
3335 Value *Op1 = CI->getArgOperand(1);
3336 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3337 auto *VecTy = cast<FixedVectorType>(CI->getType());
3338 unsigned NumElts = VecTy->getNumElements();
3339
3340 SmallVector<int, 16> Idxs(NumElts);
3341 for (unsigned i = 0; i != NumElts; ++i)
3342 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
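      // e.g. v8i16 pblendw with Imm = 0xF0 yields Idxs = {0,1,2,3,12,13,14,15}:
      // the low four lanes come from Op0 and the high four from Op1.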
3343
3344 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3345 } else if (Name.starts_with("avx.vinsertf128.") ||
3346 Name == "avx2.vinserti128" ||
3347 Name.starts_with("avx512.mask.insert")) {
3348 Value *Op0 = CI->getArgOperand(0);
3349 Value *Op1 = CI->getArgOperand(1);
3350 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3351 unsigned DstNumElts =
3352 cast<FixedVectorType>(CI->getType())->getNumElements();
3353 unsigned SrcNumElts =
3354 cast<FixedVectorType>(Op1->getType())->getNumElements();
3355 unsigned Scale = DstNumElts / SrcNumElts;
3356
3357 // Mask off the high bits of the immediate value; hardware ignores those.
3358 Imm = Imm % Scale;
3359
3360 // Extend the second operand into a vector the size of the destination.
3361 SmallVector<int, 8> Idxs(DstNumElts);
3362 for (unsigned i = 0; i != SrcNumElts; ++i)
3363 Idxs[i] = i;
3364 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3365 Idxs[i] = SrcNumElts;
3366 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3367
3368 // Insert the second operand into the first operand.
3369
3370 // Note that there is no guarantee that instruction lowering will actually
3371 // produce a vinsertf128 instruction for the created shuffles. In
3372 // particular, the 0 immediate case involves no lane changes, so it can
3373 // be handled as a blend.
3374
3375 // Example of shuffle mask for 32-bit elements:
3376 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3377 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3378
3379 // First fill with the identity mask.
3380 for (unsigned i = 0; i != DstNumElts; ++i)
3381 Idxs[i] = i;
3382 // Then replace the elements where we need to insert.
3383 for (unsigned i = 0; i != SrcNumElts; ++i)
3384 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3385 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3386
3387 // If the intrinsic has a mask operand, handle that.
3388 if (CI->arg_size() == 5)
3389 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3390 CI->getArgOperand(3));
3391 } else if (Name.starts_with("avx.vextractf128.") ||
3392 Name == "avx2.vextracti128" ||
3393 Name.starts_with("avx512.mask.vextract")) {
3394 Value *Op0 = CI->getArgOperand(0);
3395 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3396 unsigned DstNumElts =
3397 cast<FixedVectorType>(CI->getType())->getNumElements();
3398 unsigned SrcNumElts =
3399 cast<FixedVectorType>(Op0->getType())->getNumElements();
3400 unsigned Scale = SrcNumElts / DstNumElts;
3401
3402 // Mask off the high bits of the immediate value; hardware ignores those.
3403 Imm = Imm % Scale;
3404
3405 // Get indexes for the subvector of the input vector.
3406 SmallVector<int, 8> Idxs(DstNumElts);
3407 for (unsigned i = 0; i != DstNumElts; ++i) {
3408 Idxs[i] = i + (Imm * DstNumElts);
3409 }
3410 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3411
3412 // If the intrinsic has a mask operand, handle that.
3413 if (CI->arg_size() == 4)
3414 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3415 CI->getArgOperand(2));
3416 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3417 Name.starts_with("avx512.mask.perm.di.")) {
3418 Value *Op0 = CI->getArgOperand(0);
3419 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3420 auto *VecTy = cast<FixedVectorType>(CI->getType());
3421 unsigned NumElts = VecTy->getNumElements();
3422
3423 SmallVector<int, 8> Idxs(NumElts);
3424 for (unsigned i = 0; i != NumElts; ++i)
3425 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
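      // e.g. vpermq/vpermpd with Imm = 0x1B turns each four-element group
      // into {3,2,1,0}, reversing a v4i64/v4f64 input.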
3426
3427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3428
3429 if (CI->arg_size() == 4)
3430 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3431 CI->getArgOperand(2));
3432 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3433 // The immediate permute control byte looks like this:
3434 // [1:0] - select 128 bits from sources for low half of destination
3435 // [2] - ignore
3436 // [3] - zero low half of destination
3437 // [5:4] - select 128 bits from sources for high half of destination
3438 // [6] - ignore
3439 // [7] - zero high half of destination
3440
3441 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3442
3443 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3444 unsigned HalfSize = NumElts / 2;
3445 SmallVector<int, 8> ShuffleMask(NumElts);
3446
3447 // Determine which operand(s) are actually in use for this instruction.
3448 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3449 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3450
3451 // If needed, replace operands based on zero mask.
3452 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3453 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3454
3455 // Permute low half of result.
3456 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3457 for (unsigned i = 0; i < HalfSize; ++i)
3458 ShuffleMask[i] = StartIndex + i;
3459
3460 // Permute high half of result.
3461 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3462 for (unsigned i = 0; i < HalfSize; ++i)
3463 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
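      // e.g. vperm2f128 $0x31 on v4f64 selects the high half of each source:
      // V0 = Op0, V1 = Op1, ShuffleMask = {2,3,6,7}.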
3464
3465 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3466
3467 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3468 Name.starts_with("avx512.mask.vpermil.p") ||
3469 Name.starts_with("avx512.mask.pshuf.d.")) {
3470 Value *Op0 = CI->getArgOperand(0);
3471 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3472 auto *VecTy = cast<FixedVectorType>(CI->getType());
3473 unsigned NumElts = VecTy->getNumElements();
3474 // Calculate the size of each index in the immediate.
3475 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3476 unsigned IdxMask = ((1 << IdxSize) - 1);
3477
3478 SmallVector<int, 8> Idxs(NumElts);
3479 // Lookup the bits for this element, wrapping around the immediate every
3480 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3481 // to offset by the first index of each group.
3482 for (unsigned i = 0; i != NumElts; ++i)
3483 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
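      // e.g. sse2.pshuf.d with Imm = 0x1B maps a v4i32 to Idxs = {3,2,1,0};
      // (i & ~IdxMask) re-adds each group's base index.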
3484
3485 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3486
3487 if (CI->arg_size() == 4)
3488 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3489 CI->getArgOperand(2));
3490 } else if (Name == "sse2.pshufl.w" ||
3491 Name.starts_with("avx512.mask.pshufl.w.")) {
3492 Value *Op0 = CI->getArgOperand(0);
3493 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3494 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3495
3496 SmallVector<int, 16> Idxs(NumElts);
3497 for (unsigned l = 0; l != NumElts; l += 8) {
3498 for (unsigned i = 0; i != 4; ++i)
3499 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3500 for (unsigned i = 4; i != 8; ++i)
3501 Idxs[i + l] = i + l;
3502 }
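      // e.g. pshuflw $0x1B gives Idxs = {3,2,1,0,4,5,6,7}: only the low four
      // words of each 128-bit lane are permuted.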
3503
3504 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3505
3506 if (CI->arg_size() == 4)
3507 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3508 CI->getArgOperand(2));
3509 } else if (Name == "sse2.pshufh.w" ||
3510 Name.starts_with("avx512.mask.pshufh.w.")) {
3511 Value *Op0 = CI->getArgOperand(0);
3512 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3513 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3514
3515 SmallVector<int, 16> Idxs(NumElts);
3516 for (unsigned l = 0; l != NumElts; l += 8) {
3517 for (unsigned i = 0; i != 4; ++i)
3518 Idxs[i + l] = i + l;
3519 for (unsigned i = 0; i != 4; ++i)
3520 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3521 }
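      // e.g. pshufhw $0x1B gives Idxs = {0,1,2,3,7,6,5,4}: the mirror case,
      // permuting only the high four words of each lane.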
3522
3523 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3524
3525 if (CI->arg_size() == 4)
3526 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3527 CI->getArgOperand(2));
3528 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3529 Value *Op0 = CI->getArgOperand(0);
3530 Value *Op1 = CI->getArgOperand(1);
3531 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3532 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3533
3534 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3535 unsigned HalfLaneElts = NumLaneElts / 2;
3536
3537 SmallVector<int, 16> Idxs(NumElts);
3538 for (unsigned i = 0; i != NumElts; ++i) {
3539 // Base index is the starting element of the lane.
3540 Idxs[i] = i - (i % NumLaneElts);
3541 // If we are halfway through the lane, switch to the other source.
3542 if ((i % NumLaneElts) >= HalfLaneElts)
3543 Idxs[i] += NumElts;
3544 // Now select the specific element by adding HalfLaneElts bits from the
3545 // immediate, wrapping around the immediate every 8 bits.
3546 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3547 }
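      // e.g. a 128-bit shufps with Imm = 0xE4 gives Idxs = {0,1,6,7}: the low
      // half of each lane reads Op0 and the high half reads Op1.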
3548
3549 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3550
3551 Rep =
3552 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3553 } else if (Name.starts_with("avx512.mask.movddup") ||
3554 Name.starts_with("avx512.mask.movshdup") ||
3555 Name.starts_with("avx512.mask.movsldup")) {
3556 Value *Op0 = CI->getArgOperand(0);
3557 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3558 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3559
3560 unsigned Offset = 0;
3561 if (Name.starts_with("avx512.mask.movshdup."))
3562 Offset = 1;
3563
3564 SmallVector<int, 16> Idxs(NumElts);
3565 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3566 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3567 Idxs[i + l + 0] = i + l + Offset;
3568 Idxs[i + l + 1] = i + l + Offset;
3569 }
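      // e.g. for v4f32, movsldup -> Idxs = {0,0,2,2} and movshdup ->
      // {1,1,3,3}; for v2f64, movddup -> {0,0}.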
3570
3571 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3572
3573 Rep =
3574 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3575 } else if (Name.starts_with("avx512.mask.punpckl") ||
3576 Name.starts_with("avx512.mask.unpckl.")) {
3577 Value *Op0 = CI->getArgOperand(0);
3578 Value *Op1 = CI->getArgOperand(1);
3579 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3580 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3581
3582 SmallVector<int, 64> Idxs(NumElts);
3583 for (int l = 0; l != NumElts; l += NumLaneElts)
3584 for (int i = 0; i != NumLaneElts; ++i)
3585 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
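      // e.g. a v4i32 punpckldq produces Idxs = {0,4,1,5}, interleaving the
      // low halves of Op0 and Op1 within each 128-bit lane.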
3586
3587 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3588
3589 Rep =
3590 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3591 } else if (Name.starts_with("avx512.mask.punpckh") ||
3592 Name.starts_with("avx512.mask.unpckh.")) {
3593 Value *Op0 = CI->getArgOperand(0);
3594 Value *Op1 = CI->getArgOperand(1);
3595 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3596 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3597
3598 SmallVector<int, 64> Idxs(NumElts);
3599 for (int l = 0; l != NumElts; l += NumLaneElts)
3600 for (int i = 0; i != NumLaneElts; ++i)
3601 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
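      // e.g. a v4i32 punpckhdq produces Idxs = {2,6,3,7}, interleaving the
      // high halves of each 128-bit lane.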
3602
3603 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3604
3605 Rep =
3606 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3607 } else if (Name.starts_with("avx512.mask.and.") ||
3608 Name.starts_with("avx512.mask.pand.")) {
3609 VectorType *FTy = cast<VectorType>(CI->getType());
3610 VectorType *ITy = VectorType::getInteger(FTy);
3611 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3612 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3613 Rep = Builder.CreateBitCast(Rep, FTy);
3614 Rep =
3615 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3616 } else if (Name.starts_with("avx512.mask.andn.") ||
3617 Name.starts_with("avx512.mask.pandn.")) {
3618 VectorType *FTy = cast<VectorType>(CI->getType());
3619 VectorType *ITy = VectorType::getInteger(FTy);
3620 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3621 Rep = Builder.CreateAnd(Rep,
3622 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3623 Rep = Builder.CreateBitCast(Rep, FTy);
3624 Rep =
3625 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3626 } else if (Name.starts_with("avx512.mask.or.") ||
3627 Name.starts_with("avx512.mask.por.")) {
3628 VectorType *FTy = cast<VectorType>(CI->getType());
3629 VectorType *ITy = VectorType::getInteger(FTy);
3630 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3631 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3632 Rep = Builder.CreateBitCast(Rep, FTy);
3633 Rep =
3634 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3635 } else if (Name.starts_with("avx512.mask.xor.") ||
3636 Name.starts_with("avx512.mask.pxor.")) {
3637 VectorType *FTy = cast<VectorType>(CI->getType());
3638 VectorType *ITy = VectorType::getInteger(FTy);
3639 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3640 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3641 Rep = Builder.CreateBitCast(Rep, FTy);
3642 Rep =
3643 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3644 } else if (Name.starts_with("avx512.mask.padd.")) {
3645 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3646 Rep =
3647 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3648 } else if (Name.starts_with("avx512.mask.psub.")) {
3649 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3650 Rep =
3651 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3652 } else if (Name.starts_with("avx512.mask.pmull.")) {
3653 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3654 Rep =
3655 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3656 } else if (Name.starts_with("avx512.mask.add.p")) {
3657 if (Name.ends_with(".512")) {
3658 Intrinsic::ID IID;
3659 if (Name[17] == 's')
3660 IID = Intrinsic::x86_avx512_add_ps_512;
3661 else
3662 IID = Intrinsic::x86_avx512_add_pd_512;
3663
3664 Rep = Builder.CreateIntrinsic(
3665 IID,
3666 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3667 } else {
3668 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3669 }
3670 Rep =
3671 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3672 } else if (Name.starts_with("avx512.mask.div.p")) {
3673 if (Name.ends_with(".512")) {
3674 Intrinsic::ID IID;
3675 if (Name[17] == 's')
3676 IID = Intrinsic::x86_avx512_div_ps_512;
3677 else
3678 IID = Intrinsic::x86_avx512_div_pd_512;
3679
3680 Rep = Builder.CreateIntrinsic(
3681 IID,
3682 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3683 } else {
3684 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3685 }
3686 Rep =
3687 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3688 } else if (Name.starts_with("avx512.mask.mul.p")) {
3689 if (Name.ends_with(".512")) {
3690 Intrinsic::ID IID;
3691 if (Name[17] == 's')
3692 IID = Intrinsic::x86_avx512_mul_ps_512;
3693 else
3694 IID = Intrinsic::x86_avx512_mul_pd_512;
3695
3696 Rep = Builder.CreateIntrinsic(
3697 IID,
3698 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3699 } else {
3700 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3701 }
3702 Rep =
3703 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3704 } else if (Name.starts_with("avx512.mask.sub.p")) {
3705 if (Name.ends_with(".512")) {
3706 Intrinsic::ID IID;
3707 if (Name[17] == 's')
3708 IID = Intrinsic::x86_avx512_sub_ps_512;
3709 else
3710 IID = Intrinsic::x86_avx512_sub_pd_512;
3711
3712 Rep = Builder.CreateIntrinsic(
3713 IID,
3714 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3715 } else {
3716 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3717 }
3718 Rep =
3719 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3720 } else if ((Name.starts_with("avx512.mask.max.p") ||
3721 Name.starts_with("avx512.mask.min.p")) &&
3722 Name.drop_front(18) == ".512") {
3723 bool IsDouble = Name[17] == 'd';
3724 bool IsMin = Name[13] == 'i';
3725 static const Intrinsic::ID MinMaxTbl[2][2] = {
3726 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3727 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3728 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3729
3730 Rep = Builder.CreateIntrinsic(
3731 IID,
3732 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3733 Rep =
3734 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3735 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3736 Rep =
3737 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3738 {CI->getArgOperand(0), Builder.getInt1(false)});
3739 Rep =
3740 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3741 } else if (Name.starts_with("avx512.mask.psll")) {
3742 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3743 bool IsVariable = Name[16] == 'v';
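      // The element-type character follows the "avx512.mask.psll" prefix (16
      // chars) and any 'i'/'v' plus width digits, e.g. the 'd' in
      // "avx512.mask.psll.d.128" or in "avx512.mask.psllv4.di".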
3744 char Size = Name[16] == '.' ? Name[17]
3745 : Name[17] == '.' ? Name[18]
3746 : Name[18] == '.' ? Name[19]
3747 : Name[20];
3748
3749 Intrinsic::ID IID;
3750 if (IsVariable && Name[17] != '.') {
3751 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3752 IID = Intrinsic::x86_avx2_psllv_q;
3753 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3754 IID = Intrinsic::x86_avx2_psllv_q_256;
3755 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3756 IID = Intrinsic::x86_avx2_psllv_d;
3757 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3758 IID = Intrinsic::x86_avx2_psllv_d_256;
3759 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3760 IID = Intrinsic::x86_avx512_psllv_w_128;
3761 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3762 IID = Intrinsic::x86_avx512_psllv_w_256;
3763 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3764 IID = Intrinsic::x86_avx512_psllv_w_512;
3765 else
3766 llvm_unreachable("Unexpected size");
3767 } else if (Name.ends_with(".128")) {
3768 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3769 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3770 : Intrinsic::x86_sse2_psll_d;
3771 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3772 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3773 : Intrinsic::x86_sse2_psll_q;
3774 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3775 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3776 : Intrinsic::x86_sse2_psll_w;
3777 else
3778 llvm_unreachable("Unexpected size");
3779 } else if (Name.ends_with(".256")) {
3780 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3781 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3782 : Intrinsic::x86_avx2_psll_d;
3783 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3784 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3785 : Intrinsic::x86_avx2_psll_q;
3786 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3787 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3788 : Intrinsic::x86_avx2_psll_w;
3789 else
3790 llvm_unreachable("Unexpected size");
3791 } else {
3792 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3793 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3794 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3795 : Intrinsic::x86_avx512_psll_d_512;
3796 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3797 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3798 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3799 : Intrinsic::x86_avx512_psll_q_512;
3800 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3801 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3802 : Intrinsic::x86_avx512_psll_w_512;
3803 else
3804 llvm_unreachable("Unexpected size");
3805 }
3806
3807 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3808 } else if (Name.starts_with("avx512.mask.psrl")) {
3809 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3810 bool IsVariable = Name[16] == 'v';
3811 char Size = Name[16] == '.' ? Name[17]
3812 : Name[17] == '.' ? Name[18]
3813 : Name[18] == '.' ? Name[19]
3814 : Name[20];
3815
3816 Intrinsic::ID IID;
3817 if (IsVariable && Name[17] != '.') {
3818 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3819 IID = Intrinsic::x86_avx2_psrlv_q;
3820 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3821 IID = Intrinsic::x86_avx2_psrlv_q_256;
3822 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3823 IID = Intrinsic::x86_avx2_psrlv_d;
3824 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3825 IID = Intrinsic::x86_avx2_psrlv_d_256;
3826 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3827 IID = Intrinsic::x86_avx512_psrlv_w_128;
3828 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3829 IID = Intrinsic::x86_avx512_psrlv_w_256;
3830 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3831 IID = Intrinsic::x86_avx512_psrlv_w_512;
3832 else
3833 llvm_unreachable("Unexpected size");
3834 } else if (Name.ends_with(".128")) {
3835 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3836 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3837 : Intrinsic::x86_sse2_psrl_d;
3838 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3839 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3840 : Intrinsic::x86_sse2_psrl_q;
3841 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3842 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3843 : Intrinsic::x86_sse2_psrl_w;
3844 else
3845 llvm_unreachable("Unexpected size");
3846 } else if (Name.ends_with(".256")) {
3847 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3848 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3849 : Intrinsic::x86_avx2_psrl_d;
3850 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3851 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3852 : Intrinsic::x86_avx2_psrl_q;
3853 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3854 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3855 : Intrinsic::x86_avx2_psrl_w;
3856 else
3857 llvm_unreachable("Unexpected size");
3858 } else {
3859 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3860 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3861 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3862 : Intrinsic::x86_avx512_psrl_d_512;
3863 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3864 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3865 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3866 : Intrinsic::x86_avx512_psrl_q_512;
3867 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3868 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3869 : Intrinsic::x86_avx512_psrl_w_512;
3870 else
3871 llvm_unreachable("Unexpected size");
3872 }
3873
3874 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3875 } else if (Name.starts_with("avx512.mask.psra")) {
3876 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3877 bool IsVariable = Name[16] == 'v';
3878 char Size = Name[16] == '.' ? Name[17]
3879 : Name[17] == '.' ? Name[18]
3880 : Name[18] == '.' ? Name[19]
3881 : Name[20];
3882
3883 Intrinsic::ID IID;
3884 if (IsVariable && Name[17] != '.') {
3885 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3886 IID = Intrinsic::x86_avx2_psrav_d;
3887 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3888 IID = Intrinsic::x86_avx2_psrav_d_256;
3889 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3890 IID = Intrinsic::x86_avx512_psrav_w_128;
3891 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3892 IID = Intrinsic::x86_avx512_psrav_w_256;
3893 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3894 IID = Intrinsic::x86_avx512_psrav_w_512;
3895 else
3896 llvm_unreachable("Unexpected size");
3897 } else if (Name.ends_with(".128")) {
3898 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3899 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3900 : Intrinsic::x86_sse2_psra_d;
3901 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3902 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3903 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3904 : Intrinsic::x86_avx512_psra_q_128;
3905 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3906 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3907 : Intrinsic::x86_sse2_psra_w;
3908 else
3909 llvm_unreachable("Unexpected size");
3910 } else if (Name.ends_with(".256")) {
3911 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3912 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3913 : Intrinsic::x86_avx2_psra_d;
3914 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3915 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3916 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3917 : Intrinsic::x86_avx512_psra_q_256;
3918 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3919 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3920 : Intrinsic::x86_avx2_psra_w;
3921 else
3922 llvm_unreachable("Unexpected size");
3923 } else {
3924 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3925 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3926 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3927 : Intrinsic::x86_avx512_psra_d_512;
3928 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3929 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3930 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3931 : Intrinsic::x86_avx512_psra_q_512;
3932 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3933 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3934 : Intrinsic::x86_avx512_psra_w_512;
3935 else
3936 llvm_unreachable("Unexpected size");
3937 }
3938
3939 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3940 } else if (Name.starts_with("avx512.mask.move.s")) {
3941 Rep = upgradeMaskedMove(Builder, *CI);
3942 } else if (Name.starts_with("avx512.cvtmask2")) {
3943 Rep = upgradeMaskToInt(Builder, *CI);
3944 } else if (Name.ends_with(".movntdqa")) {
3945 MDNode *Node = MDNode::get(
3946 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3947
3948 LoadInst *LI = Builder.CreateAlignedLoad(
3949 CI->getType(), CI->getArgOperand(0),
3950 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3951 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3952 Rep = LI;
3953 } else if (Name.starts_with("fma.vfmadd.") ||
3954 Name.starts_with("fma.vfmsub.") ||
3955 Name.starts_with("fma.vfnmadd.") ||
3956 Name.starts_with("fma.vfnmsub.")) {
3957 bool NegMul = Name[6] == 'n';
3958 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3959 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3960
3961 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3962 CI->getArgOperand(2)};
3963
3964 if (IsScalar) {
3965 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3966 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3967 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3968 }
3969
3970 if (NegMul && !IsScalar)
3971 Ops[0] = Builder.CreateFNeg(Ops[0]);
3972 if (NegMul && IsScalar)
3973 Ops[1] = Builder.CreateFNeg(Ops[1]);
3974 if (NegAcc)
3975 Ops[2] = Builder.CreateFNeg(Ops[2]);
3976
3977 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3978
3979 if (IsScalar)
3980 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3981 } else if (Name.starts_with("fma4.vfmadd.s")) {
3982 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3983 CI->getArgOperand(2)};
3984
3985 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3986 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3987 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3988
3989 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3990
3991 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3992 Rep, (uint64_t)0);
3993 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3994 Name.starts_with("avx512.maskz.vfmadd.s") ||
3995 Name.starts_with("avx512.mask3.vfmadd.s") ||
3996 Name.starts_with("avx512.mask3.vfmsub.s") ||
3997 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3998 bool IsMask3 = Name[11] == '3';
3999 bool IsMaskZ = Name[11] == 'z';
4000 // Drop the "avx512.mask." prefix to simplify the indexing below.
4001 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4002 bool NegMul = Name[2] == 'n';
4003 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4004
4005 Value *A = CI->getArgOperand(0);
4006 Value *B = CI->getArgOperand(1);
4007 Value *C = CI->getArgOperand(2);
4008
4009 if (NegMul && (IsMask3 || IsMaskZ))
4010 A = Builder.CreateFNeg(A);
4011 if (NegMul && !(IsMask3 || IsMaskZ))
4012 B = Builder.CreateFNeg(B);
4013 if (NegAcc)
4014 C = Builder.CreateFNeg(C);
4015
4016 A = Builder.CreateExtractElement(A, (uint64_t)0);
4017 B = Builder.CreateExtractElement(B, (uint64_t)0);
4018 C = Builder.CreateExtractElement(C, (uint64_t)0);
4019
4020 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4021 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4022 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4023
4024 Intrinsic::ID IID;
4025 if (Name.back() == 'd')
4026 IID = Intrinsic::x86_avx512_vfmadd_f64;
4027 else
4028 IID = Intrinsic::x86_avx512_vfmadd_f32;
4029 Rep = Builder.CreateIntrinsic(IID, Ops);
4030 } else {
4031 Rep = Builder.CreateFMA(A, B, C);
4032 }
4033
4034 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4035 : IsMask3 ? C
4036 : A;
4037
4038 // For Mask3 with NegAcc, we need to create a new extractelement that
4039 // avoids the negation above.
4040 if (NegAcc && IsMask3)
4041 PassThru =
4042 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4043
4044 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4045 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4046 (uint64_t)0);
4047 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4048 Name.starts_with("avx512.mask.vfnmadd.p") ||
4049 Name.starts_with("avx512.mask.vfnmsub.p") ||
4050 Name.starts_with("avx512.mask3.vfmadd.p") ||
4051 Name.starts_with("avx512.mask3.vfmsub.p") ||
4052 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4053 Name.starts_with("avx512.maskz.vfmadd.p")) {
4054 bool IsMask3 = Name[11] == '3';
4055 bool IsMaskZ = Name[11] == 'z';
4056 // Drop the "avx512.mask." prefix to simplify the indexing below.
4057 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4058 bool NegMul = Name[2] == 'n';
4059 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4060
4061 Value *A = CI->getArgOperand(0);
4062 Value *B = CI->getArgOperand(1);
4063 Value *C = CI->getArgOperand(2);
4064
4065 if (NegMul && (IsMask3 || IsMaskZ))
4066 A = Builder.CreateFNeg(A);
4067 if (NegMul && !(IsMask3 || IsMaskZ))
4068 B = Builder.CreateFNeg(B);
4069 if (NegAcc)
4070 C = Builder.CreateFNeg(C);
4071
4072 if (CI->arg_size() == 5 &&
4073 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4074 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4075 Intrinsic::ID IID;
4076 // Check the character before ".512" in the string.
4077 if (Name[Name.size() - 5] == 's')
4078 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4079 else
4080 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4081
4082 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4083 } else {
4084 Rep = Builder.CreateFMA(A, B, C);
4085 }
4086
4087 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4088 : IsMask3 ? CI->getArgOperand(2)
4089 : CI->getArgOperand(0);
4090
4091 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4092 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4093 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4094 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4095 Intrinsic::ID IID;
4096 if (VecWidth == 128 && EltWidth == 32)
4097 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4098 else if (VecWidth == 256 && EltWidth == 32)
4099 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4100 else if (VecWidth == 128 && EltWidth == 64)
4101 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4102 else if (VecWidth == 256 && EltWidth == 64)
4103 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4104 else
4105 llvm_unreachable("Unexpected intrinsic");
4106
4107 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4108 CI->getArgOperand(2)};
4109 Ops[2] = Builder.CreateFNeg(Ops[2]);
4110 Rep = Builder.CreateIntrinsic(IID, Ops);
4111 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4112 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4113 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4114 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4115 bool IsMask3 = Name[11] == '3';
4116 bool IsMaskZ = Name[11] == 'z';
4117 // Drop the "avx512.mask." prefix to simplify the indexing below.
4118 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4119 bool IsSubAdd = Name[3] == 's';
4120 if (CI->arg_size() == 5) {
4121 Intrinsic::ID IID;
4122 // Check the character before ".512" in the string.
4123 if (Name[Name.size() - 5] == 's')
4124 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4125 else
4126 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4127
4128 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4129 CI->getArgOperand(2), CI->getArgOperand(4)};
4130 if (IsSubAdd)
4131 Ops[2] = Builder.CreateFNeg(Ops[2]);
4132
4133 Rep = Builder.CreateIntrinsic(IID, Ops);
4134 } else {
4135 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4136
4137 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4138 CI->getArgOperand(2)};
4139
4139
4140 Function *FMA = Intrinsic::getOrInsertDeclaration(
4141 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4142 Value *Odd = Builder.CreateCall(FMA, Ops);
4143 Ops[2] = Builder.CreateFNeg(Ops[2]);
4144 Value *Even = Builder.CreateCall(FMA, Ops);
4145
4146 if (IsSubAdd)
4147 std::swap(Even, Odd);
4148
4149 SmallVector<int, 32> Idxs(NumElts);
4150 for (int i = 0; i != NumElts; ++i)
4151 Idxs[i] = i + (i % 2) * NumElts;
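        // Even result lanes read Even (index i) and odd lanes read Odd
        // (index i + NumElts); e.g. NumElts = 4 gives Idxs = {0,5,2,7}.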
4152
4153 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4154 }
4155
4156 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4157 : IsMask3 ? CI->getArgOperand(2)
4158 : CI->getArgOperand(0);
4159
4160 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4161 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4162 Name.starts_with("avx512.maskz.pternlog.")) {
4163 bool ZeroMask = Name[11] == 'z';
4164 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4165 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4166 Intrinsic::ID IID;
4167 if (VecWidth == 128 && EltWidth == 32)
4168 IID = Intrinsic::x86_avx512_pternlog_d_128;
4169 else if (VecWidth == 256 && EltWidth == 32)
4170 IID = Intrinsic::x86_avx512_pternlog_d_256;
4171 else if (VecWidth == 512 && EltWidth == 32)
4172 IID = Intrinsic::x86_avx512_pternlog_d_512;
4173 else if (VecWidth == 128 && EltWidth == 64)
4174 IID = Intrinsic::x86_avx512_pternlog_q_128;
4175 else if (VecWidth == 256 && EltWidth == 64)
4176 IID = Intrinsic::x86_avx512_pternlog_q_256;
4177 else if (VecWidth == 512 && EltWidth == 64)
4178 IID = Intrinsic::x86_avx512_pternlog_q_512;
4179 else
4180 llvm_unreachable("Unexpected intrinsic");
4181
4182 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4183 CI->getArgOperand(2), CI->getArgOperand(3)};
4184 Rep = Builder.CreateIntrinsic(IID, Args);
4185 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4186 : CI->getArgOperand(0);
4187 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4188 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4189 Name.starts_with("avx512.maskz.vpmadd52")) {
4190 bool ZeroMask = Name[11] == 'z';
4191 bool High = Name[20] == 'h' || Name[21] == 'h';
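      // "avx512.mask.vpmadd52" is 20 characters and "avx512.maskz.vpmadd52"
      // is 21, so the 'h'/'l' of the "...52h"/"...52l" suffix lands at index
      // 20 or 21.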
4192 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4193 Intrinsic::ID IID;
4194 if (VecWidth == 128 && !High)
4195 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4196 else if (VecWidth == 256 && !High)
4197 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4198 else if (VecWidth == 512 && !High)
4199 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4200 else if (VecWidth == 128 && High)
4201 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4202 else if (VecWidth == 256 && High)
4203 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4204 else if (VecWidth == 512 && High)
4205 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4206 else
4207 llvm_unreachable("Unexpected intrinsic");
4208
4209 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4210 CI->getArgOperand(2)};
4211 Rep = Builder.CreateIntrinsic(IID, Args);
4212 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4213 : CI->getArgOperand(0);
4214 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4215 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4216 Name.starts_with("avx512.mask.vpermt2var.") ||
4217 Name.starts_with("avx512.maskz.vpermt2var.")) {
4218 bool ZeroMask = Name[11] == 'z';
4219 bool IndexForm = Name[17] == 'i';
4220 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4221 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4222 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4223 Name.starts_with("avx512.mask.vpdpbusds.") ||
4224 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4225 bool ZeroMask = Name[11] == 'z';
4226 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4227 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4228 Intrinsic::ID IID;
4229 if (VecWidth == 128 && !IsSaturating)
4230 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4231 else if (VecWidth == 256 && !IsSaturating)
4232 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4233 else if (VecWidth == 512 && !IsSaturating)
4234 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4235 else if (VecWidth == 128 && IsSaturating)
4236 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4237 else if (VecWidth == 256 && IsSaturating)
4238 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4239 else if (VecWidth == 512 && IsSaturating)
4240 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4241 else
4242 llvm_unreachable("Unexpected intrinsic");
4243
4244 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4245 CI->getArgOperand(2)};
4246
4247 // The input argument types were incorrectly set to vectors of i32 before,
4248 // but they should be vectors of i8. Insert a bitcast when encountering the
4249 // old types.
4250 if (Args[1]->getType()->isVectorTy() &&
4251 cast<VectorType>(Args[1]->getType())
4252 ->getElementType()
4253 ->isIntegerTy(32) &&
4254 Args[2]->getType()->isVectorTy() &&
4255 cast<VectorType>(Args[2]->getType())
4256 ->getElementType()
4257 ->isIntegerTy(32)) {
4258 Type *NewArgType = nullptr;
4259 if (VecWidth == 128)
4260 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4261 else if (VecWidth == 256)
4262 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4263 else if (VecWidth == 512)
4264 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4265 else
4266 llvm_unreachable("Unexpected vector bit width");
4267
4268 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4269 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4270 }
4271
4272 Rep = Builder.CreateIntrinsic(IID, Args);
4273 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4274 : CI->getArgOperand(0);
4275 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4276 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4277 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4278 Name.starts_with("avx512.mask.vpdpwssds.") ||
4279 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4280 bool ZeroMask = Name[11] == 'z';
4281 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4282 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4283 Intrinsic::ID IID;
4284 if (VecWidth == 128 && !IsSaturating)
4285 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4286 else if (VecWidth == 256 && !IsSaturating)
4287 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4288 else if (VecWidth == 512 && !IsSaturating)
4289 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4290 else if (VecWidth == 128 && IsSaturating)
4291 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4292 else if (VecWidth == 256 && IsSaturating)
4293 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4294 else if (VecWidth == 512 && IsSaturating)
4295 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4296 else
4297 llvm_unreachable("Unexpected intrinsic");
4298
4299 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4300 CI->getArgOperand(2)};
4301 Rep = Builder.CreateIntrinsic(IID, Args);
4302 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4303 : CI->getArgOperand(0);
4304 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4305 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4306 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4307 Name == "subborrow.u32" || Name == "subborrow.u64") {
4308 Intrinsic::ID IID;
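      // "addcarry"/"subborrow" pick the width from the last character:
      // ".u32" ends in '2' and ".u64" ends in '4'.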
4309 if (Name[0] == 'a' && Name.back() == '2')
4310 IID = Intrinsic::x86_addcarry_32;
4311 else if (Name[0] == 'a' && Name.back() == '4')
4312 IID = Intrinsic::x86_addcarry_64;
4313 else if (Name[0] == 's' && Name.back() == '2')
4314 IID = Intrinsic::x86_subborrow_32;
4315 else if (Name[0] == 's' && Name.back() == '4')
4316 IID = Intrinsic::x86_subborrow_64;
4317 else
4318 llvm_unreachable("Unexpected intrinsic");
4319
4320 // Make a call with 3 operands.
4321 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4322 CI->getArgOperand(2)};
4323 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4324
4325 // Extract the second result and store it.
4326 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4327 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4328 // Replace the original call result with the first result of the new call.
4329 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4330
4331 CI->replaceAllUsesWith(CF);
4332 Rep = nullptr;
4333 } else if (Name.starts_with("avx512.mask.") &&
4334 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4335 // Rep will be updated by the call in the condition.
4336 }
4337
4338 return Rep;
4339}
4340
4341 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4342 Function *F, IRBuilder<> &Builder) {
4343 if (Name.starts_with("neon.bfcvt")) {
4344 if (Name.starts_with("neon.bfcvtn2")) {
4345 SmallVector<int, 32> LoMask(4);
4346 std::iota(LoMask.begin(), LoMask.end(), 0);
4347 SmallVector<int, 32> ConcatMask(8);
4348 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4349 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4350 Value *Trunc =
4351 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4352 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4353 } else if (Name.starts_with("neon.bfcvtn")) {
4354 SmallVector<int, 32> ConcatMask(8);
4355 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4356 Type *V4BF16 =
4357 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4358 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4360 return Builder.CreateShuffleVector(
4361 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4362 } else {
4363 return Builder.CreateFPTrunc(CI->getOperand(0),
4364 Type::getBFloatTy(F->getContext()));
4365 }
4366 } else if (Name.starts_with("sve.fcvt")) {
4367 Intrinsic::ID NewID =
4368 StringSwitch<Intrinsic::ID>(Name)
4369 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4370 .Case("sve.fcvtnt.bf16f32",
4371 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4372 .Default(Intrinsic::not_intrinsic);
4373 if (NewID == Intrinsic::not_intrinsic)
4374 llvm_unreachable("Unhandled Intrinsic!");
4375
4376 SmallVector<Value *, 3> Args(CI->args());
4377
4378 // The original intrinsics incorrectly used a predicate based on the
4379 // smallest element type rather than the largest.
4380 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4381 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4382
4383 if (Args[1]->getType() != BadPredTy)
4384 llvm_unreachable("Unexpected predicate type!");
4385
4386 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4387 BadPredTy, Args[1]);
4388 Args[1] = Builder.CreateIntrinsic(
4389 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4390
4391 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4392 CI->getName());
4393 }
4394
4395 llvm_unreachable("Unhandled Intrinsic!");
4396}
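// Editorial sketch (hypothetical %names): the sve.fcvt path above retypes the
// predicate by round-tripping through svbool, e.g.
//   %b   = call <vscale x 16 x i1>
//              @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
//   %pg4 = call <vscale x 4 x i1>
//              @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %b)
// before the *_v2 replacement intrinsic is called with %pg4.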
4397
4398 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4399 IRBuilder<> &Builder) {
4400 if (Name == "mve.vctp64.old") {
4401 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4402 // correct type.
4403 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4404 CI->getArgOperand(0),
4405 /*FMFSource=*/nullptr, CI->getName());
4406 Value *C1 = Builder.CreateIntrinsic(
4407 Intrinsic::arm_mve_pred_v2i,
4408 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4409 return Builder.CreateIntrinsic(
4410 Intrinsic::arm_mve_pred_i2v,
4411 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4412 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4413 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4414 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4415 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4416 Name ==
4417 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4418 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4419 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4420 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4421 Name ==
4422 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4423 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4424 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4425 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4426 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4427 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4428 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4429 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4430 std::vector<Type *> Tys;
4431 unsigned ID = CI->getIntrinsicID();
4432 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4433 switch (ID) {
4434 case Intrinsic::arm_mve_mull_int_predicated:
4435 case Intrinsic::arm_mve_vqdmull_predicated:
4436 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4437 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4438 break;
4439 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4440 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4441 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4442 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4443 V2I1Ty};
4444 break;
4445 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4446 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4447 CI->getOperand(1)->getType(), V2I1Ty};
4448 break;
4449 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4450 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4451 CI->getOperand(2)->getType(), V2I1Ty};
4452 break;
4453 case Intrinsic::arm_cde_vcx1q_predicated:
4454 case Intrinsic::arm_cde_vcx1qa_predicated:
4455 case Intrinsic::arm_cde_vcx2q_predicated:
4456 case Intrinsic::arm_cde_vcx2qa_predicated:
4457 case Intrinsic::arm_cde_vcx3q_predicated:
4458 case Intrinsic::arm_cde_vcx3qa_predicated:
4459 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4460 break;
4461 default:
4462 llvm_unreachable("Unhandled Intrinsic!");
4463 }
4464
4465 std::vector<Value *> Ops;
4466 for (Value *Op : CI->args()) {
4467 Type *Ty = Op->getType();
4468 if (Ty->getScalarSizeInBits() == 1) {
4469 Value *C1 = Builder.CreateIntrinsic(
4470 Intrinsic::arm_mve_pred_v2i,
4471 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4472 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4473 }
4474 Ops.push_back(Op);
4475 }
4476
4477 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4478 CI->getName());
4479 }
4480 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4481}
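// Editorial sketch (hypothetical %names): the vctp64.old path rewrites
//   %p = call <4 x i1> @llvm.arm.mve.vctp64.old(i32 %n)
// into a v2i1 vctp plus predicate casts:
//   %v = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %v)
//   %p = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)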
4482
4483// These are expected to have the arguments:
4484// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4485//
4486// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4487//
4488 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4489 Function *F, IRBuilder<> &Builder) {
4490 AtomicRMWInst::BinOp RMWOp =
4492 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4493 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4494 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4495 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4496 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4497 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4498 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4499 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4500 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4501 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4502 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4503
4504 unsigned NumOperands = CI->getNumOperands();
4505 if (NumOperands < 3) // Malformed bitcode.
4506 return nullptr;
4507
4508 Value *Ptr = CI->getArgOperand(0);
4509 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4510 if (!PtrTy) // Malformed.
4511 return nullptr;
4512
4513 Value *Val = CI->getArgOperand(1);
4514 if (Val->getType() != CI->getType()) // Malformed.
4515 return nullptr;
4516
4517 ConstantInt *OrderArg = nullptr;
4518 bool IsVolatile = false;
4519
4520 // These should have 5 arguments (plus the callee). A separate version of the
4521 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4522 if (NumOperands > 3)
4523 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4524
4525 // Ignore scope argument at 3
4526
4527 if (NumOperands > 5) {
4528 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4529 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4530 }
4531
4532 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4533 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4534 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4535 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4536 Order = AtomicOrdering::SequentiallyConsistent;
4537
4538 LLVMContext &Ctx = F->getContext();
4539
4540 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4541 Type *RetTy = CI->getType();
4542 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4543 if (VT->getElementType()->isIntegerTy(16)) {
4544 VectorType *AsBF16 =
4545 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4546 Val = Builder.CreateBitCast(Val, AsBF16);
4547 }
4548 }
4549
4550 // The scope argument never really worked correctly. Use agent as the most
4551 // conservative option which should still always produce the instruction.
4552 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4553 AtomicRMWInst *RMW =
4554 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4555
4556 unsigned AddrSpace = PtrTy->getAddressSpace();
4557 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4558 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4559 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4560 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4561 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4562 }
4563
4564 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4565 MDBuilder MDB(F->getContext());
4566 MDNode *RangeNotPrivate =
4569 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4570 }
4571
4572 if (IsVolatile)
4573 RMW->setVolatile(true);
4574
4575 return Builder.CreateBitCast(RMW, RetTy);
4576}
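// Editorial sketch, assuming a float ds.fadd with defaulted trailing operands
// (%names hypothetical, intrinsic mangling elided): a call such as
//   %r = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %p, float %v,
//                                        i32 0, i32 0, i1 false)
// is replaced by a plain atomicrmw with the conservative "agent" scope:
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") seq_cst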
4577
4578/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4579/// plain MDNode, as it's the verifier's job to check these are the correct
4580/// types later.
4581static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4582 if (Op < CI->arg_size()) {
4583 if (MetadataAsValue *MAV =
4584 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4585 Metadata *MD = MAV->getMetadata();
4586 return dyn_cast_if_present<MDNode>(MD);
4587 }
4588 }
4589 return nullptr;
4590}
4591
4592/// Helper to unwrap MetadataAsValue operands that carry plain Metadata, such as the Value field.
4593static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4594 if (Op < CI->arg_size())
4595 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4596 return MAV->getMetadata();
4597 return nullptr;
4598}
4599
4600 static MDNode *getDebugLocSafe(const Instruction *I) {
4601 // The MDNode attached to this instruction might not be the correct type,
4602 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4603 return I->getDebugLoc().getAsMDNode();
4604}
4605
4606/// Convert debug intrinsic calls to non-instruction debug records.
4607/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4608/// \p CI - The debug intrinsic call.
4609 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4610 DbgRecord *DR = nullptr;
4611 if (Name == "label") {
4612 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4613 CI->getDebugLoc());
4614 } else if (Name == "assign") {
4615 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4616 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4617 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4618 unwrapMAVMetadataOp(CI, 4),
4619 /* The address is a Value ref; it will be stored as Metadata. */
4620 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4621 } else if (Name == "declare") {
4622 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4623 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4624 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4625 getDebugLocSafe(CI));
4626 } else if (Name == "addr") {
4627 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4628 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4629 // Don't try to add something to the expression if it's not an expression.
4630 // Instead, allow the verifier to fail later.
4631 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4632 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4633 }
4634 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4635 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4636 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4637 getDebugLocSafe(CI));
4638 } else if (Name == "value") {
4639 // An old version of dbg.value had an extra offset argument.
4640 unsigned VarOp = 1;
4641 unsigned ExprOp = 2;
4642 if (CI->arg_size() == 4) {
4643 auto *Offset = dyn_cast<Constant>(CI->getArgOperand(1));
4644 // Nonzero offset dbg.values get dropped without a replacement.
4645 if (!Offset || !Offset->isZeroValue())
4646 return;
4647 VarOp = 2;
4648 ExprOp = 3;
4649 }
4650 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4651 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4652 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4653 nullptr, getDebugLocSafe(CI));
4654 }
4655 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4656 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4657}
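// Editorial sketch (metadata ids hypothetical): a debug intrinsic such as
//   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
//                             metadata !DIExpression())
// is re-emitted as the equivalent non-instruction record
//   #dbg_value(i32 %x, !10, !DIExpression(), !dbgloc)
// inserted before the old call's position.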
4658
4659/// Upgrade a call to an old intrinsic. All argument and return casting must be
4660/// provided to seamlessly integrate with existing context.
4661 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4662 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4663 // checks the callee's function type matches. It's likely we need to handle
4664 // type changes here.
4665 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4666 if (!F)
4667 return;
4668
4669 LLVMContext &C = CI->getContext();
4670 IRBuilder<> Builder(C);
4671 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4672
4673 if (!NewFn) {
4674 // Get the Function's name.
4675 StringRef Name = F->getName();
4676
4677 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4678 Name = Name.substr(5);
4679
4680 bool IsX86 = Name.consume_front("x86.");
4681 bool IsNVVM = Name.consume_front("nvvm.");
4682 bool IsAArch64 = Name.consume_front("aarch64.");
4683 bool IsARM = Name.consume_front("arm.");
4684 bool IsAMDGCN = Name.consume_front("amdgcn.");
4685 bool IsDbg = Name.consume_front("dbg.");
4686 Value *Rep = nullptr;
4687
4688 if (!IsX86 && Name == "stackprotectorcheck") {
4689 Rep = nullptr;
4690 } else if (IsNVVM) {
4691 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4692 } else if (IsX86) {
4693 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4694 } else if (IsAArch64) {
4695 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4696 } else if (IsARM) {
4697 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4698 } else if (IsAMDGCN) {
4699 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4700 } else if (IsDbg) {
4701 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4702 } else {
4703 llvm_unreachable("Unknown function for CallBase upgrade.");
4704 }
4705
4706 if (Rep)
4707 CI->replaceAllUsesWith(Rep);
4708 CI->eraseFromParent();
4709 return;
4710 }
4711
4712 const auto &DefaultCase = [&]() -> void {
4713 if (F == NewFn)
4714 return;
4715
4716 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4717 // Handle generic mangling change.
4718 assert(
4719 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4720 "Unknown function for CallBase upgrade and isn't just a name change");
4721 CI->setCalledFunction(NewFn);
4722 return;
4723 }
4724
4725 // This must be an upgrade from a named to a literal struct.
4726 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4727 assert(OldST != NewFn->getReturnType() &&
4728 "Return type must have changed");
4729 assert(OldST->getNumElements() ==
4730 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4731 "Must have same number of elements");
4732
4733 SmallVector<Value *> Args(CI->args());
4734 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4735 NewCI->setAttributes(CI->getAttributes());
4736 Value *Res = PoisonValue::get(OldST);
4737 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4738 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4739 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4740 }
4741 CI->replaceAllUsesWith(Res);
4742 CI->eraseFromParent();
4743 return;
4744 }
4745
4746 // We're probably about to produce something invalid. Let the verifier catch
4747 // it instead of dying here.
4748 CI->setCalledOperand(
4749 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4750 return;
4751 };
4752 CallInst *NewCall = nullptr;
4753 switch (NewFn->getIntrinsicID()) {
4754 default: {
4755 DefaultCase();
4756 return;
4757 }
4758 case Intrinsic::arm_neon_vst1:
4759 case Intrinsic::arm_neon_vst2:
4760 case Intrinsic::arm_neon_vst3:
4761 case Intrinsic::arm_neon_vst4:
4762 case Intrinsic::arm_neon_vst2lane:
4763 case Intrinsic::arm_neon_vst3lane:
4764 case Intrinsic::arm_neon_vst4lane: {
4765 SmallVector<Value *, 4> Args(CI->args());
4766 NewCall = Builder.CreateCall(NewFn, Args);
4767 break;
4768 }
4769 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4770 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4771 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4772 LLVMContext &Ctx = F->getParent()->getContext();
4773 SmallVector<Value *, 4> Args(CI->args());
4774 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4775 cast<ConstantInt>(Args[3])->getZExtValue());
4776 NewCall = Builder.CreateCall(NewFn, Args);
4777 break;
4778 }
4779 case Intrinsic::aarch64_sve_ld3_sret:
4780 case Intrinsic::aarch64_sve_ld4_sret:
4781 case Intrinsic::aarch64_sve_ld2_sret: {
4782 StringRef Name = F->getName();
4783 Name = Name.substr(5);
4784 unsigned N = StringSwitch<unsigned>(Name)
4785 .StartsWith("aarch64.sve.ld2", 2)
4786 .StartsWith("aarch64.sve.ld3", 3)
4787 .StartsWith("aarch64.sve.ld4", 4)
4788 .Default(0);
4789 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4790 unsigned MinElts = RetTy->getMinNumElements() / N;
4791 SmallVector<Value *, 2> Args(CI->args());
4792 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4793 Value *Ret = llvm::PoisonValue::get(RetTy);
4794 for (unsigned I = 0; I < N; I++) {
4795 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4796 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4797 }
4798 NewCall = dyn_cast<CallInst>(Ret);
4799 break;
4800 }
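// Editorial sketch (element types and %names hypothetical): an old wide-vector
//   %w = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32(...)
// is rebuilt from the sret form by concatenating the struct fields:
//   %s = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
//            @llvm.aarch64.sve.ld2.sret.nxv4i32(...)
//   ; extractvalue %s, 0 is inserted at element offset 0,
//   ; extractvalue %s, 1 at element offset MinElts (here 4).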
4801
4802 case Intrinsic::coro_end: {
4803 SmallVector<Value *, 3> Args(CI->args());
4804 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4805 NewCall = Builder.CreateCall(NewFn, Args);
4806 break;
4807 }
4808
4809 case Intrinsic::vector_extract: {
4810 StringRef Name = F->getName();
4811 Name = Name.substr(5); // Strip llvm
4812 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4813 DefaultCase();
4814 return;
4815 }
4816 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4817 unsigned MinElts = RetTy->getMinNumElements();
4818 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4819 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4820 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4821 break;
4822 }
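// Editorial sketch (types hypothetical): with a <vscale x 4 x i32> result
// (MinElts = 4), the old
//   %v = call <vscale x 4 x i32>
//            @llvm.aarch64.sve.tuple.get.nxv4i32(<vscale x 8 x i32> %t, i32 1)
// becomes a vector.extract at the scaled index 1 * 4:
//   %v = call <vscale x 4 x i32>
//            @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %t, i64 4)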
4823
4824 case Intrinsic::vector_insert: {
4825 StringRef Name = F->getName();
4826 Name = Name.substr(5);
4827 if (!Name.starts_with("aarch64.sve.tuple")) {
4828 DefaultCase();
4829 return;
4830 }
4831 if (Name.starts_with("aarch64.sve.tuple.set")) {
4832 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4833 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4834 Value *NewIdx =
4835 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4836 NewCall = Builder.CreateCall(
4837 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4838 break;
4839 }
4840 if (Name.starts_with("aarch64.sve.tuple.create")) {
4841 unsigned N = StringSwitch<unsigned>(Name)
4842 .StartsWith("aarch64.sve.tuple.create2", 2)
4843 .StartsWith("aarch64.sve.tuple.create3", 3)
4844 .StartsWith("aarch64.sve.tuple.create4", 4)
4845 .Default(0);
4846 assert(N > 1 && "Create is expected to be between 2-4");
4847 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4848 Value *Ret = llvm::PoisonValue::get(RetTy);
4849 unsigned MinElts = RetTy->getMinNumElements() / N;
4850 for (unsigned I = 0; I < N; I++) {
4851 Value *V = CI->getArgOperand(I);
4852 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4853 }
4854 NewCall = dyn_cast<CallInst>(Ret);
4855 }
4856 break;
4857 }
4858
4859 case Intrinsic::arm_neon_bfdot:
4860 case Intrinsic::arm_neon_bfmmla:
4861 case Intrinsic::arm_neon_bfmlalb:
4862 case Intrinsic::arm_neon_bfmlalt:
4863 case Intrinsic::aarch64_neon_bfdot:
4864 case Intrinsic::aarch64_neon_bfmmla:
4865 case Intrinsic::aarch64_neon_bfmlalb:
4866 case Intrinsic::aarch64_neon_bfmlalt: {
4867 SmallVector<Value *, 3> Args;
4868 assert(CI->arg_size() == 3 &&
4869 "Mismatch between function args and call args");
4870 size_t OperandWidth =
4871 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4872 assert((OperandWidth == 64 || OperandWidth == 128) &&
4873 "Unexpected operand width");
4874 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4875 auto Iter = CI->args().begin();
4876 Args.push_back(*Iter++);
4877 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4878 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4879 NewCall = Builder.CreateCall(NewFn, Args);
4880 break;
4881 }
4882
4883 case Intrinsic::bitreverse:
4884 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4885 break;
4886
4887 case Intrinsic::ctlz:
4888 case Intrinsic::cttz:
4889 assert(CI->arg_size() == 1 &&
4890 "Mismatch between function args and call args");
4891 NewCall =
4892 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4893 break;
4894
4895 case Intrinsic::objectsize: {
4896 Value *NullIsUnknownSize =
4897 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4898 Value *Dynamic =
4899 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4900 NewCall = Builder.CreateCall(
4901 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4902 break;
4903 }
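// Editorial sketch: an older two-argument objectsize call gains defaults for
// the null-is-unknown-size and dynamic flags, e.g.
//   %s = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false)
// becomes
//   %s = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 false, i1 false)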
4904
4905 case Intrinsic::ctpop:
4906 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4907 break;
4908
4909 case Intrinsic::convert_from_fp16:
4910 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4911 break;
4912
4913 case Intrinsic::dbg_value: {
4914 StringRef Name = F->getName();
4915 Name = Name.substr(5); // Strip llvm.
4916 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4917 if (Name.starts_with("dbg.addr")) {
4918 DIExpression *Expr = cast<DIExpression>(
4919 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4920 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4921 NewCall =
4922 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4923 MetadataAsValue::get(C, Expr)});
4924 break;
4925 }
4926
4927 // Upgrade from the old version that had an extra offset argument.
4928 assert(CI->arg_size() == 4);
4929 // Drop nonzero offsets instead of attempting to upgrade them.
4930 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4931 if (Offset->isZeroValue()) {
4932 NewCall = Builder.CreateCall(
4933 NewFn,
4934 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4935 break;
4936 }
4937 CI->eraseFromParent();
4938 return;
4939 }
4940
4941 case Intrinsic::ptr_annotation:
4942 // Upgrade from versions that lacked the annotation attribute argument.
4943 if (CI->arg_size() != 4) {
4944 DefaultCase();
4945 return;
4946 }
4947
4948 // Create a new call with an added null annotation attribute argument.
4949 NewCall = Builder.CreateCall(
4950 NewFn,
4951 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4952 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4953 NewCall->takeName(CI);
4954 CI->replaceAllUsesWith(NewCall);
4955 CI->eraseFromParent();
4956 return;
4957
4958 case Intrinsic::var_annotation:
4959 // Upgrade from versions that lacked the annotation attribute argument.
4960 if (CI->arg_size() != 4) {
4961 DefaultCase();
4962 return;
4963 }
4964 // Create a new call with an added null annotation attribute argument.
4965 NewCall = Builder.CreateCall(
4966 NewFn,
4967 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4968 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4969 NewCall->takeName(CI);
4970 CI->replaceAllUsesWith(NewCall);
4971 CI->eraseFromParent();
4972 return;
4973
4974 case Intrinsic::riscv_aes32dsi:
4975 case Intrinsic::riscv_aes32dsmi:
4976 case Intrinsic::riscv_aes32esi:
4977 case Intrinsic::riscv_aes32esmi:
4978 case Intrinsic::riscv_sm4ks:
4979 case Intrinsic::riscv_sm4ed: {
4980 // The last argument to these intrinsics used to be i8 and changed to i32.
4981 // The type overload for sm4ks and sm4ed was removed.
4982 Value *Arg2 = CI->getArgOperand(2);
4983 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4984 return;
4985
4986 Value *Arg0 = CI->getArgOperand(0);
4987 Value *Arg1 = CI->getArgOperand(1);
4988 if (CI->getType()->isIntegerTy(64)) {
4989 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4990 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4991 }
4992
4993 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4994 cast<ConstantInt>(Arg2)->getZExtValue());
4995
4996 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4997 Value *Res = NewCall;
4998 if (Res->getType() != CI->getType())
4999 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5000 NewCall->takeName(CI);
5001 CI->replaceAllUsesWith(Res);
5002 CI->eraseFromParent();
5003 return;
5004 }
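// Editorial sketch for the RV64 form (hypothetical %names): the removed i64
// overload is emulated around the i32-only intrinsic, e.g. for sm4ks:
//   %a32 = trunc i64 %a to i32
//   %b32 = trunc i64 %b to i32
//   %r32 = call i32 @llvm.riscv.sm4ks(i32 %a32, i32 %b32, i32 2)
//   %r   = sext i32 %r32 to i64
// with the former i8 immediate (here 2) rebuilt as an i32 constant.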
5005 case Intrinsic::nvvm_mapa_shared_cluster: {
5006 // Create a new call with the correct address space.
5007 NewCall =
5008 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5009 Value *Res = NewCall;
5010 Res = Builder.CreateAddrSpaceCast(
5011 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5012 NewCall->takeName(CI);
5013 CI->replaceAllUsesWith(Res);
5014 CI->eraseFromParent();
5015 return;
5016 }
5017 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5018 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5019 // Create a new call with the correct address space.
5020 SmallVector<Value *, 4> Args(CI->args());
5021 Args[0] = Builder.CreateAddrSpaceCast(
5022 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5023
5024 NewCall = Builder.CreateCall(NewFn, Args);
5025 NewCall->takeName(CI);
5026 CI->replaceAllUsesWith(NewCall);
5027 CI->eraseFromParent();
5028 return;
5029 }
5030 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5031 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5032 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5033 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5034 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5035 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5036 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5037 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5038 SmallVector<Value *, 16> Args(CI->args());
5039
5040 // Create AddrSpaceCast to shared_cluster if needed.
5041 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5042 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5043 if (AS != NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
5044 Args[0] = Builder.CreateAddrSpaceCast(
5045 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5046
5047 // Attach the flag argument for cta_group, with a
5048 // default value of 0. This handles case (2) in
5049 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5050 size_t NumArgs = CI->arg_size();
5051 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5052 if (!FlagArg->getType()->isIntegerTy(1))
5053 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5054
5055 NewCall = Builder.CreateCall(NewFn, Args);
5056 NewCall->takeName(CI);
5057 CI->replaceAllUsesWith(NewCall);
5058 CI->eraseFromParent();
5059 return;
5060 }
5061 case Intrinsic::riscv_sha256sig0:
5062 case Intrinsic::riscv_sha256sig1:
5063 case Intrinsic::riscv_sha256sum0:
5064 case Intrinsic::riscv_sha256sum1:
5065 case Intrinsic::riscv_sm3p0:
5066 case Intrinsic::riscv_sm3p1: {
5067 // The i64 type overload for these intrinsics was removed; on RV64 the
5068 // argument is now truncated to i32 and the result sign-extended back.
5069 if (!CI->getType()->isIntegerTy(64))
5070 return;
5071
5072 Value *Arg =
5073 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5074
5075 NewCall = Builder.CreateCall(NewFn, Arg);
5076 Value *Res =
5077 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5078 NewCall->takeName(CI);
5079 CI->replaceAllUsesWith(Res);
5080 CI->eraseFromParent();
5081 return;
5082 }
5083
5084 case Intrinsic::x86_xop_vfrcz_ss:
5085 case Intrinsic::x86_xop_vfrcz_sd:
5086 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5087 break;
5088
5089 case Intrinsic::x86_xop_vpermil2pd:
5090 case Intrinsic::x86_xop_vpermil2ps:
5091 case Intrinsic::x86_xop_vpermil2pd_256:
5092 case Intrinsic::x86_xop_vpermil2ps_256: {
5093 SmallVector<Value *, 4> Args(CI->args());
5094 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5095 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5096 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5097 NewCall = Builder.CreateCall(NewFn, Args);
5098 break;
5099 }
5100
5101 case Intrinsic::x86_sse41_ptestc:
5102 case Intrinsic::x86_sse41_ptestz:
5103 case Intrinsic::x86_sse41_ptestnzc: {
5104 // The arguments for these intrinsics used to be v4f32, and changed
5105 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5106 // So, the only thing required is a bitcast for both arguments.
5107 // First, check the arguments have the old type.
5108 Value *Arg0 = CI->getArgOperand(0);
5109 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5110 return;
5111
5112 // Old intrinsic, add bitcasts
5113 Value *Arg1 = CI->getArgOperand(1);
5114
5115 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5116
5117 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5118 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5119
5120 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5121 break;
5122 }
5123
5124 case Intrinsic::x86_rdtscp: {
5125 // This used to take one argument. If we have no arguments, it is already
5126 // upgraded.
5127 if (CI->getNumOperands() == 0)
5128 return;
5129
5130 NewCall = Builder.CreateCall(NewFn);
5131 // Extract the second result and store it.
5132 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5133 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5134 // Replace the original call result with the first result of the new call.
5135 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5136
5137 NewCall->takeName(CI);
5138 CI->replaceAllUsesWith(TSC);
5139 CI->eraseFromParent();
5140 return;
5141 }
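// Editorial sketch (hypothetical %names): the one-argument rdtscp form
//   %tsc = call i64 @llvm.x86.rdtscp(ptr %aux)
// is split into the struct-returning intrinsic plus an explicit store:
//   %r    = call { i64, i32 } @llvm.x86.rdtscp()
//   %aux1 = extractvalue { i64, i32 } %r, 1
//   store i32 %aux1, ptr %aux, align 1
//   %tsc  = extractvalue { i64, i32 } %r, 0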
5142
5143 case Intrinsic::x86_sse41_insertps:
5144 case Intrinsic::x86_sse41_dppd:
5145 case Intrinsic::x86_sse41_dpps:
5146 case Intrinsic::x86_sse41_mpsadbw:
5147 case Intrinsic::x86_avx_dp_ps_256:
5148 case Intrinsic::x86_avx2_mpsadbw: {
5149 // Need to truncate the last argument from i32 to i8 -- this argument models
5150 // an inherently 8-bit immediate operand to these x86 instructions.
5151 SmallVector<Value *, 4> Args(CI->args());
5152
5153 // Replace the last argument with a trunc.
5154 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5155 NewCall = Builder.CreateCall(NewFn, Args);
5156 break;
5157 }
5158
5159 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5160 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5161 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5162 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5163 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5164 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5165 SmallVector<Value *, 4> Args(CI->args());
5166 unsigned NumElts =
5167 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5168 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5169
5170 NewCall = Builder.CreateCall(NewFn, Args);
5171 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5172
5173 NewCall->takeName(CI);
5174 CI->replaceAllUsesWith(Res);
5175 CI->eraseFromParent();
5176 return;
5177 }
5178
5179 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5180 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5181 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5182 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5183 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5184 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5185 SmallVector<Value *, 4> Args(CI->args());
5186 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5187 if (NewFn->getIntrinsicID() ==
5188 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5189 Args[1] = Builder.CreateBitCast(
5190 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5191
5192 NewCall = Builder.CreateCall(NewFn, Args);
5193 Value *Res = Builder.CreateBitCast(
5194 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5195
5196 NewCall->takeName(CI);
5197 CI->replaceAllUsesWith(Res);
5198 CI->eraseFromParent();
5199 return;
5200 }
5201 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5202 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5203 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5204 SmallVector<Value *, 4> Args(CI->args());
5205 unsigned NumElts =
5206 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5207 Args[1] = Builder.CreateBitCast(
5208 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5209 Args[2] = Builder.CreateBitCast(
5210 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5211
5212 NewCall = Builder.CreateCall(NewFn, Args);
5213 break;
5214 }
5215
5216 case Intrinsic::thread_pointer: {
5217 NewCall = Builder.CreateCall(NewFn, {});
5218 break;
5219 }
5220
5221 case Intrinsic::memcpy:
5222 case Intrinsic::memmove:
5223 case Intrinsic::memset: {
5224 // We have to make sure that the call signature is what we're expecting.
5225 // We only want to change the old signatures by removing the alignment arg:
5226 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5227 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5228 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5229 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5230 // Note: i8*'s in the above can be any pointer type
5231 if (CI->arg_size() != 5) {
5232 DefaultCase();
5233 return;
5234 }
5235 // Remove alignment argument (3), and add alignment attributes to the
5236 // dest/src pointers.
5237 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5238 CI->getArgOperand(2), CI->getArgOperand(4)};
5239 NewCall = Builder.CreateCall(NewFn, Args);
5240 AttributeList OldAttrs = CI->getAttributes();
5241 AttributeList NewAttrs = AttributeList::get(
5242 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5243 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5244 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5245 NewCall->setAttributes(NewAttrs);
5246 auto *MemCI = cast<MemIntrinsic>(NewCall);
5247 // All mem intrinsics support dest alignment.
5248 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5249 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5250 // Memcpy/Memmove also support source alignment.
5251 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5252 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5253 break;
5254 }
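// Editorial sketch: the constant alignment operand migrates into align
// attributes on the pointer arguments, e.g.
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 16, i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 16 %d, ptr align 16 %s,
//                                    i64 %n, i1 false)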
5255
5256 case Intrinsic::masked_load:
5257 case Intrinsic::masked_gather:
5258 case Intrinsic::masked_store:
5259 case Intrinsic::masked_scatter: {
5260 if (CI->arg_size() != 4) {
5261 DefaultCase();
5262 return;
5263 }
5264
5265 auto GetMaybeAlign = [](Value *Op) {
5266 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5267 uint64_t Val = CI->getZExtValue();
5268 if (Val == 0)
5269 return MaybeAlign();
5270 if (isPowerOf2_64(Val))
5271 return MaybeAlign(Val);
5272 }
5273 reportFatalUsageError("Invalid alignment argument");
5274 };
5275 auto GetAlign = [&](Value *Op) {
5276 MaybeAlign Align = GetMaybeAlign(Op);
5277 if (Align)
5278 return *Align;
5279 reportFatalUsageError("Invalid zero alignment argument");
5280 };
5281
5282 const DataLayout &DL = CI->getDataLayout();
5283 switch (NewFn->getIntrinsicID()) {
5284 case Intrinsic::masked_load:
5285 NewCall = Builder.CreateMaskedLoad(
5286 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5287 CI->getArgOperand(2), CI->getArgOperand(3));
5288 break;
5289 case Intrinsic::masked_gather:
5290 NewCall = Builder.CreateMaskedGather(
5291 CI->getType(), CI->getArgOperand(0),
5292 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5293 CI->getType()->getScalarType()),
5294 CI->getArgOperand(2), CI->getArgOperand(3));
5295 break;
5296 case Intrinsic::masked_store:
5297 NewCall = Builder.CreateMaskedStore(
5298 CI->getArgOperand(0), CI->getArgOperand(1),
5299 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5300 break;
5301 case Intrinsic::masked_scatter:
5302 NewCall = Builder.CreateMaskedScatter(
5303 CI->getArgOperand(0), CI->getArgOperand(1),
5304 DL.getValueOrABITypeAlignment(
5305 GetMaybeAlign(CI->getArgOperand(2)),
5306 CI->getArgOperand(0)->getType()->getScalarType()),
5307 CI->getArgOperand(3));
5308 break;
5309 default:
5310 llvm_unreachable("Unexpected intrinsic ID");
5311 }
5312 // Previous metadata is still valid.
5313 NewCall->copyMetadata(*CI);
5314 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5315 break;
5316 }
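// Editorial sketch for masked.load (the other three follow the same pattern):
// the explicit i32 alignment operand is dropped in favor of an align
// attribute on the pointer, supplied through the IRBuilder, e.g.
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//                                                  <4 x i1> %m, <4 x i32> %pt)
// becomes
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 %p,
//                                                  <4 x i1> %m, <4 x i32> %pt)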
5317
5318 case Intrinsic::lifetime_start:
5319 case Intrinsic::lifetime_end: {
5320 if (CI->arg_size() != 2) {
5321 DefaultCase();
5322 return;
5323 }
5324
5325 Value *Ptr = CI->getArgOperand(1);
5326 // Try to strip pointer casts, such that the lifetime works on an alloca.
5327 Ptr = Ptr->stripPointerCasts();
5328 if (isa<AllocaInst>(Ptr)) {
5329 // Don't use NewFn, as we might have looked through an addrspacecast.
5330 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5331 NewCall = Builder.CreateLifetimeStart(Ptr);
5332 else
5333 NewCall = Builder.CreateLifetimeEnd(Ptr);
5334 break;
5335 }
5336
5337 // Otherwise remove the lifetime marker.
5338 CI->eraseFromParent();
5339 return;
5340 }
5341
5342 case Intrinsic::x86_avx512_vpdpbusd_128:
5343 case Intrinsic::x86_avx512_vpdpbusd_256:
5344 case Intrinsic::x86_avx512_vpdpbusd_512:
5345 case Intrinsic::x86_avx512_vpdpbusds_128:
5346 case Intrinsic::x86_avx512_vpdpbusds_256:
5347 case Intrinsic::x86_avx512_vpdpbusds_512:
5348 case Intrinsic::x86_avx2_vpdpbssd_128:
5349 case Intrinsic::x86_avx2_vpdpbssd_256:
5350 case Intrinsic::x86_avx10_vpdpbssd_512:
5351 case Intrinsic::x86_avx2_vpdpbssds_128:
5352 case Intrinsic::x86_avx2_vpdpbssds_256:
5353 case Intrinsic::x86_avx10_vpdpbssds_512:
5354 case Intrinsic::x86_avx2_vpdpbsud_128:
5355 case Intrinsic::x86_avx2_vpdpbsud_256:
5356 case Intrinsic::x86_avx10_vpdpbsud_512:
5357 case Intrinsic::x86_avx2_vpdpbsuds_128:
5358 case Intrinsic::x86_avx2_vpdpbsuds_256:
5359 case Intrinsic::x86_avx10_vpdpbsuds_512:
5360 case Intrinsic::x86_avx2_vpdpbuud_128:
5361 case Intrinsic::x86_avx2_vpdpbuud_256:
5362 case Intrinsic::x86_avx10_vpdpbuud_512:
5363 case Intrinsic::x86_avx2_vpdpbuuds_128:
5364 case Intrinsic::x86_avx2_vpdpbuuds_256:
5365 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5366 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5367 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5368 CI->getArgOperand(2)};
5369 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5370 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5371 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5372
5373 NewCall = Builder.CreateCall(NewFn, Args);
5374 break;
5375 }
5376 }
5377 assert(NewCall && "Should have either set this variable or returned through "
5378 "the default case");
5379 NewCall->takeName(CI);
5380 CI->replaceAllUsesWith(NewCall);
5381 CI->eraseFromParent();
5382}
5383
5384 void llvm::UpgradeCallsToIntrinsic(Function *F) {
5385 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5386
5387 // Check if this function should be upgraded and get the replacement function
5388 // if there is one.
5389 Function *NewFn;
5390 if (UpgradeIntrinsicFunction(F, NewFn)) {
5391 // Replace all users of the old function with the new function or new
5392 // instructions. This is not a range loop because the call is deleted.
5393 for (User *U : make_early_inc_range(F->users()))
5394 if (CallBase *CB = dyn_cast<CallBase>(U))
5395 UpgradeIntrinsicCall(CB, NewFn);
5396
5397 // Remove old function, no longer used, from the module.
5398 if (F != NewFn)
5399 F->eraseFromParent();
5400 }
5401}
5402
5403 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5404 const unsigned NumOperands = MD.getNumOperands();
5405 if (NumOperands == 0)
5406 return &MD; // Invalid, punt to a verifier error.
5407
5408 // Check if the tag uses struct-path aware TBAA format.
5409 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5410 return &MD;
5411
5412 auto &Context = MD.getContext();
5413 if (NumOperands == 3) {
5414 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5415 MDNode *ScalarType = MDNode::get(Context, Elts);
5416 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5417 Metadata *Elts2[] = {ScalarType, ScalarType,
5418 ConstantAsMetadata::get(
5419 Constant::getNullValue(Type::getInt64Ty(Context))),
5420 MD.getOperand(2)};
5421 return MDNode::get(Context, Elts2);
5422 }
5423 // Create a MDNode <MD, MD, offset 0>
5424 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5425 Type::getInt64Ty(Context)))};
5426 return MDNode::get(Context, Elts);
5427}
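// Editorial sketch (metadata ids hypothetical): an old scalar TBAA tag such as
//   !1 = !{!"int", !0}
// is rewrapped into the struct-path form !{!1, !1, i64 0}, and the old
// three-operand form !{!"int", !0, i64 1} (constant flag) becomes
//   !{!s, !s, i64 0, i64 1}   with   !s = !{!"int", !0}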
5428
5429 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5430 Instruction *&Temp) {
5431 if (Opc != Instruction::BitCast)
5432 return nullptr;
5433
5434 Temp = nullptr;
5435 Type *SrcTy = V->getType();
5436 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5437 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5438 LLVMContext &Context = V->getContext();
5439
5440 // We have no information about the target data layout, so we assume that
5441 // the maximum pointer size is 64 bits.
5442 Type *MidTy = Type::getInt64Ty(Context);
5443 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5444
5445 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5446 }
5447
5448 return nullptr;
5449}
5450
5451 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5452 if (Opc != Instruction::BitCast)
5453 return nullptr;
5454
5455 Type *SrcTy = C->getType();
5456 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5457 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5458 LLVMContext &Context = C->getContext();
5459
5460 // We have no information about the target data layout, so we assume that
5461 // the maximum pointer size is 64 bits.
5462 Type *MidTy = Type::getInt64Ty(Context);
5463
5464 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5465 DestTy);
5466 }
5467
5468 return nullptr;
5469}
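// Editorial sketch (hypothetical %names): a bitcast between pointers in
// different address spaces, which is not a legal bitcast, is expressed by both
// helpers above as a ptrtoint/inttoptr pair through i64:
//   %i = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %i to ptr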
5470
5471/// Check the debug info version number, if it is out-dated, drop the debug
5472/// info. Return true if module is modified.
5473 bool llvm::UpgradeDebugInfo(Module &M) {
5474 if (DisableAutoUpgradeDebugInfo)
5475 return false;
5476
5477 llvm::TimeTraceScope timeScope("Upgrade debug info");
5478 // We need to get metadata before the module is verified (i.e., getModuleFlag
5479 // makes assumptions that we haven't verified yet). Carefully extract the flag
5480 // from the metadata.
5481 unsigned Version = 0;
5482 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5483 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5484 if (Flag->getNumOperands() < 3)
5485 return false;
5486 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5487 return K->getString() == "Debug Info Version";
5488 return false;
5489 });
5490 if (OpIt != ModFlags->op_end()) {
5491 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5492 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5493 Version = CI->getZExtValue();
5494 }
5495 }
5496
5497 if (Version == DEBUG_METADATA_VERSION) {
5498 bool BrokenDebugInfo = false;
5499 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5500 report_fatal_error("Broken module found, compilation aborted!");
5501 if (!BrokenDebugInfo)
5502 // Everything is ok.
5503 return false;
5504 else {
5505 // Diagnose malformed debug info.
5506 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5507 M.getContext().diagnose(Diag);
5508 }
5509 }
5510 bool Modified = StripDebugInfo(M);
5511 if (Modified && Version != DEBUG_METADATA_VERSION) {
5512 // Diagnose a version mismatch.
5513 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5514 M.getContext().diagnose(DiagVersion);
5515 }
5516 return Modified;
5517}
5518
5519static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5520 GlobalValue *GV, const Metadata *V) {
5521 Function *F = cast<Function>(GV);
5522
5523 constexpr StringLiteral DefaultValue = "1";
5524 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5525 unsigned Length = 0;
5526
5527 if (F->hasFnAttribute(Attr)) {
5528 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5529 // parse these elements placing them into Vect3
5530 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5531 for (; Length < 3 && !S.empty(); Length++) {
5532 auto [Part, Rest] = S.split(',');
5533 Vect3[Length] = Part.trim();
5534 S = Rest;
5535 }
5536 }
5537
5538 const unsigned Dim = DimC - 'x';
5539 assert(Dim < 3 && "Unexpected dim char");
5540
5541 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5542
5543 // local variable required for StringRef in Vect3 to point to.
5544 const std::string VStr = llvm::utostr(VInt);
5545 Vect3[Dim] = VStr;
5546 Length = std::max(Length, Dim + 1);
5547
5548 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5549 F->addFnAttr(Attr, NewAttr);
5550}
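// Editorial example: merging the annotation maxntidy = 8 into a function that
// already carries "nvvm.maxntid"="4" parses the string into {"4","1","1"},
// overwrites the y slot, and re-joins the used prefix:
//   before: "nvvm.maxntid"="4"
//   after:  "nvvm.maxntid"="4,8"
// Dimensions never mentioned keep the default "1".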
5551
5552static inline bool isXYZ(StringRef S) {
5553 return S == "x" || S == "y" || S == "z";
5554}
5555
5556 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5557 const Metadata *V) {
5558 if (K == "kernel") {
5559 if (!mdconst::extract<ConstantInt>(V)->isZero())
5560 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5561 return true;
5562 }
5563 if (K == "align") {
5564 // V is a bitfield specifying two 16-bit values. The alignment value is
5565 // specified in the low 16 bits; the index is specified in the high bits. For
5566 // the index, 0 indicates the return value while higher values correspond to
5567 // each parameter (idx = param + 1).
5568 const uint64_t AlignIdxValuePair =
5569 mdconst::extract<ConstantInt>(V)->getZExtValue();
5570 const unsigned Idx = (AlignIdxValuePair >> 16);
5571 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5572 cast<Function>(GV)->addAttributeAtIndex(
5573 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5574 return true;
5575 }
5576 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5577 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5578 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5579 return true;
5580 }
5581 if (K == "minctasm") {
5582 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5583 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5584 return true;
5585 }
5586 if (K == "maxnreg") {
5587 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5588 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5589 return true;
5590 }
5591 if (K.consume_front("maxntid") && isXYZ(K)) {
5592 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5593 return true;
5594 }
5595 if (K.consume_front("reqntid") && isXYZ(K)) {
5596 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5597 return true;
5598 }
5599 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5600 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5601 return true;
5602 }
5603 if (K == "grid_constant") {
5604 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5605 for (const auto &Op : cast<MDNode>(V)->operands()) {
5606 // For some reason, the index is 1-based in the metadata. Good thing we're
5607 // able to auto-upgrade it!
5608 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5609 cast<Function>(GV)->addParamAttr(Index, Attr);
5610 }
5611 return true;
5612 }
5613
5614 return false;
5615}
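// Editorial sketch (names hypothetical): an nvvm.annotations entry such as
//   !0 = !{ptr @f, !"kernel", i32 1}
// is folded into the function itself as the ptx_kernel calling convention,
// and !{ptr @f, !"maxnreg", i32 16} becomes the function attribute
// "nvvm.maxnreg"="16"; upgraded pairs are dropped from the metadata.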
5616
5617 void llvm::UpgradeNVVMAnnotations(Module &M) {
5618 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5619 if (!NamedMD)
5620 return;
5621
5622 SmallVector<MDNode *, 8> NewNodes;
5623 SmallPtrSet<const MDNode *, 8> SeenNodes;
5624 for (MDNode *MD : NamedMD->operands()) {
5625 if (!SeenNodes.insert(MD).second)
5626 continue;
5627
5628 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5629 if (!GV)
5630 continue;
5631
5632 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5633
5634 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5635 // Each nvvm.annotations metadata entry will be of the following form:
5636 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5637 // start index = 1, to skip the global variable key
5638 // increment = 2, to skip the value for each property-value pairs
5639 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5640 MDString *K = cast<MDString>(MD->getOperand(j));
5641 const MDOperand &V = MD->getOperand(j + 1);
5642 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5643 if (!Upgraded)
5644 NewOperands.append({K, V});
5645 }
5646
5647 if (NewOperands.size() > 1)
5648 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5649 }
5650
5651 NamedMD->clearOperands();
5652 for (MDNode *N : NewNodes)
5653 NamedMD->addOperand(N);
5654}
5655
5656/// This checks for objc retain release marker which should be upgraded. It
5657/// returns true if module is modified.
5658 static bool upgradeRetainReleaseMarker(Module &M) {
5659 bool Changed = false;
5660 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5661 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5662 if (ModRetainReleaseMarker) {
5663 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5664 if (Op) {
5665 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5666 if (ID) {
5667 SmallVector<StringRef, 4> ValueComp;
5668 ID->getString().split(ValueComp, "#");
5669 if (ValueComp.size() == 2) {
5670 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5671 ID = MDString::get(M.getContext(), NewValue);
5672 }
5673 M.addModuleFlag(Module::Error, MarkerKey, ID);
5674 M.eraseNamedMetadata(ModRetainReleaseMarker);
5675 Changed = true;
5676 }
5677 }
5678 }
5679 return Changed;
5680}
5681
5682 void llvm::UpgradeARCRuntime(Module &M) {
5683 // This lambda converts normal function calls to ARC runtime functions to
5684 // intrinsic calls.
5685 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5686 llvm::Intrinsic::ID IntrinsicFunc) {
5687 Function *Fn = M.getFunction(OldFunc);
5688
5689 if (!Fn)
5690 return;
5691
5692 Function *NewFn =
5693 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5694
5695 for (User *U : make_early_inc_range(Fn->users())) {
5696 CallInst *CI = dyn_cast<CallInst>(U);
5697 if (!CI || CI->getCalledFunction() != Fn)
5698 continue;
5699
5700 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5701 FunctionType *NewFuncTy = NewFn->getFunctionType();
5702 SmallVector<Value *, 4> Args;
5703
5704 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5705 // value to the return type of the old function.
5706 if (NewFuncTy->getReturnType() != CI->getType() &&
5707 !CastInst::castIsValid(Instruction::BitCast, CI,
5708 NewFuncTy->getReturnType()))
5709 continue;
5710
5711 bool InvalidCast = false;
5712
5713 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5714 Value *Arg = CI->getArgOperand(I);
5715
5716 // Bitcast argument to the parameter type of the new function if it's
5717 // not a variadic argument.
5718 if (I < NewFuncTy->getNumParams()) {
5719 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5720 // to the parameter type of the new function.
5721 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5722 NewFuncTy->getParamType(I))) {
5723 InvalidCast = true;
5724 break;
5725 }
5726 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5727 }
5728 Args.push_back(Arg);
5729 }
5730
5731 if (InvalidCast)
5732 continue;
5733
5734 // Create a call instruction that calls the new function.
5735 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5736 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5737 NewCall->takeName(CI);
5738
5739 // Bitcast the return value back to the type of the old call.
5740 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5741
5742 if (!CI->use_empty())
5743 CI->replaceAllUsesWith(NewRetVal);
5744 CI->eraseFromParent();
5745 }
5746
5747 if (Fn->use_empty())
5748 Fn->eraseFromParent();
5749 };
5750
5751 // Unconditionally convert a call to "clang.arc.use" to a call to
5752 // "llvm.objc.clang.arc.use".
5753 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5754
5755 // Upgrade the retain release marker. If there is no need to upgrade
5756 // the marker, that means either the module is already new enough to contain
5757 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5758 if (!upgradeRetainReleaseMarker(M))
5759 return;
5760
5761 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5762 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5763 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5764 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5765 {"objc_autoreleaseReturnValue",
5766 llvm::Intrinsic::objc_autoreleaseReturnValue},
5767 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5768 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5769 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5770 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5771 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5772 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5773 {"objc_release", llvm::Intrinsic::objc_release},
5774 {"objc_retain", llvm::Intrinsic::objc_retain},
5775 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5776 {"objc_retainAutoreleaseReturnValue",
5777 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5778 {"objc_retainAutoreleasedReturnValue",
5779 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5780 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5781 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5782 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5783 {"objc_unsafeClaimAutoreleasedReturnValue",
5784 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5785 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5786 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5787 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5788 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5789 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5790 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5791 {"objc_arc_annotation_topdown_bbstart",
5792 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5793 {"objc_arc_annotation_topdown_bbend",
5794 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5795 {"objc_arc_annotation_bottomup_bbstart",
5796 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5797 {"objc_arc_annotation_bottomup_bbend",
5798 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5799
5800 for (auto &I : RuntimeFuncs)
5801 UpgradeToIntrinsic(I.first, I.second);
5802}
5803
5804 bool llvm::UpgradeModuleFlags(Module &M) {
5805 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5806 if (!ModFlags)
5807 return false;
5808
5809 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5810 bool HasSwiftVersionFlag = false;
5811 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5812 uint32_t SwiftABIVersion;
5813 auto Int8Ty = Type::getInt8Ty(M.getContext());
5814 auto Int32Ty = Type::getInt32Ty(M.getContext());
5815
5816 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5817 MDNode *Op = ModFlags->getOperand(I);
5818 if (Op->getNumOperands() != 3)
5819 continue;
5820 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5821 if (!ID)
5822 continue;
5823 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5824 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5825 Type::getInt32Ty(M.getContext()), B)),
5826 MDString::get(M.getContext(), ID->getString()),
5827 Op->getOperand(2)};
5828 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5829 Changed = true;
5830 };
5831
5832 if (ID->getString() == "Objective-C Image Info Version")
5833 HasObjCFlag = true;
5834 if (ID->getString() == "Objective-C Class Properties")
5835 HasClassProperties = true;
5836 // Upgrade PIC from Error/Max to Min.
5837 if (ID->getString() == "PIC Level") {
5838 if (auto *Behavior =
5839 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5840 uint64_t V = Behavior->getLimitedValue();
5841 if (V == Module::Error || V == Module::Max)
5842 SetBehavior(Module::Min);
5843 }
5844 }
5845 // Upgrade "PIE Level" from Error to Max.
5846 if (ID->getString() == "PIE Level")
5847 if (auto *Behavior =
5848 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5849 if (Behavior->getLimitedValue() == Module::Error)
5850 SetBehavior(Module::Max);
5851
5852 // Upgrade branch protection and return address signing module flags. The
5853 // module flag behavior for these fields were Error and now they are Min.
5854 if (ID->getString() == "branch-target-enforcement" ||
5855 ID->getString().starts_with("sign-return-address")) {
5856 if (auto *Behavior =
5857 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5858 if (Behavior->getLimitedValue() == Module::Error) {
5859 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5860 Metadata *Ops[3] = {
5861 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5862 Op->getOperand(1), Op->getOperand(2)};
5863 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5864 Changed = true;
5865 }
5866 }
5867 }
5868
5869 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
5870 // section name so that llvm-lto will not complain about mismatching
5871 // module flags that are functionally the same.
5872 if (ID->getString() == "Objective-C Image Info Section") {
5873 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5874 SmallVector<StringRef, 4> ValueComp;
5875 Value->getString().split(ValueComp, " ");
5876 if (ValueComp.size() != 1) {
5877 std::string NewValue;
5878 for (auto &S : ValueComp)
5879 NewValue += S.str();
5880 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5881 MDString::get(M.getContext(), NewValue)};
5882 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5883 Changed = true;
5884 }
5885 }
5886 }
5887
5888 // IRUpgrader turns an i32-typed "Objective-C Garbage Collection" into an i8
5889 // value. If the higher bits are set, it adds a new module flag for Swift info.
5890 if (ID->getString() == "Objective-C Garbage Collection") {
5891 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5892 if (Md) {
5893 assert(Md->getValue() && "Expected non-empty metadata");
5894 auto Type = Md->getValue()->getType();
5895 if (Type == Int8Ty)
5896 continue;
5897 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5898 if ((Val & 0xff) != Val) {
5899 HasSwiftVersionFlag = true;
5900 SwiftABIVersion = (Val & 0xff00) >> 8;
5901 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5902 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5903 }
5904 Metadata *Ops[3] = {
5905 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
5906 Op->getOperand(1),
5907 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5908 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5909 Changed = true;
5910 }
5911 }
5912
5913 if (ID->getString() == "amdgpu_code_object_version") {
5914 Metadata *Ops[3] = {
5915 Op->getOperand(0),
5916 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5917 Op->getOperand(2)};
5918 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5919 Changed = true;
5920 }
5921 }
5922
5923 // "Objective-C Class Properties" is recently added for Objective-C. We
5924 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5925 // flag of value 0, so we can correclty downgrade this flag when trying to
5926 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5927 // this module flag.
5928 if (HasObjCFlag && !HasClassProperties) {
5929 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5930 (uint32_t)0);
5931 Changed = true;
5932 }
5933
5934 if (HasSwiftVersionFlag) {
5935 M.addModuleFlag(Module::Error, "Swift ABI Version",
5936 SwiftABIVersion);
5937 M.addModuleFlag(Module::Error, "Swift Major Version",
5938 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5939 M.addModuleFlag(Module::Error, "Swift Minor Version",
5940 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5941 Changed = true;
5942 }
5943
5944 return Changed;
5945}
5946
5947 void llvm::UpgradeSectionAttributes(Module &M) {
5948 auto TrimSpaces = [](StringRef Section) -> std::string {
5949 SmallVector<StringRef, 5> Components;
5950 Section.split(Components, ',');
5951
5952 SmallString<32> Buffer;
5953 raw_svector_ostream OS(Buffer);
5954
5955 for (auto Component : Components)
5956 OS << ',' << Component.trim();
5957
5958 return std::string(OS.str().substr(1));
5959 };
5960
5961 for (auto &GV : M.globals()) {
5962 if (!GV.hasSection())
5963 continue;
5964
5965 StringRef Section = GV.getSection();
5966
5967 if (!Section.starts_with("__DATA, __objc_catlist"))
5968 continue;
5969
5970 // __DATA, __objc_catlist, regular, no_dead_strip
5971 // __DATA,__objc_catlist,regular,no_dead_strip
5972 GV.setSection(TrimSpaces(Section));
5973 }
5974}
5975
5976namespace {
5977// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5978// callsites within a function that did not also have the strictfp attribute.
5979// Since 10.0, if strict FP semantics are needed within a function, the
5980// function must have the strictfp attribute and all calls within the function
5981// must also have the strictfp attribute. This latter restriction is
5982// necessary to prevent unwanted libcall simplification when a function is
5983// being cloned (such as for inlining).
5984//
5985// The "dangling" strictfp attribute usage was only used to prevent constant
5986// folding and other libcall simplification. The nobuiltin attribute on the
5987// callsite has the same effect.
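// Illustrative sketch (example IR): inside a caller that lacks the strictfp
// attribute, a callsite such as
//   call double @llvm.sqrt.f64(double %x) strictfp
// is rewritten by the visitor below to
//   call double @llvm.sqrt.f64(double %x) nobuiltin
// while calls to constrained FP intrinsics are left untouched.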
5988struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5989 StrictFPUpgradeVisitor() = default;
5990
5991 void visitCallBase(CallBase &Call) {
5992 if (!Call.isStrictFP())
5993 return;
5994 if (isa<ConstrainedFPIntrinsic>(&Call))
5995 return;
5996 // If we get here, the caller doesn't have the strictfp attribute
5997 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5998 Call.removeFnAttr(Attribute::StrictFP);
5999 Call.addFnAttr(Attribute::NoBuiltin);
6000 }
6001};
6002
6003/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6004struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6005 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6006 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6007
6008 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6009 if (!RMW.isFloatingPointOperation())
6010 return;
6011
6012 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6013 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6014 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6015 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6016 }
6017};
6018} // namespace
6019
6020 void llvm::UpgradeFunctionAttributes(Function &F) {
6021 // If a function definition doesn't have the strictfp attribute,
6022 // convert any callsite strictfp attributes to nobuiltin.
6023 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6024 StrictFPUpgradeVisitor SFPV;
6025 SFPV.visit(F);
6026 }
6027
6028 // Remove all incompatible attributes from the function.
6029 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6030 F.getReturnType(), F.getAttributes().getRetAttrs()));
6031 for (auto &Arg : F.args())
6032 Arg.removeAttrs(
6033 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6034
6035 // Older versions of LLVM treated an "implicit-section-name" attribute
6036 // similarly to directly setting the section on a Function.
6037 if (Attribute A = F.getFnAttribute("implicit-section-name");
6038 A.isValid() && A.isStringAttribute()) {
6039 F.setSection(A.getValueAsString());
6040 F.removeFnAttr("implicit-section-name");
6041 }
6042
6043 if (!F.empty()) {
6044 // For some reason this is called twice, and the first time is before any
6045 // instructions are loaded into the body.
6046
6047 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6048 A.isValid()) {
6049
6050 if (A.getValueAsBool()) {
6051 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6052 Visitor.visit(F);
6053 }
6054
6055 // We will leave behind dead attribute uses on external declarations, but
6056 // clang never added these to declarations anyway.
6057 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6058 }
6059 }
6060}
6061
6062 // Set the function attribute if it is not already present.
6063 static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6064 StringRef Value) {
6065 if (!F.hasFnAttribute(FnAttrName))
6066 F.addFnAttr(FnAttrName, Value);
6067}
6068
6069 // Set the function attribute if it is not already present. If the attribute
6070 // is "false", remove it. If the attribute is "true", reset it to a valueless
6071 // attribute.
6072static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6073 if (!F.hasFnAttribute(FnAttrName)) {
6074 if (Set)
6075 F.addFnAttr(FnAttrName);
6076 } else {
6077 auto A = F.getFnAttribute(FnAttrName);
6078 if ("false" == A.getValueAsString())
6079 F.removeFnAttr(FnAttrName);
6080 else if ("true" == A.getValueAsString()) {
6081 F.removeFnAttr(FnAttrName);
6082 F.addFnAttr(FnAttrName);
6083 }
6084 }
6085}
6086
6087 void llvm::copyModuleAttrToFunctions(Module &M) {
6088 Triple T(M.getTargetTriple());
6089 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6090 return;
6091
6092 uint64_t BTEValue = 0;
6093 uint64_t BPPLRValue = 0;
6094 uint64_t GCSValue = 0;
6095 uint64_t SRAValue = 0;
6096 uint64_t SRAALLValue = 0;
6097 uint64_t SRABKeyValue = 0;
6098
6099 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6100 if (ModFlags) {
6101 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6102 MDNode *Op = ModFlags->getOperand(I);
6103 if (Op->getNumOperands() != 3)
6104 continue;
6105
6106 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6107 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6108 if (!ID || !CI)
6109 continue;
6110
6111 StringRef IDStr = ID->getString();
6112 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6113 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6114 : IDStr == "guarded-control-stack" ? &GCSValue
6115 : IDStr == "sign-return-address" ? &SRAValue
6116 : IDStr == "sign-return-address-all" ? &SRAALLValue
6117 : IDStr == "sign-return-address-with-bkey"
6118 ? &SRABKeyValue
6119 : nullptr;
6120 if (!ValPtr)
6121 continue;
6122
6123 *ValPtr = CI->getZExtValue();
6124 if (*ValPtr == 2)
6125 return;
6126 }
6127 }
6128
6129 bool BTE = BTEValue == 1;
6130 bool BPPLR = BPPLRValue == 1;
6131 bool GCS = GCSValue == 1;
6132 bool SRA = SRAValue == 1;
6133
6134 StringRef SignTypeValue = "non-leaf";
6135 if (SRA && SRAALLValue == 1)
6136 SignTypeValue = "all";
6137
6138 StringRef SignKeyValue = "a_key";
6139 if (SRA && SRABKeyValue == 1)
6140 SignKeyValue = "b_key";
6141
6142 for (Function &F : M.getFunctionList()) {
6143 if (F.isDeclaration())
6144 continue;
6145
6146 if (SRA) {
6147 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6148 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6149 } else {
6150 if (auto A = F.getFnAttribute("sign-return-address");
6151 A.isValid() && "none" == A.getValueAsString()) {
6152 F.removeFnAttr("sign-return-address");
6153 F.removeFnAttr("sign-return-address-key");
6154 }
6155 }
6156 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6157 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6158 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6159 }
6160
6161 if (BTE)
6162 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6163 if (BPPLR)
6164 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6165 if (GCS)
6166 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6167 if (SRA) {
6168 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6169 if (SRAALLValue == 1)
6170 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6171 if (SRABKeyValue == 1)
6172 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6173 }
6174}
6175
6176static bool isOldLoopArgument(Metadata *MD) {
6177 auto *T = dyn_cast_or_null<MDTuple>(MD);
6178 if (!T)
6179 return false;
6180 if (T->getNumOperands() < 1)
6181 return false;
6182 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6183 if (!S)
6184 return false;
6185 return S->getString().starts_with("llvm.vectorizer.");
6186}
6187
6188 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6189 StringRef OldPrefix = "llvm.vectorizer.";
6190 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6191
6192 if (OldTag == "llvm.vectorizer.unroll")
6193 return MDString::get(C, "llvm.loop.interleave.count");
6194
6195 return MDString::get(
6196 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6197 .str());
6198}
6199
6200 static Metadata *upgradeLoopArgument(Metadata *MD) {
6201 auto *T = dyn_cast_or_null<MDTuple>(MD);
6202 if (!T)
6203 return MD;
6204 if (T->getNumOperands() < 1)
6205 return MD;
6206 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6207 if (!OldTag)
6208 return MD;
6209 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6210 return MD;
6211
6212 // This has an old tag. Upgrade it.
6213 SmallVector<Metadata *, 4> Ops;
6214 Ops.reserve(T->getNumOperands());
6215 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6216 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6217 Ops.push_back(T->getOperand(I));
6218
6219 return MDTuple::get(T->getContext(), Ops);
6220}
6221
6222 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6223 auto *T = dyn_cast<MDTuple>(&N);
6224 if (!T)
6225 return &N;
6226
6227 if (none_of(T->operands(), isOldLoopArgument))
6228 return &N;
6229
6230 SmallVector<Metadata *, 4> Ops;
6231 Ops.reserve(T->getNumOperands());
6232 for (Metadata *MD : T->operands())
6233 Ops.push_back(upgradeLoopArgument(MD));
6234
6235 return MDTuple::get(T->getContext(), Ops);
6236}
6237
6238 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6239 Triple T(TT);
6240 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6241 // the address space of globals to 1. This does not apply to SPIRV Logical.
6242 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6243 !DL.contains("-G") && !DL.starts_with("G")) {
6244 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6245 }
6246
6247 if (T.isLoongArch64() || T.isRISCV64()) {
6248 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6249 auto I = DL.find("-n64-");
6250 if (I != StringRef::npos)
6251 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6252 return DL.str();
6253 }
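// Illustrative sketch (assumed layout fragment): "e-m:e-p:64:64-n64-S128"
// becomes "e-m:e-p:64:64-n32:64-S128", declaring i32 native alongside i64.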
6254
6255 // AMDGPU data layout upgrades.
6256 std::string Res = DL.str();
6257 if (T.isAMDGPU()) {
6258 // Define address spaces for constants.
6259 if (!DL.contains("-G") && !DL.starts_with("G"))
6260 Res.append(Res.empty() ? "G1" : "-G1");
6261
6262 // AMDGCN data layout upgrades.
6263 if (T.isAMDGCN()) {
6264
6265 // Add missing non-integral declarations.
6266 // This goes before adding new address spaces to prevent incoherent string
6267 // values.
6268 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6269 Res.append("-ni:7:8:9");
6270 // Update ni:7 to ni:7:8:9.
6271 if (DL.ends_with("ni:7"))
6272 Res.append(":8:9");
6273 if (DL.ends_with("ni:7:8"))
6274 Res.append(":9");
6275
6276 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6277 // resources). An empty data layout has already been upgraded to G1 by now.
6278 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6279 Res.append("-p7:160:256:256:32");
6280 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6281 Res.append("-p8:128:128:128:48");
6282 constexpr StringRef OldP8("-p8:128:128-");
6283 if (DL.contains(OldP8))
6284 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6285 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6286 Res.append("-p9:192:256:256:32");
6287 }
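// Illustrative sketch (assumed layout fragment): an old amdgcn layout ending
// in "-ni:7" grows to "-ni:7:8:9", then gains
// "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32" so the fat-buffer
// and buffer-resource address spaces have explicit sizes.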
6288
6289 // Upgrade the ELF mangling mode.
6290 if (!DL.contains("m:e"))
6291 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6292
6293 return Res;
6294 }
6295
6296 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6297 // If the datalayout matches the expected format, add pointer size address
6298 // spaces to the datalayout.
6299 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6300 if (!DL.contains(AddrSpaces)) {
6301 SmallVector<StringRef, 4> Groups;
6302 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6303 if (R.match(Res, &Groups))
6304 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6305 }
6306 };
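// Illustrative sketch (assumed layout prefix): when the regex matches,
//   "e-m:e-p:32:32-f64:32:64"
// becomes
//   "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64"
// giving the mixed-pointer-size address spaces explicit widths.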
6307
6308 // AArch64 data layout upgrades.
6309 if (T.isAArch64()) {
6310 // Add "-Fn32"
6311 if (!DL.empty() && !DL.contains("-Fn32"))
6312 Res.append("-Fn32");
6313 AddPtr32Ptr64AddrSpaces();
6314 return Res;
6315 }
6316
6317 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6318 T.isWasm()) {
6319 // Mips64 with o32 ABI did not add "-i128:128".
6320 // Add "-i128:128"
6321 std::string I64 = "-i64:64";
6322 std::string I128 = "-i128:128";
6323 if (!StringRef(Res).contains(I128)) {
6324 size_t Pos = Res.find(I64);
6325 if (Pos != size_t(-1))
6326 Res.insert(Pos + I64.size(), I128);
6327 }
6328 return Res;
6329 }
6330
6331 if (!T.isX86())
6332 return Res;
6333
6334 AddPtr32Ptr64AddrSpaces();
6335
6336 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6337 // for i128 operations prior to this being reflected in the data layout, and
6338 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6339 // boundaries, so although this is a breaking change, the upgrade is expected
6340 // to fix more IR than it breaks.
6341 // Intel MCU is an exception and uses 4-byte-alignment.
6342 if (!T.isOSIAMCU()) {
6343 std::string I128 = "-i128:128";
6344 if (StringRef Ref = Res; !Ref.contains(I128)) {
6345 SmallVector<StringRef, 4> Groups;
6346 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6347 if (R.match(Res, &Groups))
6348 Res = (Groups[1] + I128 + Groups[3]).str();
6349 }
6350 }
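// Illustrative sketch (assumed layout string): the regex above rewrites
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// to
//   "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128",
// inserting the i128 alignment after the leading m/p/i groups.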
6351
6352 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6353 // Raising the alignment is safe because Clang did not produce f80 values in
6354 // the MSVC environment before this upgrade was added.
6355 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6356 StringRef Ref = Res;
6357 auto I = Ref.find("-f80:32-");
6358 if (I != StringRef::npos)
6359 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6360 }
6361
6362 return Res;
6363}
6364
6365void llvm::UpgradeAttributes(AttrBuilder &B) {
6366 StringRef FramePointer;
6367 Attribute A = B.getAttribute("no-frame-pointer-elim");
6368 if (A.isValid()) {
6369 // The value can be "true" or "false".
6370 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6371 B.removeAttribute("no-frame-pointer-elim");
6372 }
6373 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6374 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6375 if (FramePointer != "all")
6376 FramePointer = "non-leaf";
6377 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6378 }
6379 if (!FramePointer.empty())
6380 B.addAttribute("frame-pointer", FramePointer);
6381
6382 A = B.getAttribute("null-pointer-is-valid");
6383 if (A.isValid()) {
6384 // The value can be "true" or "false".
6385 bool NullPointerIsValid = A.getValueAsString() == "true";
6386 B.removeAttribute("null-pointer-is-valid");
6387 if (NullPointerIsValid)
6388 B.addAttribute(Attribute::NullPointerIsValid);
6389 }
6390}
6391
6392void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6393 // clang.arc.attachedcall bundles are now required to have an operand.
6394 // If they don't, it's okay to drop them entirely: when there is an operand,
6395 // the "attachedcall" is meaningful and required, but without an operand,
6396 // it's just a marker NOP. Dropping it merely prevents an optimization.
6397 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6398 return OBD.getTag() == "clang.arc.attachedcall" &&
6399 OBD.inputs().empty();
6400 });
6401}