//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
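
// For illustration: rename() only appends ".old", so a declaration named
// "llvm.x86.sse41.ptestc" is set aside as "llvm.x86.sse41.ptestc.old",
// freeing the original name for the replacement declaration created below.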

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
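
// A sketch of the effect (assumed IR, not taken from a test file): the old
// declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is set aside and the current form is declared instead:
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// with call sites rewritten later by the intrinsic-call upgrade path.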

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
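
// For example (hypothetical declaration): the trailing immediate of
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// narrows to i8, since only the low 8 bits of the mask were ever meaningful:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)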

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
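
// Roughly (assumed shapes for illustration): an AVX-512 masked compare such
// as llvm.x86.avx512.mask.cmp.pd.256, which used to return its result as a
// scalar i8 bitmask, now returns the native <4 x i1> mask type instead.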

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
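
// Sketch (assumed operand shapes): the 128-bit byte dot-product
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
// is re-declared so the multiplicands carry their real element type:
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)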

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
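
// Analogously (assumed shapes): the word form
//   declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
// now takes its multiplicands as <8 x i16> vectors.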

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
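
// Both helpers cover the switch from modeling bf16 values as i16 to the
// dedicated bfloat type; e.g. (illustrative only) a return type of <8 x i16>
// on llvm.x86.avx512bf16.cvtneps2bf16.256 becomes <8 x bfloat>.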

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }
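
    // For instance (hypothetical declaration): the 64-bit AArch64 form
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float>, <8 x i8>, <8 x i8>)
    // is re-declared over bfloat operands as
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float>, <4 x bfloat>, <4 x bfloat>)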

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    //     The last three parameters of the older version of these
    //     intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    //     The newer version reads as:
    //     arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    //     So, when the type of the [N-3]rd argument is "not i1", then
    //     it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
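
// Example for the [N-3]rd-argument check above (illustrative, operand lists
// abbreviated): the old form ends with
//   ..., i64 %ch, i1 %mc_flag, i1 %ch_flag
// so its third-from-last parameter is the i64 channel, while the new form
// ends with
//   ..., i64 %ch, i1 %mc_flag, i1 %ch_flag, i32 %cta_group_flag
// where the third-from-last parameter is the i1 mc_flag. Hence "not i1" at
// that position identifies a declaration that needs upgrading.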

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter by embedding the alignment as an attribute of
    // the pointer args.
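    // For instance (illustrative IR): the old five-operand form
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i32 8, i1 false)
    // becomes the four-operand form with the alignment carried as attributes:
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %n, i1 false)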
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("barrier0.popc", true)
                     .Case("barrier0.and", true)
                     .Case("barrier0.or", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
1636 break; // No other 'nvvm.*'.
1637 }
1638 break;
1639 }
1640 case 'o':
1641 if (Name.starts_with("objectsize.")) {
1642 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1643 if (F->arg_size() == 2 || F->arg_size() == 3) {
1644 rename(F);
1645 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1646 Intrinsic::objectsize, Tys);
1647 return true;
1648 }
1649 }
1650 break;
1651
1652 case 'p':
1653 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1654 rename(F);
1655 NewFn = Intrinsic::getOrInsertDeclaration(
1656 F->getParent(), Intrinsic::ptr_annotation,
1657 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1658 return true;
1659 }
1660 break;
1661
1662 case 'r': {
1663 if (Name.consume_front("riscv.")) {
1666 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1667 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1668 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1669 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1672 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1673 rename(F);
1674 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1675 return true;
1676 }
1677 break; // No other applicable upgrades.
1678 }
1679
1681 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1682 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1685 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1686 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1687 rename(F);
1688 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1689 return true;
1690 }
1691 break; // No other applicable upgrades.
1692 }
1693
1695 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1696 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1697 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1698 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1699 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1700 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1703 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1704 rename(F);
1705 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1706 return true;
1707 }
1708 break; // No other applicable upgrades.
1709 }
1710 break; // No other 'riscv.*' intrinsics
1711 }
1712 } break;
1713
1714 case 's':
1715 if (Name == "stackprotectorcheck") {
1716 NewFn = nullptr;
1717 return true;
1718 }
1719 break;
1720
1721 case 't':
1722 if (Name == "thread.pointer") {
1724 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1725 return true;
1726 }
1727 break;
1728
1729 case 'v': {
1730 if (Name == "var.annotation" && F->arg_size() == 4) {
1731 rename(F);
1732 NewFn = Intrinsic::getOrInsertDeclaration(
1733 F->getParent(), Intrinsic::var_annotation,
1734 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1735 return true;
1736 }
1737 break;
1738 }
1739
1740 case 'w':
1741 if (Name.consume_front("wasm.")) {
1744 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1745 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1746 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1749 rename(F);
1750 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1751 F->getReturnType());
1752 return true;
1753 }
1754
1755 if (Name.consume_front("dot.i8x16.i7x16.")) {
1757 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1758 .Case("add.signed",
1759 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1760 .Default(Intrinsic::not_intrinsic);
1761 if (ID != Intrinsic::not_intrinsic) {
1762 rename(F);
1763 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1764 return true;
1765 }
1766 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1767 }
1768 break; // No other 'wasm.*'.
1769 }
1770 break;
1771
1772 case 'x':
1773 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1774 return true;
1775 }
1776
1777 auto *ST = dyn_cast<StructType>(F->getReturnType());
1778 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1779 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1780 // Replace return type with literal non-packed struct. Only do this for
1781 // intrinsics declared to return a struct, not for intrinsics with
1782 // overloaded return type, in which case the exact struct type will be
1783 // mangled into the name.
1784 SmallVector<Intrinsic::IITDescriptor> Desc;
1785 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1786 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1787 auto *FT = F->getFunctionType();
1788 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1789 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1790 std::string Name = F->getName().str();
1791 rename(F);
1792 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1793 Name, F->getParent());
1794
1795 // The new function may also need remangling.
1796 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1797 NewFn = *Result;
1798 return true;
1799 }
1800 }
1801
1802 // Remangle our intrinsic since we upgrade the mangling
1803 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1804 if (Result != std::nullopt) {
1805 NewFn = *Result;
1806 return true;
1807 }
1808
1809 // This may not belong here. This function is effectively being overloaded
1810 // to both detect an intrinsic which needs upgrading, and to provide the
1811 // upgraded form of the intrinsic. We should perhaps have two separate
1812 // functions for this.
1813 return false;
1814}
1815
1816 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1817 bool CanUpgradeDebugIntrinsicsToRecords) {
1818 NewFn = nullptr;
1819 bool Upgraded =
1820 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1821
1822 // Upgrade intrinsic attributes. This does not change the function.
1823 if (NewFn)
1824 F = NewFn;
1825 if (Intrinsic::ID id = F->getIntrinsicID()) {
1826 // Only do this if the intrinsic signature is valid.
1827 SmallVector<Type *> OverloadTys;
1828 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1829 F->setAttributes(
1830 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1831 }
1832 return Upgraded;
1833}
1834
1835 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1836 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1837 GV->getName() == "llvm.global_dtors")) ||
1838 !GV->hasInitializer())
1839 return nullptr;
1840 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1841 if (!ATy)
1842 return nullptr;
1843 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1844 if (!STy || STy->getNumElements() != 2)
1845 return nullptr;
1846
1847 LLVMContext &C = GV->getContext();
1848 IRBuilder<> IRB(C);
1849 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1850 IRB.getPtrTy());
1851 Constant *Init = GV->getInitializer();
1852 unsigned N = Init->getNumOperands();
1853 std::vector<Constant *> NewCtors(N);
1854 for (unsigned i = 0; i != N; ++i) {
1855 auto Ctor = cast<Constant>(Init->getOperand(i));
1856 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1857 Ctor->getAggregateElement(1),
1858 Constant::getNullValue(IRB.getPtrTy()));
1859 }
1860 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1861
1862 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1863 NewInit, GV->getName());
1864}
1865
1866// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1867// to byte shuffles.
1868 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1869 unsigned Shift) {
1870 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1871 unsigned NumElts = ResultTy->getNumElements() * 8;
1872
1873 // Bitcast from a 64-bit element type to a byte element type.
1874 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1875 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1876
1877 // We'll be shuffling in zeroes.
1878 Value *Res = Constant::getNullValue(VecTy);
1879
1880 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1881 // we'll just return the zero vector.
1882 if (Shift < 16) {
1883 int Idxs[64];
1884 // 256/512-bit version is split into 2/4 16-byte lanes.
1885 for (unsigned l = 0; l != NumElts; l += 16)
1886 for (unsigned i = 0; i != 16; ++i) {
1887 unsigned Idx = NumElts + i - Shift;
1888 if (Idx < NumElts)
1889 Idx -= NumElts - 16; // end of lane, switch operand.
1890 Idxs[l + i] = Idx + l;
1891 }
1892
1893 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1894 }
1895
1896 // Bitcast back to a 64-bit element type.
1897 return Builder.CreateBitCast(Res, ResultTy, "cast");
1898}
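
// A minimal, standalone sketch of the index math above, assuming a
// hypothetical 256-bit vector (NumElts = 32 bytes) shifted left by 3 bytes:
// indices below NumElts select zero bytes from the first shuffle operand,
// and indices >= NumElts select bytes of Op.
#include <cstdio>
int main() {
  const unsigned NumElts = 32, Shift = 3;
  int Idxs[64];
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = NumElts + i - Shift;
      if (Idx < NumElts)
        Idx -= NumElts - 16; // end of lane, switch operand.
      Idxs[l + i] = Idx + l;
    }
  for (unsigned i = 0; i != NumElts; ++i)
    printf("%d ", Idxs[i]); // 13 14 15 32 33 ... per 16-byte lane
  return 0;
}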
1899
1900// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1901// to byte shuffles.
1902 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1903 unsigned Shift) {
1904 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1905 unsigned NumElts = ResultTy->getNumElements() * 8;
1906
1907 // Bitcast from a 64-bit element type to a byte element type.
1908 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1909 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1910
1911 // We'll be shuffling in zeroes.
1912 Value *Res = Constant::getNullValue(VecTy);
1913
1914 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1915 // we'll just return the zero vector.
1916 if (Shift < 16) {
1917 int Idxs[64];
1918 // 256/512-bit version is split into 2/4 16-byte lanes.
1919 for (unsigned l = 0; l != NumElts; l += 16)
1920 for (unsigned i = 0; i != 16; ++i) {
1921 unsigned Idx = i + Shift;
1922 if (Idx >= 16)
1923 Idx += NumElts - 16; // end of lane, switch operand.
1924 Idxs[l + i] = Idx + l;
1925 }
1926
1927 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1928 }
1929
1930 // Bitcast back to a 64-bit element type.
1931 return Builder.CreateBitCast(Res, ResultTy, "cast");
1932}
1933
1934static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1935 unsigned NumElts) {
1936 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1937 Type *MaskTy = FixedVectorType::get(
1938 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1939 Mask = Builder.CreateBitCast(Mask, MaskTy);
1940
1941 // If we have fewer than 8 elements (1, 2, or 4), then the starting mask was an
1942 // i8 and we need to extract down to the right number of elements.
1943 if (NumElts <= 4) {
1944 int Indices[4];
1945 for (unsigned i = 0; i != NumElts; ++i)
1946 Indices[i] = i;
1947 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1948 "extract");
1949 }
1950
1951 return Mask;
1952}
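
// A standalone sketch of the mask narrowing above, assuming a hypothetical
// i8 mask driving a 4-element operation: the bitcast-to-<8 x i1> plus the
// shuffle amounts to taking the low NumElts bits, one bit per lane.
#include <cstdio>
int main() {
  const unsigned char Mask = 0xB5; // hypothetical 8-bit k-mask, 0b10110101
  const unsigned NumElts = 4;
  for (unsigned i = 0; i != NumElts; ++i)
    printf("lane %u: %d\n", i, (Mask >> i) & 1); // 1, 0, 1, 0
  return 0;
}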
1953
1954static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1955 Value *Op1) {
1956 // If the mask is all ones just emit the first operation.
1957 if (const auto *C = dyn_cast<Constant>(Mask))
1958 if (C->isAllOnesValue())
1959 return Op0;
1960
1961 Mask = getX86MaskVec(Builder, Mask,
1962 cast<FixedVectorType>(Op0->getType())->getNumElements());
1963 return Builder.CreateSelect(Mask, Op0, Op1);
1964}
1965
1966static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1967 Value *Op1) {
1968 // If the mask is all ones just emit the first operation.
1969 if (const auto *C = dyn_cast<Constant>(Mask))
1970 if (C->isAllOnesValue())
1971 return Op0;
1972
1973 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1974 Mask->getType()->getIntegerBitWidth());
1975 Mask = Builder.CreateBitCast(Mask, MaskTy);
1976 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1977 return Builder.CreateSelect(Mask, Op0, Op1);
1978}
1979
1980// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1981 // PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
1982// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1983 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1984 Value *Op1, Value *Shift,
1985 Value *Passthru, Value *Mask,
1986 bool IsVALIGN) {
1987 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1988
1989 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1990 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1991 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1992 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1993
1994 // Mask the immediate for VALIGN.
1995 if (IsVALIGN)
1996 ShiftVal &= (NumElts - 1);
1997
1998 // If palignr is shifting the pair of vectors more than the size of two
1999 // lanes, emit zero.
2000 if (ShiftVal >= 32)
2001 return llvm::Constant::getNullValue(Op0->getType());
2002
2003 // If palignr is shifting the pair of input vectors more than one lane,
2004 // but less than two lanes, convert to shifting in zeroes.
2005 if (ShiftVal > 16) {
2006 ShiftVal -= 16;
2007 Op1 = Op0;
2008 Op0 = llvm::Constant::getNullValue(Op0->getType());
2009 }
2010
2011 int Indices[64];
2012 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2013 for (unsigned l = 0; l < NumElts; l += 16) {
2014 for (unsigned i = 0; i != 16; ++i) {
2015 unsigned Idx = ShiftVal + i;
2016 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2017 Idx += NumElts - 16; // End of lane, switch operand.
2018 Indices[l + i] = Idx + l;
2019 }
2020 }
2021
2022 Value *Align = Builder.CreateShuffleVector(
2023 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2024
2025 return emitX86Select(Builder, Mask, Align, Passthru);
2026}
2027
2028 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2029 bool ZeroMask, bool IndexForm) {
2030 Type *Ty = CI.getType();
2031 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2032 unsigned EltWidth = Ty->getScalarSizeInBits();
2033 bool IsFloat = Ty->isFPOrFPVectorTy();
2034 Intrinsic::ID IID;
2035 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2036 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2037 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2038 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2039 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2040 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2041 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2042 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2043 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2044 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2045 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2046 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2047 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2048 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2049 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2050 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2051 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2052 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2053 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2054 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2055 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2056 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2057 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2058 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2059 else if (VecWidth == 128 && EltWidth == 16)
2060 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2061 else if (VecWidth == 256 && EltWidth == 16)
2062 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2063 else if (VecWidth == 512 && EltWidth == 16)
2064 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2065 else if (VecWidth == 128 && EltWidth == 8)
2066 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2067 else if (VecWidth == 256 && EltWidth == 8)
2068 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2069 else if (VecWidth == 512 && EltWidth == 8)
2070 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073
2074 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2075 CI.getArgOperand(2) };
2076
2077 // If this isn't index form we need to swap operand 0 and 1.
2078 if (!IndexForm)
2079 std::swap(Args[0], Args[1]);
2080
2081 Value *V = Builder.CreateIntrinsic(IID, Args);
2082 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2083 : Builder.CreateBitCast(CI.getArgOperand(1),
2084 Ty);
2085 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2086}
2087
2088 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2089 Intrinsic::ID IID) {
2090 Type *Ty = CI.getType();
2091 Value *Op0 = CI.getOperand(0);
2092 Value *Op1 = CI.getOperand(1);
2093 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2094
2095 if (CI.arg_size() == 4) { // For masked intrinsics.
2096 Value *VecSrc = CI.getOperand(2);
2097 Value *Mask = CI.getOperand(3);
2098 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2099 }
2100 return Res;
2101}
2102
2103 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2104 bool IsRotateRight) {
2105 Type *Ty = CI.getType();
2106 Value *Src = CI.getArgOperand(0);
2107 Value *Amt = CI.getArgOperand(1);
2108
2109 // The amount may be a scalar immediate, in which case we create a splat vector.
2110 // Funnel-shift amounts are treated modulo the bit width, and the types are all
2111 // powers of 2, so we only care about the lowest log2(width) bits anyway.
2112 if (Amt->getType() != Ty) {
2113 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2114 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2115 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2116 }
2117
2118 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2119 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2120
2121 if (CI.arg_size() == 4) { // For masked intrinsics.
2122 Value *VecSrc = CI.getOperand(2);
2123 Value *Mask = CI.getOperand(3);
2124 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2125 }
2126 return Res;
2127}
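
// A standalone sketch of the rotate-to-funnel-shift mapping above: fshl with
// both value operands equal is a rotate-left, and because the amount is taken
// modulo the bit width, only the low log2(width) bits matter.
#include <cstdint>
#include <cstdio>
static uint32_t rotl32(uint32_t X, uint32_t Amt) {
  Amt &= 31; // matches the modulo behavior of llvm.fshl
  return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
}
int main() {
  printf("0x%08x\n", rotl32(0x80000001u, 33)); // 33 & 31 == 1 -> 0x00000003
  return 0;
}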
2128
2129static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2130 bool IsSigned) {
2131 Type *Ty = CI.getType();
2132 Value *LHS = CI.getArgOperand(0);
2133 Value *RHS = CI.getArgOperand(1);
2134
2135 CmpInst::Predicate Pred;
2136 switch (Imm) {
2137 case 0x0:
2138 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2139 break;
2140 case 0x1:
2141 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2142 break;
2143 case 0x2:
2144 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2145 break;
2146 case 0x3:
2147 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2148 break;
2149 case 0x4:
2150 Pred = ICmpInst::ICMP_EQ;
2151 break;
2152 case 0x5:
2153 Pred = ICmpInst::ICMP_NE;
2154 break;
2155 case 0x6:
2156 return Constant::getNullValue(Ty); // FALSE
2157 case 0x7:
2158 return Constant::getAllOnesValue(Ty); // TRUE
2159 default:
2160 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2161 }
2162
2163 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2164 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2165 return Ext;
2166}
2167
2168 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2169 bool IsShiftRight, bool ZeroMask) {
2170 Type *Ty = CI.getType();
2171 Value *Op0 = CI.getArgOperand(0);
2172 Value *Op1 = CI.getArgOperand(1);
2173 Value *Amt = CI.getArgOperand(2);
2174
2175 if (IsShiftRight)
2176 std::swap(Op0, Op1);
2177
2178 // The amount may be a scalar immediate, in which case we create a splat vector.
2179 // Funnel-shift amounts are treated modulo the bit width, and the types are all
2180 // powers of 2, so we only care about the lowest log2(width) bits anyway.
2181 if (Amt->getType() != Ty) {
2182 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2183 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2184 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2185 }
2186
2187 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2188 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2189
2190 unsigned NumArgs = CI.arg_size();
2191 if (NumArgs >= 4) { // For masked intrinsics.
2192 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2193 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2194 CI.getArgOperand(0);
2195 Value *Mask = CI.getOperand(NumArgs - 1);
2196 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2197 }
2198 return Res;
2199}
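
// A standalone sketch of the funnel-shift semantics used above: on 32-bit
// lanes, fshl(A, B, Amt) returns the high 32 bits of the 64-bit value (A:B)
// shifted left by Amt & 31; the right-shift form swaps the operands and uses
// fshr instead.
#include <cstdint>
#include <cstdio>
static uint32_t fshl32(uint32_t A, uint32_t B, uint32_t Amt) {
  const uint64_t Concat = ((uint64_t)A << 32) | B;
  return (uint32_t)((Concat << (Amt & 31)) >> 32);
}
int main() {
  printf("0x%08x\n", fshl32(0x00000001u, 0x80000000u, 1)); // 0x00000003
  return 0;
}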
2200
2201 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2202 Value *Mask, bool Aligned) {
2203 const Align Alignment =
2204 Aligned
2205 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2206 : Align(1);
2207
2208 // If the mask is all ones just emit a regular store.
2209 if (const auto *C = dyn_cast<Constant>(Mask))
2210 if (C->isAllOnesValue())
2211 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2212
2213 // Convert the mask from an integer type to a vector of i1.
2214 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2215 Mask = getX86MaskVec(Builder, Mask, NumElts);
2216 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2217}
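
// A standalone sketch of the alignment rule above: the "aligned" masked-store
// forms assume the vector's natural alignment (its size in bytes), while the
// unaligned forms use Align(1). Assuming a hypothetical 512-bit store:
#include <cstdio>
int main() {
  const unsigned VecBits = 512;
  const bool Aligned = true;
  unsigned AlignBytes = Aligned ? VecBits / 8 : 1;
  printf("%u\n", AlignBytes); // 64
  return 0;
}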
2218
2219 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2220 Value *Passthru, Value *Mask, bool Aligned) {
2221 Type *ValTy = Passthru->getType();
2222 const Align Alignment =
2223 Aligned
2224 ? Align(
2225 ValTy->getPrimitiveSizeInBits().getFixedValue() /
2226 8)
2227 : Align(1);
2228
2229 // If the mask is all ones just emit a regular load.
2230 if (const auto *C = dyn_cast<Constant>(Mask))
2231 if (C->isAllOnesValue())
2232 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2233
2234 // Convert the mask from an integer type to a vector of i1.
2235 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2236 Mask = getX86MaskVec(Builder, Mask, NumElts);
2237 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2238}
2239
2240static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2241 Type *Ty = CI.getType();
2242 Value *Op0 = CI.getArgOperand(0);
2243 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2244 {Op0, Builder.getInt1(false)});
2245 if (CI.arg_size() == 3)
2246 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2247 return Res;
2248}
2249
2250static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2251 Type *Ty = CI.getType();
2252
2253 // Arguments have a vXi32 type so cast to vXi64.
2254 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2255 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2256
2257 if (IsSigned) {
2258 // Shift left then arithmetic shift right.
2259 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2260 LHS = Builder.CreateShl(LHS, ShiftAmt);
2261 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2262 RHS = Builder.CreateShl(RHS, ShiftAmt);
2263 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2264 } else {
2265 // Clear the upper bits.
2266 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2267 LHS = Builder.CreateAnd(LHS, Mask);
2268 RHS = Builder.CreateAnd(RHS, Mask);
2269 }
2270
2271 Value *Res = Builder.CreateMul(LHS, RHS);
2272
2273 if (CI.arg_size() == 4)
2274 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2275
2276 return Res;
2277}
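
// A standalone sketch of the widening trick above: the signed form
// sign-extends the low 32 bits of each 64-bit lane with a shl/ashr pair,
// while the unsigned form just masks off the high half. The lane value here
// is an arbitrary illustration.
#include <cstdint>
#include <cstdio>
int main() {
  const uint64_t Lane = 0xDEADBEEFFFFFFFF6ull;    // low 32 bits hold -10
  int64_t SignedLo = (int64_t)(Lane << 32) >> 32; // -10
  uint64_t UnsignedLo = Lane & 0xffffffffull;     // 4294967286
  printf("%lld %llu\n", (long long)SignedLo, (unsigned long long)UnsignedLo);
  return 0;
}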
2278
2279 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2280 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2281 Value *Mask) {
2282 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2283 if (Mask) {
2284 const auto *C = dyn_cast<Constant>(Mask);
2285 if (!C || !C->isAllOnesValue())
2286 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2287 }
2288
2289 if (NumElts < 8) {
2290 int Indices[8];
2291 for (unsigned i = 0; i != NumElts; ++i)
2292 Indices[i] = i;
2293 for (unsigned i = NumElts; i != 8; ++i)
2294 Indices[i] = NumElts + i % NumElts;
2295 Vec = Builder.CreateShuffleVector(Vec,
2296 Constant::getNullValue(Vec->getType()),
2297 Indices);
2298 }
2299 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2300}
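
// A standalone sketch of the widening above, assuming NumElts = 4: the four
// compare bits land in bits 0..3 of the resulting i8, and the shuffle pads
// bits 4..7 with zeros (indices >= NumElts select the all-zero operand).
#include <cstdio>
int main() {
  const bool Cmp[4] = {true, false, true, true}; // hypothetical compare bits
  unsigned char Out = 0;
  for (unsigned i = 0; i != 4; ++i)
    Out |= (unsigned char)(Cmp[i] << i); // bits 4..7 stay zero
  printf("0x%02x\n", Out); // 0x0d
  return 0;
}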
2301
2302 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2303 unsigned CC, bool Signed) {
2304 Value *Op0 = CI.getArgOperand(0);
2305 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2306
2307 Value *Cmp;
2308 if (CC == 3) {
2309 Cmp = Constant::getNullValue(
2310 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2311 } else if (CC == 7) {
2312 Cmp = Constant::getAllOnesValue(
2313 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2314 } else {
2315 ICmpInst::Predicate Pred;
2316 switch (CC) {
2317 default: llvm_unreachable("Unknown condition code");
2318 case 0: Pred = ICmpInst::ICMP_EQ; break;
2319 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2320 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2321 case 4: Pred = ICmpInst::ICMP_NE; break;
2322 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2323 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2324 }
2325 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2326 }
2327
2328 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2329
2330 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2331}
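
// A standalone sketch of the CC decoding above: CC values 3 and 7 fold to
// constant all-zero / all-one masks and never reach the icmp; the others map
// to predicates, with the Signed flag choosing signed or unsigned variants.
#include <cstdio>
int main() {
  const char *SignedPred[8] = {"eq", "slt", "sle", "false(0)",
                               "ne", "sge", "sgt", "true(-1)"};
  for (unsigned CC = 0; CC != 8; ++CC)
    printf("CC=%u -> %s\n", CC, SignedPred[CC]);
  return 0;
}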
2332
2333// Replace a masked intrinsic with an older unmasked intrinsic.
2334 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2335 Intrinsic::ID IID) {
2336 Value *Rep =
2337 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2338 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2339}
2340
2341 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2342 Value* A = CI.getArgOperand(0);
2343 Value* B = CI.getArgOperand(1);
2344 Value* Src = CI.getArgOperand(2);
2345 Value* Mask = CI.getArgOperand(3);
2346
2347 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2348 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2349 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2350 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2351 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2352 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2353}
2354
2355 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2356 Value* Op = CI.getArgOperand(0);
2357 Type* ReturnOp = CI.getType();
2358 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2359 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2360 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2361}
2362
2363// Replace intrinsic with unmasked version and a select.
2364 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2365 CallBase &CI, Value *&Rep) {
2366 Name = Name.substr(12); // Remove avx512.mask.
2367
2368 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2369 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2370 Intrinsic::ID IID;
2371 if (Name.starts_with("max.p")) {
2372 if (VecWidth == 128 && EltWidth == 32)
2373 IID = Intrinsic::x86_sse_max_ps;
2374 else if (VecWidth == 128 && EltWidth == 64)
2375 IID = Intrinsic::x86_sse2_max_pd;
2376 else if (VecWidth == 256 && EltWidth == 32)
2377 IID = Intrinsic::x86_avx_max_ps_256;
2378 else if (VecWidth == 256 && EltWidth == 64)
2379 IID = Intrinsic::x86_avx_max_pd_256;
2380 else
2381 llvm_unreachable("Unexpected intrinsic");
2382 } else if (Name.starts_with("min.p")) {
2383 if (VecWidth == 128 && EltWidth == 32)
2384 IID = Intrinsic::x86_sse_min_ps;
2385 else if (VecWidth == 128 && EltWidth == 64)
2386 IID = Intrinsic::x86_sse2_min_pd;
2387 else if (VecWidth == 256 && EltWidth == 32)
2388 IID = Intrinsic::x86_avx_min_ps_256;
2389 else if (VecWidth == 256 && EltWidth == 64)
2390 IID = Intrinsic::x86_avx_min_pd_256;
2391 else
2392 llvm_unreachable("Unexpected intrinsic");
2393 } else if (Name.starts_with("pshuf.b.")) {
2394 if (VecWidth == 128)
2395 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2396 else if (VecWidth == 256)
2397 IID = Intrinsic::x86_avx2_pshuf_b;
2398 else if (VecWidth == 512)
2399 IID = Intrinsic::x86_avx512_pshuf_b_512;
2400 else
2401 llvm_unreachable("Unexpected intrinsic");
2402 } else if (Name.starts_with("pmul.hr.sw.")) {
2403 if (VecWidth == 128)
2404 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2405 else if (VecWidth == 256)
2406 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2407 else if (VecWidth == 512)
2408 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2409 else
2410 llvm_unreachable("Unexpected intrinsic");
2411 } else if (Name.starts_with("pmulh.w.")) {
2412 if (VecWidth == 128)
2413 IID = Intrinsic::x86_sse2_pmulh_w;
2414 else if (VecWidth == 256)
2415 IID = Intrinsic::x86_avx2_pmulh_w;
2416 else if (VecWidth == 512)
2417 IID = Intrinsic::x86_avx512_pmulh_w_512;
2418 else
2419 llvm_unreachable("Unexpected intrinsic");
2420 } else if (Name.starts_with("pmulhu.w.")) {
2421 if (VecWidth == 128)
2422 IID = Intrinsic::x86_sse2_pmulhu_w;
2423 else if (VecWidth == 256)
2424 IID = Intrinsic::x86_avx2_pmulhu_w;
2425 else if (VecWidth == 512)
2426 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2427 else
2428 llvm_unreachable("Unexpected intrinsic");
2429 } else if (Name.starts_with("pmaddw.d.")) {
2430 if (VecWidth == 128)
2431 IID = Intrinsic::x86_sse2_pmadd_wd;
2432 else if (VecWidth == 256)
2433 IID = Intrinsic::x86_avx2_pmadd_wd;
2434 else if (VecWidth == 512)
2435 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2436 else
2437 llvm_unreachable("Unexpected intrinsic");
2438 } else if (Name.starts_with("pmaddubs.w.")) {
2439 if (VecWidth == 128)
2440 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2441 else if (VecWidth == 256)
2442 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2443 else if (VecWidth == 512)
2444 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2445 else
2446 llvm_unreachable("Unexpected intrinsic");
2447 } else if (Name.starts_with("packsswb.")) {
2448 if (VecWidth == 128)
2449 IID = Intrinsic::x86_sse2_packsswb_128;
2450 else if (VecWidth == 256)
2451 IID = Intrinsic::x86_avx2_packsswb;
2452 else if (VecWidth == 512)
2453 IID = Intrinsic::x86_avx512_packsswb_512;
2454 else
2455 llvm_unreachable("Unexpected intrinsic");
2456 } else if (Name.starts_with("packssdw.")) {
2457 if (VecWidth == 128)
2458 IID = Intrinsic::x86_sse2_packssdw_128;
2459 else if (VecWidth == 256)
2460 IID = Intrinsic::x86_avx2_packssdw;
2461 else if (VecWidth == 512)
2462 IID = Intrinsic::x86_avx512_packssdw_512;
2463 else
2464 llvm_unreachable("Unexpected intrinsic");
2465 } else if (Name.starts_with("packuswb.")) {
2466 if (VecWidth == 128)
2467 IID = Intrinsic::x86_sse2_packuswb_128;
2468 else if (VecWidth == 256)
2469 IID = Intrinsic::x86_avx2_packuswb;
2470 else if (VecWidth == 512)
2471 IID = Intrinsic::x86_avx512_packuswb_512;
2472 else
2473 llvm_unreachable("Unexpected intrinsic");
2474 } else if (Name.starts_with("packusdw.")) {
2475 if (VecWidth == 128)
2476 IID = Intrinsic::x86_sse41_packusdw;
2477 else if (VecWidth == 256)
2478 IID = Intrinsic::x86_avx2_packusdw;
2479 else if (VecWidth == 512)
2480 IID = Intrinsic::x86_avx512_packusdw_512;
2481 else
2482 llvm_unreachable("Unexpected intrinsic");
2483 } else if (Name.starts_with("vpermilvar.")) {
2484 if (VecWidth == 128 && EltWidth == 32)
2485 IID = Intrinsic::x86_avx_vpermilvar_ps;
2486 else if (VecWidth == 128 && EltWidth == 64)
2487 IID = Intrinsic::x86_avx_vpermilvar_pd;
2488 else if (VecWidth == 256 && EltWidth == 32)
2489 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2490 else if (VecWidth == 256 && EltWidth == 64)
2491 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2492 else if (VecWidth == 512 && EltWidth == 32)
2493 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2494 else if (VecWidth == 512 && EltWidth == 64)
2495 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2496 else
2497 llvm_unreachable("Unexpected intrinsic");
2498 } else if (Name == "cvtpd2dq.256") {
2499 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2500 } else if (Name == "cvtpd2ps.256") {
2501 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2502 } else if (Name == "cvttpd2dq.256") {
2503 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2504 } else if (Name == "cvttps2dq.128") {
2505 IID = Intrinsic::x86_sse2_cvttps2dq;
2506 } else if (Name == "cvttps2dq.256") {
2507 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2508 } else if (Name.starts_with("permvar.")) {
2509 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2510 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2511 IID = Intrinsic::x86_avx2_permps;
2512 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2513 IID = Intrinsic::x86_avx2_permd;
2514 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2515 IID = Intrinsic::x86_avx512_permvar_df_256;
2516 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2517 IID = Intrinsic::x86_avx512_permvar_di_256;
2518 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2519 IID = Intrinsic::x86_avx512_permvar_sf_512;
2520 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2521 IID = Intrinsic::x86_avx512_permvar_si_512;
2522 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2523 IID = Intrinsic::x86_avx512_permvar_df_512;
2524 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2525 IID = Intrinsic::x86_avx512_permvar_di_512;
2526 else if (VecWidth == 128 && EltWidth == 16)
2527 IID = Intrinsic::x86_avx512_permvar_hi_128;
2528 else if (VecWidth == 256 && EltWidth == 16)
2529 IID = Intrinsic::x86_avx512_permvar_hi_256;
2530 else if (VecWidth == 512 && EltWidth == 16)
2531 IID = Intrinsic::x86_avx512_permvar_hi_512;
2532 else if (VecWidth == 128 && EltWidth == 8)
2533 IID = Intrinsic::x86_avx512_permvar_qi_128;
2534 else if (VecWidth == 256 && EltWidth == 8)
2535 IID = Intrinsic::x86_avx512_permvar_qi_256;
2536 else if (VecWidth == 512 && EltWidth == 8)
2537 IID = Intrinsic::x86_avx512_permvar_qi_512;
2538 else
2539 llvm_unreachable("Unexpected intrinsic");
2540 } else if (Name.starts_with("dbpsadbw.")) {
2541 if (VecWidth == 128)
2542 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2543 else if (VecWidth == 256)
2544 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2545 else if (VecWidth == 512)
2546 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2547 else
2548 llvm_unreachable("Unexpected intrinsic");
2549 } else if (Name.starts_with("pmultishift.qb.")) {
2550 if (VecWidth == 128)
2551 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2552 else if (VecWidth == 256)
2553 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2554 else if (VecWidth == 512)
2555 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2556 else
2557 llvm_unreachable("Unexpected intrinsic");
2558 } else if (Name.starts_with("conflict.")) {
2559 if (Name[9] == 'd' && VecWidth == 128)
2560 IID = Intrinsic::x86_avx512_conflict_d_128;
2561 else if (Name[9] == 'd' && VecWidth == 256)
2562 IID = Intrinsic::x86_avx512_conflict_d_256;
2563 else if (Name[9] == 'd' && VecWidth == 512)
2564 IID = Intrinsic::x86_avx512_conflict_d_512;
2565 else if (Name[9] == 'q' && VecWidth == 128)
2566 IID = Intrinsic::x86_avx512_conflict_q_128;
2567 else if (Name[9] == 'q' && VecWidth == 256)
2568 IID = Intrinsic::x86_avx512_conflict_q_256;
2569 else if (Name[9] == 'q' && VecWidth == 512)
2570 IID = Intrinsic::x86_avx512_conflict_q_512;
2571 else
2572 llvm_unreachable("Unexpected intrinsic");
2573 } else if (Name.starts_with("pavg.")) {
2574 if (Name[5] == 'b' && VecWidth == 128)
2575 IID = Intrinsic::x86_sse2_pavg_b;
2576 else if (Name[5] == 'b' && VecWidth == 256)
2577 IID = Intrinsic::x86_avx2_pavg_b;
2578 else if (Name[5] == 'b' && VecWidth == 512)
2579 IID = Intrinsic::x86_avx512_pavg_b_512;
2580 else if (Name[5] == 'w' && VecWidth == 128)
2581 IID = Intrinsic::x86_sse2_pavg_w;
2582 else if (Name[5] == 'w' && VecWidth == 256)
2583 IID = Intrinsic::x86_avx2_pavg_w;
2584 else if (Name[5] == 'w' && VecWidth == 512)
2585 IID = Intrinsic::x86_avx512_pavg_w_512;
2586 else
2587 llvm_unreachable("Unexpected intrinsic");
2588 } else
2589 return false;
2590
2591 SmallVector<Value *, 4> Args(CI.args());
2592 Args.pop_back();
2593 Args.pop_back();
2594 Rep = Builder.CreateIntrinsic(IID, Args);
2595 unsigned NumArgs = CI.arg_size();
2596 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2597 CI.getArgOperand(NumArgs - 2));
2598 return true;
2599}
2600
2601 /// Upgrade the comment in a call to inline asm that represents an ObjC
2602 /// retain/release marker.
2603void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2604 size_t Pos;
2605 if (AsmStr->find("mov\tfp") == 0 &&
2606 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2607 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2608 AsmStr->replace(Pos, 1, ";");
2609 }
2610}
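
// A standalone sketch of the rewrite above, fed a hypothetical marker string:
// the '#' that introduces the marker comment is replaced with ';'.
#include <cstdio>
#include <string>
int main() {
  std::string AsmStr =
      "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
  size_t Pos;
  if (AsmStr.find("mov\tfp") == 0 &&
      AsmStr.find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr.find("# marker")) != std::string::npos)
    AsmStr.replace(Pos, 1, ";");
  printf("%s\n", AsmStr.c_str()); // "...; marker for ..."
  return 0;
}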
2611
2612 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2613 Function *F, IRBuilder<> &Builder) {
2614 Value *Rep = nullptr;
2615
2616 if (Name == "abs.i" || Name == "abs.ll") {
2617 Value *Arg = CI->getArgOperand(0);
2618 Value *Neg = Builder.CreateNeg(Arg, "neg");
2619 Value *Cmp = Builder.CreateICmpSGE(
2620 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2621 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2622 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2623 Type *Ty = (Name == "abs.bf16")
2624 ? Builder.getBFloatTy()
2625 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2626 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2627 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2628 Rep = Builder.CreateBitCast(Abs, CI->getType());
2629 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2630 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2631 : Intrinsic::nvvm_fabs;
2632 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2633 } else if (Name.consume_front("ex2.approx.")) {
2634 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2635 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2636 : Intrinsic::nvvm_ex2_approx;
2637 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2638 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2639 Name.starts_with("atomic.load.add.f64.p")) {
2640 Value *Ptr = CI->getArgOperand(0);
2641 Value *Val = CI->getArgOperand(1);
2642 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2643 AtomicOrdering::SequentiallyConsistent);
2644 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2645 Name.starts_with("atomic.load.dec.32.p")) {
2646 Value *Ptr = CI->getArgOperand(0);
2647 Value *Val = CI->getArgOperand(1);
2648 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2650 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2651 AtomicOrdering::SequentiallyConsistent);
2652 } else if (Name.consume_front("max.") &&
2653 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2654 Name == "ui" || Name == "ull")) {
2655 Value *Arg0 = CI->getArgOperand(0);
2656 Value *Arg1 = CI->getArgOperand(1);
2657 Value *Cmp = Name.starts_with("u")
2658 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2659 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2660 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2661 } else if (Name.consume_front("min.") &&
2662 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2663 Name == "ui" || Name == "ull")) {
2664 Value *Arg0 = CI->getArgOperand(0);
2665 Value *Arg1 = CI->getArgOperand(1);
2666 Value *Cmp = Name.starts_with("u")
2667 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2668 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2669 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2670 } else if (Name == "clz.ll") {
2671 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2672 Value *Arg = CI->getArgOperand(0);
2673 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2674 {Arg, Builder.getFalse()},
2675 /*FMFSource=*/nullptr, "ctlz");
2676 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2677 } else if (Name == "popc.ll") {
2678 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2679 // i64.
2680 Value *Arg = CI->getArgOperand(0);
2681 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2682 Arg, /*FMFSource=*/nullptr, "ctpop");
2683 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2684 } else if (Name == "h2f") {
2685 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2686 {Builder.getFloatTy()}, CI->getArgOperand(0),
2687 /*FMFSource=*/nullptr, "h2f");
2688 } else if (Name.consume_front("bitcast.") &&
2689 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2690 Name == "d2ll")) {
2691 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2692 } else if (Name == "rotate.b32") {
2693 Value *Arg = CI->getOperand(0);
2694 Value *ShiftAmt = CI->getOperand(1);
2695 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2696 {Arg, Arg, ShiftAmt});
2697 } else if (Name == "rotate.b64") {
2698 Type *Int64Ty = Builder.getInt64Ty();
2699 Value *Arg = CI->getOperand(0);
2700 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2701 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2702 {Arg, Arg, ZExtShiftAmt});
2703 } else if (Name == "rotate.right.b64") {
2704 Type *Int64Ty = Builder.getInt64Ty();
2705 Value *Arg = CI->getOperand(0);
2706 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2707 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2708 {Arg, Arg, ZExtShiftAmt});
2709 } else if (Name == "swap.lo.hi.b64") {
2710 Type *Int64Ty = Builder.getInt64Ty();
2711 Value *Arg = CI->getOperand(0);
2712 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2713 {Arg, Arg, Builder.getInt64(32)});
2714 } else if ((Name.consume_front("ptr.gen.to.") &&
2715 consumeNVVMPtrAddrSpace(Name)) ||
2716 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2717 Name.starts_with(".to.gen"))) {
2718 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2719 } else if (Name.consume_front("ldg.global")) {
2720 Value *Ptr = CI->getArgOperand(0);
2721 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2722 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2723 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2724 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2725 MDNode *MD = MDNode::get(Builder.getContext(), {});
2726 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2727 return LD;
2728 } else if (Name == "tanh.approx.f32") {
2729 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2730 FastMathFlags FMF;
2731 FMF.setApproxFunc();
2732 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2733 FMF);
2734 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2735 Value *Arg =
2736 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2737 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2738 {}, {Arg});
2739 } else if (Name == "barrier") {
2740 Rep = Builder.CreateIntrinsic(
2741 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2742 {CI->getArgOperand(0), CI->getArgOperand(1)});
2743 } else if (Name == "barrier.sync") {
2744 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2745 {CI->getArgOperand(0)});
2746 } else if (Name == "barrier.sync.cnt") {
2747 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2748 {CI->getArgOperand(0), CI->getArgOperand(1)});
2749 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2750 Name == "barrier0.or") {
2751 Value *C = CI->getArgOperand(0);
2752 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2753
2754 Intrinsic::ID IID =
2755 StringSwitch<Intrinsic::ID>(Name)
2756 .Case("barrier0.popc",
2757 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2758 .Case("barrier0.and",
2759 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2760 .Case("barrier0.or",
2761 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2762 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2763 Rep = Builder.CreateZExt(Bar, CI->getType());
2764 } else {
2765 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2766 if (IID != Intrinsic::not_intrinsic &&
2767 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2768 rename(F);
2769 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2770 SmallVector<Value *, 2> Args;
2771 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2772 Value *Arg = CI->getArgOperand(I);
2773 Type *OldType = Arg->getType();
2774 Type *NewType = NewFn->getArg(I)->getType();
2775 Args.push_back(
2776 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2777 ? Builder.CreateBitCast(Arg, NewType)
2778 : Arg);
2779 }
2780 Rep = Builder.CreateCall(NewFn, Args);
2781 if (F->getReturnType()->isIntegerTy())
2782 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2783 }
2784 }
2785
2786 return Rep;
2787}
2788
2789 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2790 IRBuilder<> &Builder) {
2791 LLVMContext &C = F->getContext();
2792 Value *Rep = nullptr;
2793
2794 if (Name.starts_with("sse4a.movnt.")) {
2796 Elts.push_back(
2797 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2798 MDNode *Node = MDNode::get(C, Elts);
2799
2800 Value *Arg0 = CI->getArgOperand(0);
2801 Value *Arg1 = CI->getArgOperand(1);
2802
2803 // Nontemporal (unaligned) store of the 0'th element of the float/double
2804 // vector.
2805 Value *Extract =
2806 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2807
2808 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2809 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2810 } else if (Name.starts_with("avx.movnt.") ||
2811 Name.starts_with("avx512.storent.")) {
2813 Elts.push_back(
2814 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2815 MDNode *Node = MDNode::get(C, Elts);
2816
2817 Value *Arg0 = CI->getArgOperand(0);
2818 Value *Arg1 = CI->getArgOperand(1);
2819
2820 StoreInst *SI = Builder.CreateAlignedStore(
2821 Arg1, Arg0,
2822 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2823 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2824 } else if (Name == "sse2.storel.dq") {
2825 Value *Arg0 = CI->getArgOperand(0);
2826 Value *Arg1 = CI->getArgOperand(1);
2827
2828 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2829 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2830 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2831 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2832 } else if (Name.starts_with("sse.storeu.") ||
2833 Name.starts_with("sse2.storeu.") ||
2834 Name.starts_with("avx.storeu.")) {
2835 Value *Arg0 = CI->getArgOperand(0);
2836 Value *Arg1 = CI->getArgOperand(1);
2837 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2838 } else if (Name == "avx512.mask.store.ss") {
2839 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2840 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2841 Mask, false);
2842 } else if (Name.starts_with("avx512.mask.store")) {
2843 // "avx512.mask.storeu." or "avx512.mask.store."
2844 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2845 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2846 CI->getArgOperand(2), Aligned);
2847 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2848 // Upgrade packed integer vector compare intrinsics to compare instructions.
2849 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2850 bool CmpEq = Name[9] == 'e';
2851 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2852 CI->getArgOperand(0), CI->getArgOperand(1));
2853 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2854 } else if (Name.starts_with("avx512.broadcastm")) {
2855 Type *ExtTy = Type::getInt32Ty(C);
2856 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2857 ExtTy = Type::getInt64Ty(C);
2858 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2859 ExtTy->getPrimitiveSizeInBits();
2860 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2861 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2862 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2863 Value *Vec = CI->getArgOperand(0);
2864 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2865 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2866 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2867 } else if (Name.starts_with("avx.sqrt.p") ||
2868 Name.starts_with("sse2.sqrt.p") ||
2869 Name.starts_with("sse.sqrt.p")) {
2870 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2871 {CI->getArgOperand(0)});
2872 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2873 if (CI->arg_size() == 4 &&
2874 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2875 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2876 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2877 : Intrinsic::x86_avx512_sqrt_pd_512;
2878
2879 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2880 Rep = Builder.CreateIntrinsic(IID, Args);
2881 } else {
2882 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2883 {CI->getArgOperand(0)});
2884 }
2885 Rep =
2886 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2887 } else if (Name.starts_with("avx512.ptestm") ||
2888 Name.starts_with("avx512.ptestnm")) {
2889 Value *Op0 = CI->getArgOperand(0);
2890 Value *Op1 = CI->getArgOperand(1);
2891 Value *Mask = CI->getArgOperand(2);
2892 Rep = Builder.CreateAnd(Op0, Op1);
2893 llvm::Type *Ty = Op0->getType();
2894 Constant *Zero = llvm::Constant::getNullValue(Ty);
2895 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2896 ? ICmpInst::ICMP_NE
2897 : ICmpInst::ICMP_EQ;
2898 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2899 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2900 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2901 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2902 ->getNumElements();
2903 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2904 Rep =
2905 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2906 } else if (Name.starts_with("avx512.kunpck")) {
2907 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2908 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2909 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2910 int Indices[64];
2911 for (unsigned i = 0; i != NumElts; ++i)
2912 Indices[i] = i;
2913
2914 // First extract half of each vector. This gives better codegen than
2915 // doing it in a single shuffle.
2916 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2917 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2918 // Concat the vectors.
2919 // NOTE: Operands have to be swapped to match intrinsic definition.
2920 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2921 Rep = Builder.CreateBitCast(Rep, CI->getType());
2922 } else if (Name == "avx512.kand.w") {
2923 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2924 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2925 Rep = Builder.CreateAnd(LHS, RHS);
2926 Rep = Builder.CreateBitCast(Rep, CI->getType());
2927 } else if (Name == "avx512.kandn.w") {
2928 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2929 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2930 LHS = Builder.CreateNot(LHS);
2931 Rep = Builder.CreateAnd(LHS, RHS);
2932 Rep = Builder.CreateBitCast(Rep, CI->getType());
2933 } else if (Name == "avx512.kor.w") {
2934 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2935 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2936 Rep = Builder.CreateOr(LHS, RHS);
2937 Rep = Builder.CreateBitCast(Rep, CI->getType());
2938 } else if (Name == "avx512.kxor.w") {
2939 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2940 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2941 Rep = Builder.CreateXor(LHS, RHS);
2942 Rep = Builder.CreateBitCast(Rep, CI->getType());
2943 } else if (Name == "avx512.kxnor.w") {
2944 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2945 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2946 LHS = Builder.CreateNot(LHS);
2947 Rep = Builder.CreateXor(LHS, RHS);
2948 Rep = Builder.CreateBitCast(Rep, CI->getType());
2949 } else if (Name == "avx512.knot.w") {
2950 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2951 Rep = Builder.CreateNot(Rep);
2952 Rep = Builder.CreateBitCast(Rep, CI->getType());
2953 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2954 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2955 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2956 Rep = Builder.CreateOr(LHS, RHS);
2957 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2958 Value *C;
2959 if (Name[14] == 'c')
2960 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2961 else
2962 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2963 Rep = Builder.CreateICmpEQ(Rep, C);
2964 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2965 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2966 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2967 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2968 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2969 Type *I32Ty = Type::getInt32Ty(C);
2970 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2971 ConstantInt::get(I32Ty, 0));
2972 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2973 ConstantInt::get(I32Ty, 0));
2974 Value *EltOp;
2975 if (Name.contains(".add."))
2976 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2977 else if (Name.contains(".sub."))
2978 EltOp = Builder.CreateFSub(Elt0, Elt1);
2979 else if (Name.contains(".mul."))
2980 EltOp = Builder.CreateFMul(Elt0, Elt1);
2981 else
2982 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2983 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2984 ConstantInt::get(I32Ty, 0));
2985 } else if (Name.starts_with("avx512.mask.pcmp")) {
2986 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2987 bool CmpEq = Name[16] == 'e';
2988 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2989 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2990 Type *OpTy = CI->getArgOperand(0)->getType();
2991 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2992 Intrinsic::ID IID;
2993 switch (VecWidth) {
2994 default:
2995 llvm_unreachable("Unexpected intrinsic");
2996 case 128:
2997 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2998 break;
2999 case 256:
3000 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3001 break;
3002 case 512:
3003 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3004 break;
3005 }
3006
3007 Rep =
3008 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3009 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3010 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3011 Type *OpTy = CI->getArgOperand(0)->getType();
3012 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3013 unsigned EltWidth = OpTy->getScalarSizeInBits();
3014 Intrinsic::ID IID;
3015 if (VecWidth == 128 && EltWidth == 32)
3016 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3017 else if (VecWidth == 256 && EltWidth == 32)
3018 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3019 else if (VecWidth == 512 && EltWidth == 32)
3020 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3021 else if (VecWidth == 128 && EltWidth == 64)
3022 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3023 else if (VecWidth == 256 && EltWidth == 64)
3024 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3025 else if (VecWidth == 512 && EltWidth == 64)
3026 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3027 else
3028 llvm_unreachable("Unexpected intrinsic");
3029
3030 Rep =
3031 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3032 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3033 } else if (Name.starts_with("avx512.cmp.p")) {
3034 SmallVector<Value *, 4> Args(CI->args());
3035 Type *OpTy = Args[0]->getType();
3036 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3037 unsigned EltWidth = OpTy->getScalarSizeInBits();
3038 Intrinsic::ID IID;
3039 if (VecWidth == 128 && EltWidth == 32)
3040 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3041 else if (VecWidth == 256 && EltWidth == 32)
3042 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3043 else if (VecWidth == 512 && EltWidth == 32)
3044 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3045 else if (VecWidth == 128 && EltWidth == 64)
3046 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3047 else if (VecWidth == 256 && EltWidth == 64)
3048 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3049 else if (VecWidth == 512 && EltWidth == 64)
3050 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3051 else
3052 llvm_unreachable("Unexpected intrinsic");
3053
3054 Value *Mask = CI->getArgOperand(CI->arg_size() - 1);
3055 if (VecWidth == 512)
3056 std::swap(Mask, Args.back());
3057 Args.push_back(Mask);
3058
3059 Rep = Builder.CreateIntrinsic(IID, Args);
3060 } else if (Name.starts_with("avx512.mask.cmp.")) {
3061 // Integer compare intrinsics.
3062 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3063 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3064 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3065 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3066 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3067 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3068 Name.starts_with("avx512.cvtw2mask.") ||
3069 Name.starts_with("avx512.cvtd2mask.") ||
3070 Name.starts_with("avx512.cvtq2mask.")) {
3071 Value *Op = CI->getArgOperand(0);
3072 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3073 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3074 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3075 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3076 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3077 Name.starts_with("avx512.mask.pabs")) {
3078 Rep = upgradeAbs(Builder, *CI);
3079 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3080 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3081 Name.starts_with("avx512.mask.pmaxs")) {
3082 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3083 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3084 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3085 Name.starts_with("avx512.mask.pmaxu")) {
3086 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3087 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3088 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3089 Name.starts_with("avx512.mask.pmins")) {
3090 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3091 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3092 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3093 Name.starts_with("avx512.mask.pminu")) {
3094 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3095 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3096 Name == "avx512.pmulu.dq.512" ||
3097 Name.starts_with("avx512.mask.pmulu.dq.")) {
3098 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3099 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3100 Name == "avx512.pmul.dq.512" ||
3101 Name.starts_with("avx512.mask.pmul.dq.")) {
3102 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3103 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3104 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3105 Rep =
3106 Builder.CreateSIToFP(CI->getArgOperand(1),
3107 cast<VectorType>(CI->getType())->getElementType());
3108 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3109 } else if (Name == "avx512.cvtusi2sd") {
3110 Rep =
3111 Builder.CreateUIToFP(CI->getArgOperand(1),
3112 cast<VectorType>(CI->getType())->getElementType());
3113 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3114 } else if (Name == "sse2.cvtss2sd") {
3115 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3116 Rep = Builder.CreateFPExt(
3117 Rep, cast<VectorType>(CI->getType())->getElementType());
3118 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3119 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3120 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3121 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3122 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3123 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3124 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3125 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3126 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3127 Name == "avx512.mask.cvtqq2ps.256" ||
3128 Name == "avx512.mask.cvtqq2ps.512" ||
3129 Name == "avx512.mask.cvtuqq2ps.256" ||
3130 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3131 Name == "avx.cvt.ps2.pd.256" ||
3132 Name == "avx512.mask.cvtps2pd.128" ||
3133 Name == "avx512.mask.cvtps2pd.256") {
3134 auto *DstTy = cast<FixedVectorType>(CI->getType());
3135 Rep = CI->getArgOperand(0);
3136 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3137
3138 unsigned NumDstElts = DstTy->getNumElements();
3139 if (NumDstElts < SrcTy->getNumElements()) {
3140 assert(NumDstElts == 2 && "Unexpected vector size");
3141 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3142 }
3143
3144 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3145 bool IsUnsigned = Name.contains("cvtu");
3146 if (IsPS2PD)
3147 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3148 else if (CI->arg_size() == 4 &&
3149 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3150 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3151 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3152 : Intrinsic::x86_avx512_sitofp_round;
3153 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3154 {Rep, CI->getArgOperand(3)});
3155 } else {
3156 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3157 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3158 }
3159
3160 if (CI->arg_size() >= 3)
3161 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3162 CI->getArgOperand(1));
3163 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3164 Name.starts_with("vcvtph2ps.")) {
3165 auto *DstTy = cast<FixedVectorType>(CI->getType());
3166 Rep = CI->getArgOperand(0);
3167 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3168 unsigned NumDstElts = DstTy->getNumElements();
3169 if (NumDstElts != SrcTy->getNumElements()) {
3170 assert(NumDstElts == 4 && "Unexpected vector size");
3171 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3172 }
3173 Rep = Builder.CreateBitCast(
3174 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3175 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3176 if (CI->arg_size() >= 3)
3177 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3178 CI->getArgOperand(1));
3179 } else if (Name.starts_with("avx512.mask.load")) {
3180 // "avx512.mask.loadu." or "avx512.mask.load."
3181 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3182 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3183 CI->getArgOperand(2), Aligned);
3184 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3185 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3186 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3187 ResultTy->getNumElements());
3188
3189 Rep = Builder.CreateIntrinsic(
3190 Intrinsic::masked_expandload, ResultTy,
3191 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3192 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3193 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3194 Value *MaskVec =
3195 getX86MaskVec(Builder, CI->getArgOperand(2),
3196 cast<FixedVectorType>(ResultTy)->getNumElements());
3197
3198 Rep = Builder.CreateIntrinsic(
3199 Intrinsic::masked_compressstore, ResultTy,
3200 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3201 } else if (Name.starts_with("avx512.mask.compress.") ||
3202 Name.starts_with("avx512.mask.expand.")) {
3203 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3204
3205 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3206 ResultTy->getNumElements());
3207
3208 bool IsCompress = Name[12] == 'c';
3209 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3210 : Intrinsic::x86_avx512_mask_expand;
3211 Rep = Builder.CreateIntrinsic(
3212 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3213 } else if (Name.starts_with("xop.vpcom")) {
3214 bool IsSigned;
3215 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3216 Name.ends_with("uq"))
3217 IsSigned = false;
3218 else if (Name.ends_with("b") || Name.ends_with("w") ||
3219 Name.ends_with("d") || Name.ends_with("q"))
3220 IsSigned = true;
3221 else
3222 llvm_unreachable("Unknown suffix");
3223
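// The XOP condition is either an explicit immediate operand (3-operand form)
// or encoded in the name: 0=lt, 1=le, 2=gt, 3=ge, 4=eq, 5=ne, 6=false, 7=true.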
3224 unsigned Imm;
3225 if (CI->arg_size() == 3) {
3226 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3227 } else {
3228 Name = Name.substr(9); // strip off "xop.vpcom"
3229 if (Name.starts_with("lt"))
3230 Imm = 0;
3231 else if (Name.starts_with("le"))
3232 Imm = 1;
3233 else if (Name.starts_with("gt"))
3234 Imm = 2;
3235 else if (Name.starts_with("ge"))
3236 Imm = 3;
3237 else if (Name.starts_with("eq"))
3238 Imm = 4;
3239 else if (Name.starts_with("ne"))
3240 Imm = 5;
3241 else if (Name.starts_with("false"))
3242 Imm = 6;
3243 else if (Name.starts_with("true"))
3244 Imm = 7;
3245 else
3246 llvm_unreachable("Unknown condition");
3247 }
3248
3249 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3250 } else if (Name.starts_with("xop.vpcmov")) {
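// vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel).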
3251 Value *Sel = CI->getArgOperand(2);
3252 Value *NotSel = Builder.CreateNot(Sel);
3253 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3254 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3255 Rep = Builder.CreateOr(Sel0, Sel1);
3256 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3257 Name.starts_with("avx512.mask.prol")) {
3258 Rep = upgradeX86Rotate(Builder, *CI, false);
3259 } else if (Name.starts_with("avx512.pror") ||
3260 Name.starts_with("avx512.mask.pror")) {
3261 Rep = upgradeX86Rotate(Builder, *CI, true);
3262 } else if (Name.starts_with("avx512.vpshld.") ||
3263 Name.starts_with("avx512.mask.vpshld") ||
3264 Name.starts_with("avx512.maskz.vpshld")) {
3265 bool ZeroMask = Name[11] == 'z';
3266 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3267 } else if (Name.starts_with("avx512.vpshrd.") ||
3268 Name.starts_with("avx512.mask.vpshrd") ||
3269 Name.starts_with("avx512.maskz.vpshrd")) {
3270 bool ZeroMask = Name[11] == 'z';
3271 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3272 } else if (Name == "sse42.crc32.64.8") {
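// The 64-bit CRC32-over-a-byte form only uses the low 32 bits of its first
// operand, so lower it to the 32-bit intrinsic and zero-extend the result.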
3273 Value *Trunc0 =
3274 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3275 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3276 {Trunc0, CI->getArgOperand(1)});
3277 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3278 } else if (Name.starts_with("avx.vbroadcast.s") ||
3279 Name.starts_with("avx512.vbroadcast.s")) {
3280 // Replace broadcasts with a series of insertelements.
3281 auto *VecTy = cast<FixedVectorType>(CI->getType());
3282 Type *EltTy = VecTy->getElementType();
3283 unsigned EltNum = VecTy->getNumElements();
3284 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3285 Type *I32Ty = Type::getInt32Ty(C);
3286 Rep = PoisonValue::get(VecTy);
3287 for (unsigned I = 0; I < EltNum; ++I)
3288 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3289 } else if (Name.starts_with("sse41.pmovsx") ||
3290 Name.starts_with("sse41.pmovzx") ||
3291 Name.starts_with("avx2.pmovsx") ||
3292 Name.starts_with("avx2.pmovzx") ||
3293 Name.starts_with("avx512.mask.pmovsx") ||
3294 Name.starts_with("avx512.mask.pmovzx")) {
3295 auto *DstTy = cast<FixedVectorType>(CI->getType());
3296 unsigned NumDstElts = DstTy->getNumElements();
3297
3298 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3299 SmallVector<int, 8> ShuffleMask(NumDstElts);
3300 for (unsigned i = 0; i != NumDstElts; ++i)
3301 ShuffleMask[i] = i;
3302
3303 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3304
3305 bool DoSext = Name.contains("pmovsx");
3306 Rep =
3307 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3308 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3309 if (CI->arg_size() == 3)
3310 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3311 CI->getArgOperand(1));
3312 } else if (Name == "avx512.mask.pmov.qd.256" ||
3313 Name == "avx512.mask.pmov.qd.512" ||
3314 Name == "avx512.mask.pmov.wb.256" ||
3315 Name == "avx512.mask.pmov.wb.512") {
3316 Type *Ty = CI->getArgOperand(1)->getType();
3317 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3318 Rep =
3319 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3320 } else if (Name.starts_with("avx.vbroadcastf128") ||
3321 Name == "avx2.vbroadcasti128") {
3322 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3323 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3324 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3325 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3326 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3327 if (NumSrcElts == 2)
3328 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3329 else
3330 Rep = Builder.CreateShuffleVector(Load,
3331 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3332 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3333 Name.starts_with("avx512.mask.shuf.f")) {
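// These shuffle whole 128-bit lanes: the low half of the result comes from
// the first source and the high half from the second, with the lanes picked
// by bit fields of the immediate.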
3334 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3335 Type *VT = CI->getType();
3336 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3337 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3338 unsigned ControlBitsMask = NumLanes - 1;
3339 unsigned NumControlBits = NumLanes / 2;
3340 SmallVector<int, 8> ShuffleMask(0);
3341
3342 for (unsigned l = 0; l != NumLanes; ++l) {
3343 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3344 // We actually need the other source.
3345 if (l >= NumLanes / 2)
3346 LaneMask += NumLanes;
3347 for (unsigned i = 0; i != NumElementsInLane; ++i)
3348 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3349 }
3350 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3351 CI->getArgOperand(1), ShuffleMask);
3352 Rep =
3353 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3354 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3355 Name.starts_with("avx512.mask.broadcasti")) {
3356 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3357 ->getNumElements();
3358 unsigned NumDstElts =
3359 cast<FixedVectorType>(CI->getType())->getNumElements();
3360
3361 SmallVector<int, 8> ShuffleMask(NumDstElts);
3362 for (unsigned i = 0; i != NumDstElts; ++i)
3363 ShuffleMask[i] = i % NumSrcElts;
3364
3365 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3366 CI->getArgOperand(0), ShuffleMask);
3367 Rep =
3368 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3369 } else if (Name.starts_with("avx2.pbroadcast") ||
3370 Name.starts_with("avx2.vbroadcast") ||
3371 Name.starts_with("avx512.pbroadcast") ||
3372 Name.starts_with("avx512.mask.broadcast.s")) {
3373 // Replace vp?broadcasts with a vector shuffle.
3374 Value *Op = CI->getArgOperand(0);
3375 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3376 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3377 SmallVector<int, 8> M;
3378 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3379 Rep = Builder.CreateShuffleVector(Op, M);
3380
3381 if (CI->arg_size() == 3)
3382 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3383 CI->getArgOperand(1));
3384 } else if (Name.starts_with("sse2.padds.") ||
3385 Name.starts_with("avx2.padds.") ||
3386 Name.starts_with("avx512.padds.") ||
3387 Name.starts_with("avx512.mask.padds.")) {
3388 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3389 } else if (Name.starts_with("sse2.psubs.") ||
3390 Name.starts_with("avx2.psubs.") ||
3391 Name.starts_with("avx512.psubs.") ||
3392 Name.starts_with("avx512.mask.psubs.")) {
3393 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3394 } else if (Name.starts_with("sse2.paddus.") ||
3395 Name.starts_with("avx2.paddus.") ||
3396 Name.starts_with("avx512.mask.paddus.")) {
3397 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3398 } else if (Name.starts_with("sse2.psubus.") ||
3399 Name.starts_with("avx2.psubus.") ||
3400 Name.starts_with("avx512.mask.psubus.")) {
3401 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3402 } else if (Name.starts_with("avx512.mask.palignr.")) {
3403 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3404 CI->getArgOperand(1), CI->getArgOperand(2),
3405 CI->getArgOperand(3), CI->getArgOperand(4),
3406 false);
3407 } else if (Name.starts_with("avx512.mask.valign.")) {
3408 Rep = upgradeX86ALIGNIntrinsics(
3409 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3410 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3411 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3412 // 128/256-bit shift left specified in bits.
3413 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3414 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3415 Shift / 8); // Shift is in bits.
3416 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3417 // 128/256-bit shift right specified in bits.
3418 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3419 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3420 Shift / 8); // Shift is in bits.
3421 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3422 Name == "avx512.psll.dq.512") {
3423 // 128/256/512-bit shift left specified in bytes.
3424 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3425 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3426 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3427 Name == "avx512.psrl.dq.512") {
3428 // 128/256/512-bit shift right specified in bytes.
3429 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3430 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3431 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3432 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3433 Name.starts_with("avx2.pblendd.")) {
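// Lower the blends to shuffles: a set immediate bit picks the corresponding
// element from the second source, and the 8-bit immediate repeats for
// vectors longer than 8 elements.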
3434 Value *Op0 = CI->getArgOperand(0);
3435 Value *Op1 = CI->getArgOperand(1);
3436 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3437 auto *VecTy = cast<FixedVectorType>(CI->getType());
3438 unsigned NumElts = VecTy->getNumElements();
3439
3440 SmallVector<int, 16> Idxs(NumElts);
3441 for (unsigned i = 0; i != NumElts; ++i)
3442 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3443
3444 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3445 } else if (Name.starts_with("avx.vinsertf128.") ||
3446 Name == "avx2.vinserti128" ||
3447 Name.starts_with("avx512.mask.insert")) {
3448 Value *Op0 = CI->getArgOperand(0);
3449 Value *Op1 = CI->getArgOperand(1);
3450 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3451 unsigned DstNumElts =
3452 cast<FixedVectorType>(CI->getType())->getNumElements();
3453 unsigned SrcNumElts =
3454 cast<FixedVectorType>(Op1->getType())->getNumElements();
3455 unsigned Scale = DstNumElts / SrcNumElts;
3456
3457 // Mask off the high bits of the immediate value; hardware ignores those.
3458 Imm = Imm % Scale;
3459
3460 // Extend the second operand into a vector the size of the destination.
3461 SmallVector<int, 8> Idxs(DstNumElts);
3462 for (unsigned i = 0; i != SrcNumElts; ++i)
3463 Idxs[i] = i;
3464 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3465 Idxs[i] = SrcNumElts;
3466 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3467
3468 // Insert the second operand into the first operand.
3469
3470 // Note that there is no guarantee that instruction lowering will actually
3471 // produce a vinsertf128 instruction for the created shuffles. In
3472 // particular, the 0 immediate case involves no lane changes, so it can
3473 // be handled as a blend.
3474
3475 // Example of shuffle mask for 32-bit elements:
3476 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3477 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3478
3479 // First fill with identity mask.
3480 for (unsigned i = 0; i != DstNumElts; ++i)
3481 Idxs[i] = i;
3482 // Then replace the elements where we need to insert.
3483 for (unsigned i = 0; i != SrcNumElts; ++i)
3484 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3485 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3486
3487 // If the intrinsic has a mask operand, handle that.
3488 if (CI->arg_size() == 5)
3489 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3490 CI->getArgOperand(3));
3491 } else if (Name.starts_with("avx.vextractf128.") ||
3492 Name == "avx2.vextracti128" ||
3493 Name.starts_with("avx512.mask.vextract")) {
3494 Value *Op0 = CI->getArgOperand(0);
3495 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3496 unsigned DstNumElts =
3497 cast<FixedVectorType>(CI->getType())->getNumElements();
3498 unsigned SrcNumElts =
3499 cast<FixedVectorType>(Op0->getType())->getNumElements();
3500 unsigned Scale = SrcNumElts / DstNumElts;
3501
3502 // Mask off the high bits of the immediate value; hardware ignores those.
3503 Imm = Imm % Scale;
3504
3505 // Get indexes for the subvector of the input vector.
3506 SmallVector<int, 8> Idxs(DstNumElts);
3507 for (unsigned i = 0; i != DstNumElts; ++i) {
3508 Idxs[i] = i + (Imm * DstNumElts);
3509 }
3510 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3511
3512 // If the intrinsic has a mask operand, handle that.
3513 if (CI->arg_size() == 4)
3514 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3515 CI->getArgOperand(2));
3516 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3517 Name.starts_with("avx512.mask.perm.di.")) {
3518 Value *Op0 = CI->getArgOperand(0);
3519 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3520 auto *VecTy = cast<FixedVectorType>(CI->getType());
3521 unsigned NumElts = VecTy->getNumElements();
3522
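// Each element is selected within its aligned group of four by a 2-bit
// field of the immediate.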
3523 SmallVector<int, 8> Idxs(NumElts);
3524 for (unsigned i = 0; i != NumElts; ++i)
3525 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3526
3527 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3528
3529 if (CI->arg_size() == 4)
3530 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3531 CI->getArgOperand(2));
3532 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3533 // The immediate permute control byte looks like this:
3534 // [1:0] - select 128 bits from sources for low half of destination
3535 // [2] - ignore
3536 // [3] - zero low half of destination
3537 // [5:4] - select 128 bits from sources for high half of destination
3538 // [6] - ignore
3539 // [7] - zero high half of destination
3540
3541 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3542
3543 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3544 unsigned HalfSize = NumElts / 2;
3545 SmallVector<int, 8> ShuffleMask(NumElts);
3546
3547 // Determine which operand(s) are actually in use for this instruction.
3548 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3549 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3550
3551 // If needed, replace operands based on zero mask.
3552 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3553 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3554
3555 // Permute low half of result.
3556 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3557 for (unsigned i = 0; i < HalfSize; ++i)
3558 ShuffleMask[i] = StartIndex + i;
3559
3560 // Permute high half of result.
3561 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3562 for (unsigned i = 0; i < HalfSize; ++i)
3563 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3564
3565 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3566
3567 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3568 Name.starts_with("avx512.mask.vpermil.p") ||
3569 Name.starts_with("avx512.mask.pshuf.d.")) {
3570 Value *Op0 = CI->getArgOperand(0);
3571 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3572 auto *VecTy = cast<FixedVectorType>(CI->getType());
3573 unsigned NumElts = VecTy->getNumElements();
3574 // Calculate the size of each index in the immediate.
3575 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3576 unsigned IdxMask = ((1 << IdxSize) - 1);
3577
3578 SmallVector<int, 8> Idxs(NumElts);
3579 // Look up the bits for this element, wrapping around the immediate every
3580 // 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
3581 // by the first index of each group.
3582 for (unsigned i = 0; i != NumElts; ++i)
3583 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3584
3585 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3586
3587 if (CI->arg_size() == 4)
3588 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3589 CI->getArgOperand(2));
3590 } else if (Name == "sse2.pshufl.w" ||
3591 Name.starts_with("avx512.mask.pshufl.w.")) {
3592 Value *Op0 = CI->getArgOperand(0);
3593 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3594 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3595
3596 SmallVector<int, 16> Idxs(NumElts);
3597 for (unsigned l = 0; l != NumElts; l += 8) {
3598 for (unsigned i = 0; i != 4; ++i)
3599 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3600 for (unsigned i = 4; i != 8; ++i)
3601 Idxs[i + l] = i + l;
3602 }
3603
3604 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3605
3606 if (CI->arg_size() == 4)
3607 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3608 CI->getArgOperand(2));
3609 } else if (Name == "sse2.pshufh.w" ||
3610 Name.starts_with("avx512.mask.pshufh.w.")) {
3611 Value *Op0 = CI->getArgOperand(0);
3612 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3613 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3614
3615 SmallVector<int, 16> Idxs(NumElts);
3616 for (unsigned l = 0; l != NumElts; l += 8) {
3617 for (unsigned i = 0; i != 4; ++i)
3618 Idxs[i + l] = i + l;
3619 for (unsigned i = 0; i != 4; ++i)
3620 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3621 }
3622
3623 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3624
3625 if (CI->arg_size() == 4)
3626 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3627 CI->getArgOperand(2));
3628 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3629 Value *Op0 = CI->getArgOperand(0);
3630 Value *Op1 = CI->getArgOperand(1);
3631 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3632 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3633
3634 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3635 unsigned HalfLaneElts = NumLaneElts / 2;
3636
3637 SmallVector<int, 16> Idxs(NumElts);
3638 for (unsigned i = 0; i != NumElts; ++i) {
3639 // Base index is the starting element of the lane.
3640 Idxs[i] = i - (i % NumLaneElts);
3641 // If we are halfway through the lane, switch to the other source.
3642 if ((i % NumLaneElts) >= HalfLaneElts)
3643 Idxs[i] += NumElts;
3644 // Now select the specific element by adding HalfLaneElts bits from
3645 // the immediate, wrapping around the immediate every 8 bits.
3646 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3647 }
3648
3649 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3650
3651 Rep =
3652 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3653 } else if (Name.starts_with("avx512.mask.movddup") ||
3654 Name.starts_with("avx512.mask.movshdup") ||
3655 Name.starts_with("avx512.mask.movsldup")) {
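// movsldup/movddup duplicate the even-indexed elements of each 128-bit lane,
// movshdup the odd-indexed ones (Offset selects which).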
3656 Value *Op0 = CI->getArgOperand(0);
3657 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3658 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3659
3660 unsigned Offset = 0;
3661 if (Name.starts_with("avx512.mask.movshdup."))
3662 Offset = 1;
3663
3664 SmallVector<int, 16> Idxs(NumElts);
3665 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3666 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3667 Idxs[i + l + 0] = i + l + Offset;
3668 Idxs[i + l + 1] = i + l + Offset;
3669 }
3670
3671 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3672
3673 Rep =
3674 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3675 } else if (Name.starts_with("avx512.mask.punpckl") ||
3676 Name.starts_with("avx512.mask.unpckl.")) {
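// Interleave the elements from the low half of each 128-bit lane of the two
// sources.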
3677 Value *Op0 = CI->getArgOperand(0);
3678 Value *Op1 = CI->getArgOperand(1);
3679 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3680 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3681
3682 SmallVector<int, 64> Idxs(NumElts);
3683 for (int l = 0; l != NumElts; l += NumLaneElts)
3684 for (int i = 0; i != NumLaneElts; ++i)
3685 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3686
3687 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3688
3689 Rep =
3690 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3691 } else if (Name.starts_with("avx512.mask.punpckh") ||
3692 Name.starts_with("avx512.mask.unpckh.")) {
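// Interleave the elements from the high half of each 128-bit lane of the two
// sources.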
3693 Value *Op0 = CI->getArgOperand(0);
3694 Value *Op1 = CI->getArgOperand(1);
3695 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3696 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3697
3698 SmallVector<int, 64> Idxs(NumElts);
3699 for (int l = 0; l != NumElts; l += NumLaneElts)
3700 for (int i = 0; i != NumLaneElts; ++i)
3701 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3702
3703 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3704
3705 Rep =
3706 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3707 } else if (Name.starts_with("avx512.mask.and.") ||
3708 Name.starts_with("avx512.mask.pand.")) {
3709 VectorType *FTy = cast<VectorType>(CI->getType());
3710 VectorType *ITy = VectorType::getInteger(FTy);
3711 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3712 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3713 Rep = Builder.CreateBitCast(Rep, FTy);
3714 Rep =
3715 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3716 } else if (Name.starts_with("avx512.mask.andn.") ||
3717 Name.starts_with("avx512.mask.pandn.")) {
3718 VectorType *FTy = cast<VectorType>(CI->getType());
3719 VectorType *ITy = VectorType::getInteger(FTy);
3720 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3721 Rep = Builder.CreateAnd(Rep,
3722 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3723 Rep = Builder.CreateBitCast(Rep, FTy);
3724 Rep =
3725 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3726 } else if (Name.starts_with("avx512.mask.or.") ||
3727 Name.starts_with("avx512.mask.por.")) {
3728 VectorType *FTy = cast<VectorType>(CI->getType());
3729 VectorType *ITy = VectorType::getInteger(FTy);
3730 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3731 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3732 Rep = Builder.CreateBitCast(Rep, FTy);
3733 Rep =
3734 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3735 } else if (Name.starts_with("avx512.mask.xor.") ||
3736 Name.starts_with("avx512.mask.pxor.")) {
3737 VectorType *FTy = cast<VectorType>(CI->getType());
3738 VectorType *ITy = VectorType::getInteger(FTy);
3739 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3740 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3741 Rep = Builder.CreateBitCast(Rep, FTy);
3742 Rep =
3743 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3744 } else if (Name.starts_with("avx512.mask.padd.")) {
3745 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3746 Rep =
3747 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3748 } else if (Name.starts_with("avx512.mask.psub.")) {
3749 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3750 Rep =
3751 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3752 } else if (Name.starts_with("avx512.mask.pmull.")) {
3753 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3754 Rep =
3755 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3756 } else if (Name.starts_with("avx512.mask.add.p")) {
3757 if (Name.ends_with(".512")) {
3758 Intrinsic::ID IID;
3759 if (Name[17] == 's')
3760 IID = Intrinsic::x86_avx512_add_ps_512;
3761 else
3762 IID = Intrinsic::x86_avx512_add_pd_512;
3763
3764 Rep = Builder.CreateIntrinsic(
3765 IID,
3766 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3767 } else {
3768 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3769 }
3770 Rep =
3771 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3772 } else if (Name.starts_with("avx512.mask.div.p")) {
3773 if (Name.ends_with(".512")) {
3774 Intrinsic::ID IID;
3775 if (Name[17] == 's')
3776 IID = Intrinsic::x86_avx512_div_ps_512;
3777 else
3778 IID = Intrinsic::x86_avx512_div_pd_512;
3779
3780 Rep = Builder.CreateIntrinsic(
3781 IID,
3782 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3783 } else {
3784 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3785 }
3786 Rep =
3787 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3788 } else if (Name.starts_with("avx512.mask.mul.p")) {
3789 if (Name.ends_with(".512")) {
3790 Intrinsic::ID IID;
3791 if (Name[17] == 's')
3792 IID = Intrinsic::x86_avx512_mul_ps_512;
3793 else
3794 IID = Intrinsic::x86_avx512_mul_pd_512;
3795
3796 Rep = Builder.CreateIntrinsic(
3797 IID,
3798 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3799 } else {
3800 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3801 }
3802 Rep =
3803 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3804 } else if (Name.starts_with("avx512.mask.sub.p")) {
3805 if (Name.ends_with(".512")) {
3806 Intrinsic::ID IID;
3807 if (Name[17] == 's')
3808 IID = Intrinsic::x86_avx512_sub_ps_512;
3809 else
3810 IID = Intrinsic::x86_avx512_sub_pd_512;
3811
3812 Rep = Builder.CreateIntrinsic(
3813 IID,
3814 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3815 } else {
3816 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3817 }
3818 Rep =
3819 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3820 } else if ((Name.starts_with("avx512.mask.max.p") ||
3821 Name.starts_with("avx512.mask.min.p")) &&
3822 Name.drop_front(18) == ".512") {
3823 bool IsDouble = Name[17] == 'd';
3824 bool IsMin = Name[13] == 'i';
3825 static const Intrinsic::ID MinMaxTbl[2][2] = {
3826 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3827 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3828 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3829
3830 Rep = Builder.CreateIntrinsic(
3831 IID,
3832 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3833 Rep =
3834 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3835 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3836 Rep =
3837 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3838 {CI->getArgOperand(0), Builder.getInt1(false)});
3839 Rep =
3840 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3841 } else if (Name.starts_with("avx512.mask.psll")) {
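// Decode the operation from the name: an 'i' marks the shift-by-immediate
// forms, a 'v' the variable per-element forms, and Size is the element-type
// character ('d', 'q', 'w', 's', or 'h') that follows the opcode.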
3842 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3843 bool IsVariable = Name[16] == 'v';
3844 char Size = Name[16] == '.' ? Name[17]
3845 : Name[17] == '.' ? Name[18]
3846 : Name[18] == '.' ? Name[19]
3847 : Name[20];
3848
3849 Intrinsic::ID IID;
3850 if (IsVariable && Name[17] != '.') {
3851 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3852 IID = Intrinsic::x86_avx2_psllv_q;
3853 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3854 IID = Intrinsic::x86_avx2_psllv_q_256;
3855 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3856 IID = Intrinsic::x86_avx2_psllv_d;
3857 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3858 IID = Intrinsic::x86_avx2_psllv_d_256;
3859 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3860 IID = Intrinsic::x86_avx512_psllv_w_128;
3861 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3862 IID = Intrinsic::x86_avx512_psllv_w_256;
3863 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3864 IID = Intrinsic::x86_avx512_psllv_w_512;
3865 else
3866 llvm_unreachable("Unexpected size");
3867 } else if (Name.ends_with(".128")) {
3868 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3869 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3870 : Intrinsic::x86_sse2_psll_d;
3871 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3872 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3873 : Intrinsic::x86_sse2_psll_q;
3874 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3875 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3876 : Intrinsic::x86_sse2_psll_w;
3877 else
3878 llvm_unreachable("Unexpected size");
3879 } else if (Name.ends_with(".256")) {
3880 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3881 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3882 : Intrinsic::x86_avx2_psll_d;
3883 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3884 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3885 : Intrinsic::x86_avx2_psll_q;
3886 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3887 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3888 : Intrinsic::x86_avx2_psll_w;
3889 else
3890 llvm_unreachable("Unexpected size");
3891 } else {
3892 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3893 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3894 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3895 : Intrinsic::x86_avx512_psll_d_512;
3896 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3897 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3898 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3899 : Intrinsic::x86_avx512_psll_q_512;
3900 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3901 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3902 : Intrinsic::x86_avx512_psll_w_512;
3903 else
3904 llvm_unreachable("Unexpected size");
3905 }
3906
3907 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3908 } else if (Name.starts_with("avx512.mask.psrl")) {
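// Same name-decoding scheme as avx512.mask.psll above.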
3909 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3910 bool IsVariable = Name[16] == 'v';
3911 char Size = Name[16] == '.' ? Name[17]
3912 : Name[17] == '.' ? Name[18]
3913 : Name[18] == '.' ? Name[19]
3914 : Name[20];
3915
3916 Intrinsic::ID IID;
3917 if (IsVariable && Name[17] != '.') {
3918 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3919 IID = Intrinsic::x86_avx2_psrlv_q;
3920 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3921 IID = Intrinsic::x86_avx2_psrlv_q_256;
3922 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3923 IID = Intrinsic::x86_avx2_psrlv_d;
3924 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3925 IID = Intrinsic::x86_avx2_psrlv_d_256;
3926 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3927 IID = Intrinsic::x86_avx512_psrlv_w_128;
3928 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3929 IID = Intrinsic::x86_avx512_psrlv_w_256;
3930 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3931 IID = Intrinsic::x86_avx512_psrlv_w_512;
3932 else
3933 llvm_unreachable("Unexpected size");
3934 } else if (Name.ends_with(".128")) {
3935 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3936 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3937 : Intrinsic::x86_sse2_psrl_d;
3938 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3939 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3940 : Intrinsic::x86_sse2_psrl_q;
3941 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3942 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3943 : Intrinsic::x86_sse2_psrl_w;
3944 else
3945 llvm_unreachable("Unexpected size");
3946 } else if (Name.ends_with(".256")) {
3947 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3948 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3949 : Intrinsic::x86_avx2_psrl_d;
3950 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3951 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3952 : Intrinsic::x86_avx2_psrl_q;
3953 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3954 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3955 : Intrinsic::x86_avx2_psrl_w;
3956 else
3957 llvm_unreachable("Unexpected size");
3958 } else {
3959 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3960 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3961 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3962 : Intrinsic::x86_avx512_psrl_d_512;
3963 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3964 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3965 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3966 : Intrinsic::x86_avx512_psrl_q_512;
3967 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3968 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3969 : Intrinsic::x86_avx512_psrl_w_512;
3970 else
3971 llvm_unreachable("Unexpected size");
3972 }
3973
3974 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3975 } else if (Name.starts_with("avx512.mask.psra")) {
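// Same name-decoding scheme as avx512.mask.psll above.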
3976 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3977 bool IsVariable = Name[16] == 'v';
3978 char Size = Name[16] == '.' ? Name[17]
3979 : Name[17] == '.' ? Name[18]
3980 : Name[18] == '.' ? Name[19]
3981 : Name[20];
3982
3983 Intrinsic::ID IID;
3984 if (IsVariable && Name[17] != '.') {
3985 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3986 IID = Intrinsic::x86_avx2_psrav_d;
3987 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3988 IID = Intrinsic::x86_avx2_psrav_d_256;
3989 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3990 IID = Intrinsic::x86_avx512_psrav_w_128;
3991 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3992 IID = Intrinsic::x86_avx512_psrav_w_256;
3993 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3994 IID = Intrinsic::x86_avx512_psrav_w_512;
3995 else
3996 llvm_unreachable("Unexpected size");
3997 } else if (Name.ends_with(".128")) {
3998 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3999 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4000 : Intrinsic::x86_sse2_psra_d;
4001 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4002 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4003 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4004 : Intrinsic::x86_avx512_psra_q_128;
4005 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4007 : Intrinsic::x86_sse2_psra_w;
4008 else
4009 llvm_unreachable("Unexpected size");
4010 } else if (Name.ends_with(".256")) {
4011 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4012 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4013 : Intrinsic::x86_avx2_psra_d;
4014 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4016 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4017 : Intrinsic::x86_avx512_psra_q_256;
4018 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4019 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4020 : Intrinsic::x86_avx2_psra_w;
4021 else
4022 llvm_unreachable("Unexpected size");
4023 } else {
4024 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4025 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4026 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4027 : Intrinsic::x86_avx512_psra_d_512;
4028 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4029 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4030 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4031 : Intrinsic::x86_avx512_psra_q_512;
4032 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4033 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4034 : Intrinsic::x86_avx512_psra_w_512;
4035 else
4036 llvm_unreachable("Unexpected size");
4037 }
4038
4039 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4040 } else if (Name.starts_with("avx512.mask.move.s")) {
4041 Rep = upgradeMaskedMove(Builder, *CI);
4042 } else if (Name.starts_with("avx512.cvtmask2")) {
4043 Rep = upgradeMaskToInt(Builder, *CI);
4044 } else if (Name.ends_with(".movntdqa")) {
4045 MDNode *Node = MDNode::get(
4046 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4047
4048 LoadInst *LI = Builder.CreateAlignedLoad(
4049 CI->getType(), CI->getArgOperand(0),
4050 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4051 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4052 Rep = LI;
4053 } else if (Name.starts_with("fma.vfmadd.") ||
4054 Name.starts_with("fma.vfmsub.") ||
4055 Name.starts_with("fma.vfnmadd.") ||
4056 Name.starts_with("fma.vfnmsub.")) {
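// Decode the name: 'n' negates the product, a following 's' (vfmsub,
// vfnmsub) negates the accumulator, and an 's' after the final '.' marks
// the scalar forms that operate on element 0.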
4057 bool NegMul = Name[6] == 'n';
4058 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4059 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4060
4061 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4062 CI->getArgOperand(2)};
4063
4064 if (IsScalar) {
4065 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4066 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4067 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4068 }
4069
4070 if (NegMul && !IsScalar)
4071 Ops[0] = Builder.CreateFNeg(Ops[0]);
4072 if (NegMul && IsScalar)
4073 Ops[1] = Builder.CreateFNeg(Ops[1]);
4074 if (NegAcc)
4075 Ops[2] = Builder.CreateFNeg(Ops[2]);
4076
4077 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4078
4079 if (IsScalar)
4080 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4081 } else if (Name.starts_with("fma4.vfmadd.s")) {
4082 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4083 CI->getArgOperand(2)};
4084
4085 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4086 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4087 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4088
4089 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4090
4091 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4092 Rep, (uint64_t)0);
4093 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4094 Name.starts_with("avx512.maskz.vfmadd.s") ||
4095 Name.starts_with("avx512.mask3.vfmadd.s") ||
4096 Name.starts_with("avx512.mask3.vfmsub.s") ||
4097 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4098 bool IsMask3 = Name[11] == '3';
4099 bool IsMaskZ = Name[11] == 'z';
4100 // Drop the "avx512.mask." prefix to make indexing into the name easier.
4101 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4102 bool NegMul = Name[2] == 'n';
4103 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4104
4105 Value *A = CI->getArgOperand(0);
4106 Value *B = CI->getArgOperand(1);
4107 Value *C = CI->getArgOperand(2);
4108
4109 if (NegMul && (IsMask3 || IsMaskZ))
4110 A = Builder.CreateFNeg(A);
4111 if (NegMul && !(IsMask3 || IsMaskZ))
4112 B = Builder.CreateFNeg(B);
4113 if (NegAcc)
4114 C = Builder.CreateFNeg(C);
4115
4116 A = Builder.CreateExtractElement(A, (uint64_t)0);
4117 B = Builder.CreateExtractElement(B, (uint64_t)0);
4118 C = Builder.CreateExtractElement(C, (uint64_t)0);
4119
4120 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4121 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4122 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4123
4124 Intrinsic::ID IID;
4125 if (Name.back() == 'd')
4126 IID = Intrinsic::x86_avx512_vfmadd_f64;
4127 else
4128 IID = Intrinsic::x86_avx512_vfmadd_f32;
4129 Rep = Builder.CreateIntrinsic(IID, Ops);
4130 } else {
4131 Rep = Builder.CreateFMA(A, B, C);
4132 }
4133
4134 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4135 : IsMask3 ? C
4136 : A;
4137
4138 // For Mask3 with NegAcc, we need to create a new extractelement that
4139 // avoids the negation above.
4140 if (NegAcc && IsMask3)
4141 PassThru =
4142 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4143
4144 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4145 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4146 (uint64_t)0);
4147 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4148 Name.starts_with("avx512.mask.vfnmadd.p") ||
4149 Name.starts_with("avx512.mask.vfnmsub.p") ||
4150 Name.starts_with("avx512.mask3.vfmadd.p") ||
4151 Name.starts_with("avx512.mask3.vfmsub.p") ||
4152 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4153 Name.starts_with("avx512.maskz.vfmadd.p")) {
4154 bool IsMask3 = Name[11] == '3';
4155 bool IsMaskZ = Name[11] == 'z';
4156 // Drop the "avx512.mask." prefix to make indexing into the name easier.
4157 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4158 bool NegMul = Name[2] == 'n';
4159 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4160
4161 Value *A = CI->getArgOperand(0);
4162 Value *B = CI->getArgOperand(1);
4163 Value *C = CI->getArgOperand(2);
4164
4165 if (NegMul && (IsMask3 || IsMaskZ))
4166 A = Builder.CreateFNeg(A);
4167 if (NegMul && !(IsMask3 || IsMaskZ))
4168 B = Builder.CreateFNeg(B);
4169 if (NegAcc)
4170 C = Builder.CreateFNeg(C);
4171
4172 if (CI->arg_size() == 5 &&
4173 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4174 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4175 Intrinsic::ID IID;
4176 // Check the character before ".512" in the name.
4177 if (Name[Name.size() - 5] == 's')
4178 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4179 else
4180 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4181
4182 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4183 } else {
4184 Rep = Builder.CreateFMA(A, B, C);
4185 }
4186
4187 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4188 : IsMask3 ? CI->getArgOperand(2)
4189 : CI->getArgOperand(0);
4190
4191 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4192 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4193 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4194 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4195 Intrinsic::ID IID;
4196 if (VecWidth == 128 && EltWidth == 32)
4197 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4198 else if (VecWidth == 256 && EltWidth == 32)
4199 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4200 else if (VecWidth == 128 && EltWidth == 64)
4201 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4202 else if (VecWidth == 256 && EltWidth == 64)
4203 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4204 else
4205 llvm_unreachable("Unexpected intrinsic");
4206
4207 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4208 CI->getArgOperand(2)};
4209 Ops[2] = Builder.CreateFNeg(Ops[2]);
4210 Rep = Builder.CreateIntrinsic(IID, Ops);
4211 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4212 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4213 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4214 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4215 bool IsMask3 = Name[11] == '3';
4216 bool IsMaskZ = Name[11] == 'z';
4217 // Drop the "avx512.mask." prefix to make indexing into the name easier.
4218 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4219 bool IsSubAdd = Name[3] == 's';
4220 if (CI->arg_size() == 5) {
4221 Intrinsic::ID IID;
4222 // Check the character before ".512" in the name.
4223 if (Name[Name.size() - 5] == 's')
4224 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4225 else
4226 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4227
4228 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4229 CI->getArgOperand(2), CI->getArgOperand(4)};
4230 if (IsSubAdd)
4231 Ops[2] = Builder.CreateFNeg(Ops[2]);
4232
4233 Rep = Builder.CreateIntrinsic(IID, Ops);
4234 } else {
4235 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4236
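// Without a rounding operand, lower to two FMAs, one with the accumulator
// negated, and interleave them: even result lanes take the subtracting form
// and odd lanes the adding form (swapped for vfmsubadd).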
4237 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4238 CI->getArgOperand(2)};
4239
4240 Function *FMA = Intrinsic::getOrInsertDeclaration(
4241 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4242 Value *Odd = Builder.CreateCall(FMA, Ops);
4243 Ops[2] = Builder.CreateFNeg(Ops[2]);
4244 Value *Even = Builder.CreateCall(FMA, Ops);
4245
4246 if (IsSubAdd)
4247 std::swap(Even, Odd);
4248
4249 SmallVector<int, 32> Idxs(NumElts);
4250 for (int i = 0; i != NumElts; ++i)
4251 Idxs[i] = i + (i % 2) * NumElts;
4252
4253 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4254 }
4255
4256 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4257 : IsMask3 ? CI->getArgOperand(2)
4258 : CI->getArgOperand(0);
4259
4260 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4261 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4262 Name.starts_with("avx512.maskz.pternlog.")) {
4263 bool ZeroMask = Name[11] == 'z';
4264 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4265 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4266 Intrinsic::ID IID;
4267 if (VecWidth == 128 && EltWidth == 32)
4268 IID = Intrinsic::x86_avx512_pternlog_d_128;
4269 else if (VecWidth == 256 && EltWidth == 32)
4270 IID = Intrinsic::x86_avx512_pternlog_d_256;
4271 else if (VecWidth == 512 && EltWidth == 32)
4272 IID = Intrinsic::x86_avx512_pternlog_d_512;
4273 else if (VecWidth == 128 && EltWidth == 64)
4274 IID = Intrinsic::x86_avx512_pternlog_q_128;
4275 else if (VecWidth == 256 && EltWidth == 64)
4276 IID = Intrinsic::x86_avx512_pternlog_q_256;
4277 else if (VecWidth == 512 && EltWidth == 64)
4278 IID = Intrinsic::x86_avx512_pternlog_q_512;
4279 else
4280 llvm_unreachable("Unexpected intrinsic");
4281
4282 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4283 CI->getArgOperand(2), CI->getArgOperand(3)};
4284 Rep = Builder.CreateIntrinsic(IID, Args);
4285 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4286 : CI->getArgOperand(0);
4287 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4288 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4289 Name.starts_with("avx512.maskz.vpmadd52")) {
4290 bool ZeroMask = Name[11] == 'z';
4291 bool High = Name[20] == 'h' || Name[21] == 'h';
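// The 'h'/'l' in vpmadd52[hl] picks the high or low half of the 52-bit
// product; it sits one character later in the maskz names.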
4292 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4293 Intrinsic::ID IID;
4294 if (VecWidth == 128 && !High)
4295 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4296 else if (VecWidth == 256 && !High)
4297 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4298 else if (VecWidth == 512 && !High)
4299 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4300 else if (VecWidth == 128 && High)
4301 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4302 else if (VecWidth == 256 && High)
4303 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4304 else if (VecWidth == 512 && High)
4305 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4306 else
4307 llvm_unreachable("Unexpected intrinsic");
4308
4309 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4310 CI->getArgOperand(2)};
4311 Rep = Builder.CreateIntrinsic(IID, Args);
4312 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4313 : CI->getArgOperand(0);
4314 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4315 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4316 Name.starts_with("avx512.mask.vpermt2var.") ||
4317 Name.starts_with("avx512.maskz.vpermt2var.")) {
4318 bool ZeroMask = Name[11] == 'z';
4319 bool IndexForm = Name[17] == 'i';
4320 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4321 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4322 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4323 Name.starts_with("avx512.mask.vpdpbusds.") ||
4324 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4325 bool ZeroMask = Name[11] == 'z';
4326 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4327 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4328 Intrinsic::ID IID;
4329 if (VecWidth == 128 && !IsSaturating)
4330 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4331 else if (VecWidth == 256 && !IsSaturating)
4332 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4333 else if (VecWidth == 512 && !IsSaturating)
4334 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4335 else if (VecWidth == 128 && IsSaturating)
4336 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4337 else if (VecWidth == 256 && IsSaturating)
4338 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4339 else if (VecWidth == 512 && IsSaturating)
4340 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4341 else
4342 llvm_unreachable("Unexpected intrinsic");
4343
4344 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4345 CI->getArgOperand(2)};
4346
4347 // Input argument types were incorrectly set to vectors of i32 before, but
4348 // they should be vectors of i8. Insert bitcasts when encountering the old
4349 // types.
4350 if (Args[1]->getType()->isVectorTy() &&
4351 cast<VectorType>(Args[1]->getType())
4352 ->getElementType()
4353 ->isIntegerTy(32) &&
4354 Args[2]->getType()->isVectorTy() &&
4355 cast<VectorType>(Args[2]->getType())
4356 ->getElementType()
4357 ->isIntegerTy(32)) {
4358 Type *NewArgType = nullptr;
4359 if (VecWidth == 128)
4360 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4361 else if (VecWidth == 256)
4362 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4363 else if (VecWidth == 512)
4364 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4365 else
4366 llvm_unreachable("Unexpected vector bit width");
4367
4368 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4369 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4370 }
4371
4372 Rep = Builder.CreateIntrinsic(IID, Args);
4373 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4374 : CI->getArgOperand(0);
4375 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4376 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4377 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4378 Name.starts_with("avx512.mask.vpdpwssds.") ||
4379 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4380 bool ZeroMask = Name[11] == 'z';
4381 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4382 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4383 Intrinsic::ID IID;
4384 if (VecWidth == 128 && !IsSaturating)
4385 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4386 else if (VecWidth == 256 && !IsSaturating)
4387 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4388 else if (VecWidth == 512 && !IsSaturating)
4389 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4390 else if (VecWidth == 128 && IsSaturating)
4391 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4392 else if (VecWidth == 256 && IsSaturating)
4393 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4394 else if (VecWidth == 512 && IsSaturating)
4395 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4396 else
4397 llvm_unreachable("Unexpected intrinsic");
4398
4399 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4400 CI->getArgOperand(2)};
4401
4402 // The input argument types were incorrectly set to vectors of i32, but
4403 // they should be vectors of i16. Insert a bitcast when encountering the
4404 // old types.
4405 if (Args[1]->getType()->isVectorTy() &&
4406 cast<VectorType>(Args[1]->getType())
4407 ->getElementType()
4408 ->isIntegerTy(32) &&
4409 Args[2]->getType()->isVectorTy() &&
4410 cast<VectorType>(Args[2]->getType())
4411 ->getElementType()
4412 ->isIntegerTy(32)) {
4413 Type *NewArgType = nullptr;
4414 if (VecWidth == 128)
4415 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4416 else if (VecWidth == 256)
4417 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4418 else if (VecWidth == 512)
4419 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4420 else
4421 llvm_unreachable("Unexpected vector bit width");
4422
4423 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4424 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4425 }
4426
4427 Rep = Builder.CreateIntrinsic(IID, Args);
4428 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4429 : CI->getArgOperand(0);
4430 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4431 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4432 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4433 Name == "subborrow.u32" || Name == "subborrow.u64") {
4434 Intrinsic::ID IID;
4435 if (Name[0] == 'a' && Name.back() == '2')
4436 IID = Intrinsic::x86_addcarry_32;
4437 else if (Name[0] == 'a' && Name.back() == '4')
4438 IID = Intrinsic::x86_addcarry_64;
4439 else if (Name[0] == 's' && Name.back() == '2')
4440 IID = Intrinsic::x86_subborrow_32;
4441 else if (Name[0] == 's' && Name.back() == '4')
4442 IID = Intrinsic::x86_subborrow_64;
4443 else
4444 llvm_unreachable("Unexpected intrinsic");
4445
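    // The old intrinsics returned the carry flag directly and wrote the
    // arithmetic result through a pointer, e.g. (illustrative):
    //   %cf = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, ptr %p)
    // The replacements return a { i8, i32 } (or { i8, i64 }) pair, so the
    // second element is stored through the old pointer operand below.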
4446 // Make a call with 3 operands.
4447 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4448 CI->getArgOperand(2)};
4449 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4450
4451 // Extract the second result and store it.
4452 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4453 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4454 // Replace the original call result with the first result of the new call.
4455 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4456
4457 CI->replaceAllUsesWith(CF);
4458 Rep = nullptr;
4459 } else if (Name.starts_with("avx512.mask.") &&
4460 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4461 // Rep will be updated by the call in the condition.
4462 }
4463
4464 return Rep;
4465}
4466
4467 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4468 Function *F, IRBuilder<> &Builder) {
4469 if (Name.starts_with("neon.bfcvt")) {
4470 if (Name.starts_with("neon.bfcvtn2")) {
4471 SmallVector<int, 32> LoMask(4);
4472 std::iota(LoMask.begin(), LoMask.end(), 0);
4473 SmallVector<int, 32> ConcatMask(8);
4474 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4475 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4476 Value *Trunc =
4477 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4478 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4479 } else if (Name.starts_with("neon.bfcvtn")) {
4480 SmallVector<int, 32> ConcatMask(8);
4481 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4482 Type *V4BF16 =
4483 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4484 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4486 return Builder.CreateShuffleVector(
4487 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4488 } else {
4489 return Builder.CreateFPTrunc(CI->getOperand(0),
4490 Type::getBFloatTy(F->getContext()));
4491 }
4492 } else if (Name.starts_with("sve.fcvt")) {
4493 Intrinsic::ID NewID =
4494 StringSwitch<Intrinsic::ID>(Name)
4495 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4496 .Case("sve.fcvtnt.bf16f32",
4497 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4498 .Default(Intrinsic::not_intrinsic);
4499 if (NewID == Intrinsic::not_intrinsic)
4500 llvm_unreachable("Unhandled Intrinsic!");
4501
4502 SmallVector<Value *, 3> Args(CI->args());
4503
4504 // The original intrinsics incorrectly used a predicate based on the
4505 // smallest element type rather than the largest.
4506 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4507 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4508
4509 if (Args[1]->getType() != BadPredTy)
4510 llvm_unreachable("Unexpected predicate type!");
4511
4512 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4513 BadPredTy, Args[1]);
4514 Args[1] = Builder.CreateIntrinsic(
4515 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4516
4517 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4518 CI->getName());
4519 }
4520
4521 llvm_unreachable("Unhandled Intrinsic!");
4522}
4523
4524 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4525 IRBuilder<> &Builder) {
4526 if (Name == "mve.vctp64.old") {
4527 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4528 // correct type.
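    // Roughly:
    //   %p = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
    //   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %p)
    //   %r = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)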
4529 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4530 CI->getArgOperand(0),
4531 /*FMFSource=*/nullptr, CI->getName());
4532 Value *C1 = Builder.CreateIntrinsic(
4533 Intrinsic::arm_mve_pred_v2i,
4534 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4535 return Builder.CreateIntrinsic(
4536 Intrinsic::arm_mve_pred_i2v,
4537 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4538 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4539 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4540 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4541 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4542 Name ==
4543 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4544 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4545 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4546 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4547 Name ==
4548 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4549 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4550 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4551 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4552 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4553 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4554 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4555 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4556 std::vector<Type *> Tys;
4557 unsigned ID = CI->getIntrinsicID();
4558 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4559 switch (ID) {
4560 case Intrinsic::arm_mve_mull_int_predicated:
4561 case Intrinsic::arm_mve_vqdmull_predicated:
4562 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4563 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4564 break;
4565 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4566 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4567 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4568 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4569 V2I1Ty};
4570 break;
4571 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4572 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4573 CI->getOperand(1)->getType(), V2I1Ty};
4574 break;
4575 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4576 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4577 CI->getOperand(2)->getType(), V2I1Ty};
4578 break;
4579 case Intrinsic::arm_cde_vcx1q_predicated:
4580 case Intrinsic::arm_cde_vcx1qa_predicated:
4581 case Intrinsic::arm_cde_vcx2q_predicated:
4582 case Intrinsic::arm_cde_vcx2qa_predicated:
4583 case Intrinsic::arm_cde_vcx3q_predicated:
4584 case Intrinsic::arm_cde_vcx3qa_predicated:
4585 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4586 break;
4587 default:
4588 llvm_unreachable("Unhandled Intrinsic!");
4589 }
4590
4591 std::vector<Value *> Ops;
4592 for (Value *Op : CI->args()) {
4593 Type *Ty = Op->getType();
4594 if (Ty->getScalarSizeInBits() == 1) {
4595 Value *C1 = Builder.CreateIntrinsic(
4596 Intrinsic::arm_mve_pred_v2i,
4597 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4598 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4599 }
4600 Ops.push_back(Op);
4601 }
4602
4603 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4604 CI->getName());
4605 }
4606 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4607}
4608
4609// These are expected to have the arguments:
4610// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4611//
4612// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4613//
4614 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4615 Function *F, IRBuilder<> &Builder) {
4616 AtomicRMWInst::BinOp RMWOp =
4617 StringSwitch<AtomicRMWInst::BinOp>(Name)
4618 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4619 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4620 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4621 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4622 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4623 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4624 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4625 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4626 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4627 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4628 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4629 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4630 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4631
4632 unsigned NumOperands = CI->getNumOperands();
4633 if (NumOperands < 3) // Malformed bitcode.
4634 return nullptr;
4635
4636 Value *Ptr = CI->getArgOperand(0);
4637 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4638 if (!PtrTy) // Malformed.
4639 return nullptr;
4640
4641 Value *Val = CI->getArgOperand(1);
4642 if (Val->getType() != CI->getType()) // Malformed.
4643 return nullptr;
4644
4645 ConstantInt *OrderArg = nullptr;
4646 bool IsVolatile = false;
4647
4648 // These should have 5 arguments (plus the callee). A separate version of
4649 // the ds_fadd intrinsic was defined for bf16 that lacked these arguments.
4650 if (NumOperands > 3)
4651 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4652
4653 // Ignore scope argument at 3
4654
4655 if (NumOperands > 5) {
4656 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4657 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4658 }
4659
4660 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4661 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4662 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4663 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4664 Order = AtomicOrdering::Monotonic;
4665
4666 LLVMContext &Ctx = F->getContext();
4667
4668 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4669 Type *RetTy = CI->getType();
4670 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4671 if (VT->getElementType()->isIntegerTy(16)) {
4672 VectorType *AsBF16 =
4673 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4674 Val = Builder.CreateBitCast(Val, AsBF16);
4675 }
4676 }
4677
4678 // The scope argument never really worked correctly. Use agent as the most
4679 // conservative option which should still always produce the instruction.
4680 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4681 AtomicRMWInst *RMW =
4682 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4683
4684 unsigned AddrSpace = PtrTy->getAddressSpace();
4685 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4686 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4687 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4688 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4689 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4690 }
4691
4692 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4693 MDBuilder MDB(F->getContext());
4694 MDNode *RangeNotPrivate =
4695 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4696 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4697 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4698 }
4699
4700 if (IsVolatile)
4701 RMW->setVolatile(true);
4702
4703 return Builder.CreateBitCast(RMW, RetTy);
4704}
4705
4706/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4707/// plain MDNode, as it's the verifier's job to check these are the correct
4708/// types later.
4709static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4710 if (Op < CI->arg_size()) {
4711 if (MetadataAsValue *MAV =
4712 dyn_cast_if_present<MetadataAsValue>(CI->getArgOperand(Op))) {
4713 Metadata *MD = MAV->getMetadata();
4714 return dyn_cast_if_present<MDNode>(MD);
4715 }
4716 }
4717 return nullptr;
4718}
4719
4720 /// Helper to unwrap MetadataAsValue operands that carry plain Metadata, such as the Value field.
4721static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4722 if (Op < CI->arg_size())
4723 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4724 return MAV->getMetadata();
4725 return nullptr;
4726}
4727
4728 static MDNode *getDebugLocSafe(const Instruction *I) {
4729 // The MDNode attached to this instruction might not be the correct type,
4730 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4731 return I->getDebugLoc().getAsMDNode();
4732}
4733
4734/// Convert debug intrinsic calls to non-instruction debug records.
4735/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4736/// \p CI - The debug intrinsic call.
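/// For example (illustrative, not verbatim):
///   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
///                             metadata !DIExpression())
/// becomes roughly the record:
///   #dbg_value(i32 %x, !10, !DIExpression(), !dbgloc)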
4737 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4738 DbgRecord *DR = nullptr;
4739 if (Name == "label") {
4740 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4741 CI->getDebugLoc());
4742 } else if (Name == "assign") {
4743 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4744 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4745 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4746 unwrapMAVMetadataOp(CI, 4),
4747 /*The address is a Value ref, it will be stored as a Metadata */
4748 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4749 } else if (Name == "declare") {
4750 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4751 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4752 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4753 getDebugLocSafe(CI));
4754 } else if (Name == "addr") {
4755 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4756 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4757 // Don't try to add something to the expression if it's not an expression.
4758 // Instead, allow the verifier to fail later.
4759 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4760 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4761 }
4762 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4763 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4764 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4765 getDebugLocSafe(CI));
4766 } else if (Name == "value") {
4767 // An old version of dbg.value had an extra offset argument.
4768 unsigned VarOp = 1;
4769 unsigned ExprOp = 2;
4770 if (CI->arg_size() == 4) {
4771 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4772 // Nonzero offset dbg.values get dropped without a replacement.
4773 if (!Offset || !Offset->isZeroValue())
4774 return;
4775 VarOp = 2;
4776 ExprOp = 3;
4777 }
4778 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4779 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4780 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4781 nullptr, getDebugLocSafe(CI));
4782 }
4783 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4784 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4785}
4786
4787/// Upgrade a call to an old intrinsic. All argument and return casting must be
4788/// provided to seamlessly integrate with existing context.
4789 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4790 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4791 // checks the callee's function type matches. It's likely we need to handle
4792 // type changes here.
4793 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4794 if (!F)
4795 return;
4796
4797 LLVMContext &C = CI->getContext();
4798 IRBuilder<> Builder(C);
4799 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4800
4801 if (!NewFn) {
4802 // Get the Function's name.
4803 StringRef Name = F->getName();
4804
4805 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4806 Name = Name.substr(5);
4807
4808 bool IsX86 = Name.consume_front("x86.");
4809 bool IsNVVM = Name.consume_front("nvvm.");
4810 bool IsAArch64 = Name.consume_front("aarch64.");
4811 bool IsARM = Name.consume_front("arm.");
4812 bool IsAMDGCN = Name.consume_front("amdgcn.");
4813 bool IsDbg = Name.consume_front("dbg.");
4814 Value *Rep = nullptr;
4815
4816 if (!IsX86 && Name == "stackprotectorcheck") {
4817 Rep = nullptr;
4818 } else if (IsNVVM) {
4819 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4820 } else if (IsX86) {
4821 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4822 } else if (IsAArch64) {
4823 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4824 } else if (IsARM) {
4825 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4826 } else if (IsAMDGCN) {
4827 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4828 } else if (IsDbg) {
4829 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4830 } else {
4831 llvm_unreachable("Unknown function for CallBase upgrade.");
4832 }
4833
4834 if (Rep)
4835 CI->replaceAllUsesWith(Rep);
4836 CI->eraseFromParent();
4837 return;
4838 }
4839
4840 const auto &DefaultCase = [&]() -> void {
4841 if (F == NewFn)
4842 return;
4843
4844 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4845 // Handle generic mangling change.
4846 assert(
4847 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4848 "Unknown function for CallBase upgrade and isn't just a name change");
4849 CI->setCalledFunction(NewFn);
4850 return;
4851 }
4852
4853 // This must be an upgrade from a named to a literal struct.
4854 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4855 assert(OldST != NewFn->getReturnType() &&
4856 "Return type must have changed");
4857 assert(OldST->getNumElements() ==
4858 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4859 "Must have same number of elements");
4860
4861 SmallVector<Value *> Args(CI->args());
4862 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4863 NewCI->setAttributes(CI->getAttributes());
4864 Value *Res = PoisonValue::get(OldST);
4865 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4866 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4867 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4868 }
4869 CI->replaceAllUsesWith(Res);
4870 CI->eraseFromParent();
4871 return;
4872 }
4873
4874 // We're probably about to produce something invalid. Let the verifier catch
4875 // it instead of dying here.
4876 CI->setCalledOperand(
4877 ConstantExpr::getBitCast(NewFn, CI->getCalledOperand()->getType()));
4878 return;
4879 };
4880 CallInst *NewCall = nullptr;
4881 switch (NewFn->getIntrinsicID()) {
4882 default: {
4883 DefaultCase();
4884 return;
4885 }
4886 case Intrinsic::arm_neon_vst1:
4887 case Intrinsic::arm_neon_vst2:
4888 case Intrinsic::arm_neon_vst3:
4889 case Intrinsic::arm_neon_vst4:
4890 case Intrinsic::arm_neon_vst2lane:
4891 case Intrinsic::arm_neon_vst3lane:
4892 case Intrinsic::arm_neon_vst4lane: {
4893 SmallVector<Value *, 4> Args(CI->args());
4894 NewCall = Builder.CreateCall(NewFn, Args);
4895 break;
4896 }
4897 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4898 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4899 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4900 LLVMContext &Ctx = F->getParent()->getContext();
4901 SmallVector<Value *, 4> Args(CI->args());
4902 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4903 cast<ConstantInt>(Args[3])->getZExtValue());
4904 NewCall = Builder.CreateCall(NewFn, Args);
4905 break;
4906 }
4907 case Intrinsic::aarch64_sve_ld3_sret:
4908 case Intrinsic::aarch64_sve_ld4_sret:
4909 case Intrinsic::aarch64_sve_ld2_sret: {
4910 StringRef Name = F->getName();
4911 Name = Name.substr(5);
4912 unsigned N = StringSwitch<unsigned>(Name)
4913 .StartsWith("aarch64.sve.ld2", 2)
4914 .StartsWith("aarch64.sve.ld3", 3)
4915 .StartsWith("aarch64.sve.ld4", 4)
4916 .Default(0);
4917 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4918 unsigned MinElts = RetTy->getMinNumElements() / N;
4919 SmallVector<Value *, 2> Args(CI->args());
4920 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4921 Value *Ret = llvm::PoisonValue::get(RetTy);
4922 for (unsigned I = 0; I < N; I++) {
4923 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4924 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4925 }
4926 NewCall = dyn_cast<CallInst>(Ret);
4927 break;
4928 }
4929
4930 case Intrinsic::coro_end: {
4931 SmallVector<Value *, 3> Args(CI->args());
4932 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4933 NewCall = Builder.CreateCall(NewFn, Args);
4934 break;
4935 }
4936
4937 case Intrinsic::vector_extract: {
4938 StringRef Name = F->getName();
4939 Name = Name.substr(5); // Strip llvm
4940 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4941 DefaultCase();
4942 return;
4943 }
4944 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4945 unsigned MinElts = RetTy->getMinNumElements();
4946 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4947 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4948 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4949 break;
4950 }
4951
4952 case Intrinsic::vector_insert: {
4953 StringRef Name = F->getName();
4954 Name = Name.substr(5);
4955 if (!Name.starts_with("aarch64.sve.tuple")) {
4956 DefaultCase();
4957 return;
4958 }
4959 if (Name.starts_with("aarch64.sve.tuple.set")) {
4960 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4961 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4962 Value *NewIdx =
4963 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4964 NewCall = Builder.CreateCall(
4965 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4966 break;
4967 }
4968 if (Name.starts_with("aarch64.sve.tuple.create")) {
4969 unsigned N = StringSwitch<unsigned>(Name)
4970 .StartsWith("aarch64.sve.tuple.create2", 2)
4971 .StartsWith("aarch64.sve.tuple.create3", 3)
4972 .StartsWith("aarch64.sve.tuple.create4", 4)
4973 .Default(0);
4974 assert(N > 1 && "Create is expected to be between 2-4");
4975 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4976 Value *Ret = llvm::PoisonValue::get(RetTy);
4977 unsigned MinElts = RetTy->getMinNumElements() / N;
4978 for (unsigned I = 0; I < N; I++) {
4979 Value *V = CI->getArgOperand(I);
4980 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4981 }
4982 NewCall = dyn_cast<CallInst>(Ret);
4983 }
4984 break;
4985 }
4986
4987 case Intrinsic::arm_neon_bfdot:
4988 case Intrinsic::arm_neon_bfmmla:
4989 case Intrinsic::arm_neon_bfmlalb:
4990 case Intrinsic::arm_neon_bfmlalt:
4991 case Intrinsic::aarch64_neon_bfdot:
4992 case Intrinsic::aarch64_neon_bfmmla:
4993 case Intrinsic::aarch64_neon_bfmlalb:
4994 case Intrinsic::aarch64_neon_bfmlalt: {
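    // These intrinsics originally modelled their bf16 operands as i8 vectors
    // (e.g. <8 x i8> for the 64-bit form), so operands 1 and 2 are bitcast
    // below to bfloat vectors of the same total width.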
4995 SmallVector<Value *, 3> Args;
4996 assert(CI->arg_size() == 3 &&
4997 "Mismatch between function args and call args");
4998 size_t OperandWidth =
4999 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
5000 assert((OperandWidth == 64 || OperandWidth == 128) &&
5001 "Unexpected operand width");
5002 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5003 auto Iter = CI->args().begin();
5004 Args.push_back(*Iter++);
5005 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5006 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5007 NewCall = Builder.CreateCall(NewFn, Args);
5008 break;
5009 }
5010
5011 case Intrinsic::bitreverse:
5012 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5013 break;
5014
5015 case Intrinsic::ctlz:
5016 case Intrinsic::cttz: {
5017 if (CI->arg_size() != 1) {
5018 DefaultCase();
5019 return;
5020 }
5021
5022 NewCall =
5023 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5024 break;
5025 }
5026
5027 case Intrinsic::objectsize: {
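    // Older forms of llvm.objectsize took only 2 or 3 arguments; the missing
    // min/dynamic flags are filled in with a default of false.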
5028 Value *NullIsUnknownSize =
5029 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5030 Value *Dynamic =
5031 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5032 NewCall = Builder.CreateCall(
5033 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5034 break;
5035 }
5036
5037 case Intrinsic::ctpop:
5038 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5039 break;
5040
5041 case Intrinsic::convert_from_fp16:
5042 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5043 break;
5044
5045 case Intrinsic::dbg_value: {
5046 StringRef Name = F->getName();
5047 Name = Name.substr(5); // Strip llvm.
5048 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5049 if (Name.starts_with("dbg.addr")) {
5050 DIExpression *Expr = cast<DIExpression>(
5051 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5052 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5053 NewCall =
5054 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5055 MetadataAsValue::get(C, Expr)});
5056 break;
5057 }
5058
5059 // Upgrade from the old version that had an extra offset argument.
5060 assert(CI->arg_size() == 4);
5061 // Drop nonzero offsets instead of attempting to upgrade them.
5062 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
5063 if (Offset->isZeroValue()) {
5064 NewCall = Builder.CreateCall(
5065 NewFn,
5066 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5067 break;
5068 }
5069 CI->eraseFromParent();
5070 return;
5071 }
5072
5073 case Intrinsic::ptr_annotation:
5074 // Upgrade from versions that lacked the annotation attribute argument.
5075 if (CI->arg_size() != 4) {
5076 DefaultCase();
5077 return;
5078 }
5079
5080 // Create a new call with an added null annotation attribute argument.
5081 NewCall = Builder.CreateCall(
5082 NewFn,
5083 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5084 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5085 NewCall->takeName(CI);
5086 CI->replaceAllUsesWith(NewCall);
5087 CI->eraseFromParent();
5088 return;
5089
5090 case Intrinsic::var_annotation:
5091 // Upgrade from versions that lacked the annotation attribute argument.
5092 if (CI->arg_size() != 4) {
5093 DefaultCase();
5094 return;
5095 }
5096 // Create a new call with an added null annotation attribute argument.
5097 NewCall = Builder.CreateCall(
5098 NewFn,
5099 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5100 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5101 NewCall->takeName(CI);
5102 CI->replaceAllUsesWith(NewCall);
5103 CI->eraseFromParent();
5104 return;
5105
5106 case Intrinsic::riscv_aes32dsi:
5107 case Intrinsic::riscv_aes32dsmi:
5108 case Intrinsic::riscv_aes32esi:
5109 case Intrinsic::riscv_aes32esmi:
5110 case Intrinsic::riscv_sm4ks:
5111 case Intrinsic::riscv_sm4ed: {
5112 // The last argument to these intrinsics used to be i8 and changed to i32.
5113 // The type overload for sm4ks and sm4ed was removed.
5114 Value *Arg2 = CI->getArgOperand(2);
5115 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5116 return;
5117
5118 Value *Arg0 = CI->getArgOperand(0);
5119 Value *Arg1 = CI->getArgOperand(1);
5120 if (CI->getType()->isIntegerTy(64)) {
5121 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5122 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5123 }
5124
5125 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5126 cast<ConstantInt>(Arg2)->getZExtValue());
5127
5128 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5129 Value *Res = NewCall;
5130 if (Res->getType() != CI->getType())
5131 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5132 NewCall->takeName(CI);
5133 CI->replaceAllUsesWith(Res);
5134 CI->eraseFromParent();
5135 return;
5136 }
5137 case Intrinsic::nvvm_mapa_shared_cluster: {
5138 // Create a new call with the correct address space.
5139 NewCall =
5140 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5141 Value *Res = NewCall;
5142 Res = Builder.CreateAddrSpaceCast(
5143 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5144 NewCall->takeName(CI);
5145 CI->replaceAllUsesWith(Res);
5146 CI->eraseFromParent();
5147 return;
5148 }
5149 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5150 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5151 // Create a new call with the correct address space.
5152 SmallVector<Value *, 4> Args(CI->args());
5153 Args[0] = Builder.CreateAddrSpaceCast(
5154 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5155
5156 NewCall = Builder.CreateCall(NewFn, Args);
5157 NewCall->takeName(CI);
5158 CI->replaceAllUsesWith(NewCall);
5159 CI->eraseFromParent();
5160 return;
5161 }
5162 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5163 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5164 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5165 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5166 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5167 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5168 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5169 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5170 SmallVector<Value *, 16> Args(CI->args());
5171
5172 // Create AddrSpaceCast to shared_cluster if needed.
5173 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5174 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5175 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5176 Args[0] = Builder.CreateAddrSpaceCast(
5177 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5178
5179 // Attach the flag argument for cta_group, with a
5180 // default value of 0. This handles case (2) in
5181 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5182 size_t NumArgs = CI->arg_size();
5183 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5184 if (!FlagArg->getType()->isIntegerTy(1))
5185 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5186
5187 NewCall = Builder.CreateCall(NewFn, Args);
5188 NewCall->takeName(CI);
5189 CI->replaceAllUsesWith(NewCall);
5190 CI->eraseFromParent();
5191 return;
5192 }
5193 case Intrinsic::riscv_sha256sig0:
5194 case Intrinsic::riscv_sha256sig1:
5195 case Intrinsic::riscv_sha256sum0:
5196 case Intrinsic::riscv_sha256sum1:
5197 case Intrinsic::riscv_sm3p0:
5198 case Intrinsic::riscv_sm3p1: {
5199 // These intrinsics were overloaded on the operand type (i32/i64). The i64
5200 // overload was removed; upgrade i64 uses by truncating and sign-extending.
5201 if (!CI->getType()->isIntegerTy(64))
5202 return;
5203
5204 Value *Arg =
5205 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5206
5207 NewCall = Builder.CreateCall(NewFn, Arg);
5208 Value *Res =
5209 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5210 NewCall->takeName(CI);
5211 CI->replaceAllUsesWith(Res);
5212 CI->eraseFromParent();
5213 return;
5214 }
5215
5216 case Intrinsic::x86_xop_vfrcz_ss:
5217 case Intrinsic::x86_xop_vfrcz_sd:
5218 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5219 break;
5220
5221 case Intrinsic::x86_xop_vpermil2pd:
5222 case Intrinsic::x86_xop_vpermil2ps:
5223 case Intrinsic::x86_xop_vpermil2pd_256:
5224 case Intrinsic::x86_xop_vpermil2ps_256: {
5225 SmallVector<Value *, 4> Args(CI->args());
5226 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5227 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5228 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5229 NewCall = Builder.CreateCall(NewFn, Args);
5230 break;
5231 }
5232
5233 case Intrinsic::x86_sse41_ptestc:
5234 case Intrinsic::x86_sse41_ptestz:
5235 case Intrinsic::x86_sse41_ptestnzc: {
5236 // The arguments for these intrinsics used to be v4f32, and changed
5237 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5238 // So, the only thing required is a bitcast for both arguments.
5239 // First, check the arguments have the old type.
5240 Value *Arg0 = CI->getArgOperand(0);
5241 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5242 return;
5243
5244 // Old intrinsic, add bitcasts
5245 Value *Arg1 = CI->getArgOperand(1);
5246
5247 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5248
5249 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5250 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5251
5252 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5253 break;
5254 }
5255
5256 case Intrinsic::x86_rdtscp: {
5257 // This used to take one argument. If we have no arguments, it is already
5258 // upgraded.
5259 if (CI->getNumOperands() == 0)
5260 return;
5261
5262 NewCall = Builder.CreateCall(NewFn);
5263 // Extract the second result and store it.
5264 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5265 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5266 // Replace the original call result with the first result of the new call.
5267 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5268
5269 NewCall->takeName(CI);
5270 CI->replaceAllUsesWith(TSC);
5271 CI->eraseFromParent();
5272 return;
5273 }
5274
5275 case Intrinsic::x86_sse41_insertps:
5276 case Intrinsic::x86_sse41_dppd:
5277 case Intrinsic::x86_sse41_dpps:
5278 case Intrinsic::x86_sse41_mpsadbw:
5279 case Intrinsic::x86_avx_dp_ps_256:
5280 case Intrinsic::x86_avx2_mpsadbw: {
5281 // Need to truncate the last argument from i32 to i8 -- this argument models
5282 // an inherently 8-bit immediate operand to these x86 instructions.
5283 SmallVector<Value *, 4> Args(CI->args());
5284
5285 // Replace the last argument with a trunc.
5286 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5287 NewCall = Builder.CreateCall(NewFn, Args);
5288 break;
5289 }
5290
5291 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5292 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5293 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5294 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5295 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5296 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5297 SmallVector<Value *, 4> Args(CI->args());
5298 unsigned NumElts =
5299 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5300 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5301
5302 NewCall = Builder.CreateCall(NewFn, Args);
5303 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5304
5305 NewCall->takeName(CI);
5306 CI->replaceAllUsesWith(Res);
5307 CI->eraseFromParent();
5308 return;
5309 }
5310
5311 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5312 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5313 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5314 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5315 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5316 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5317 SmallVector<Value *, 4> Args(CI->args());
5318 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5319 if (NewFn->getIntrinsicID() ==
5320 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5321 Args[1] = Builder.CreateBitCast(
5322 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5323
5324 NewCall = Builder.CreateCall(NewFn, Args);
5325 Value *Res = Builder.CreateBitCast(
5326 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5327
5328 NewCall->takeName(CI);
5329 CI->replaceAllUsesWith(Res);
5330 CI->eraseFromParent();
5331 return;
5332 }
5333 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5334 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5335 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5336 SmallVector<Value *, 4> Args(CI->args());
5337 unsigned NumElts =
5338 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5339 Args[1] = Builder.CreateBitCast(
5340 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5341 Args[2] = Builder.CreateBitCast(
5342 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5343
5344 NewCall = Builder.CreateCall(NewFn, Args);
5345 break;
5346 }
5347
5348 case Intrinsic::thread_pointer: {
5349 NewCall = Builder.CreateCall(NewFn, {});
5350 break;
5351 }
5352
5353 case Intrinsic::memcpy:
5354 case Intrinsic::memmove:
5355 case Intrinsic::memset: {
5356 // We have to make sure that the call signature is what we're expecting.
5357 // We only want to change the old signatures by removing the alignment arg:
5358 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5359 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5360 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5361 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5362 // Note: i8*'s in the above can be any pointer type
5363 if (CI->arg_size() != 5) {
5364 DefaultCase();
5365 return;
5366 }
5367 // Remove alignment argument (3), and add alignment attributes to the
5368 // dest/src pointers.
5369 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5370 CI->getArgOperand(2), CI->getArgOperand(4)};
5371 NewCall = Builder.CreateCall(NewFn, Args);
5372 AttributeList OldAttrs = CI->getAttributes();
5373 AttributeList NewAttrs = AttributeList::get(
5374 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5375 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5376 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5377 NewCall->setAttributes(NewAttrs);
5378 auto *MemCI = cast<MemIntrinsic>(NewCall);
5379 // All mem intrinsics support dest alignment.
5380 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5381 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5382 // Memcpy/Memmove also support source alignment.
5383 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5384 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5385 break;
5386 }
5387
5388 case Intrinsic::masked_load:
5389 case Intrinsic::masked_gather:
5390 case Intrinsic::masked_store:
5391 case Intrinsic::masked_scatter: {
5392 if (CI->arg_size() != 4) {
5393 DefaultCase();
5394 return;
5395 }
5396
5397 auto GetMaybeAlign = [](Value *Op) {
5398 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5399 uint64_t Val = CI->getZExtValue();
5400 if (Val == 0)
5401 return MaybeAlign();
5402 if (isPowerOf2_64(Val))
5403 return MaybeAlign(Val);
5404 }
5405 reportFatalUsageError("Invalid alignment argument");
5406 };
5407 auto GetAlign = [&](Value *Op) {
5408 MaybeAlign Align = GetMaybeAlign(Op);
5409 if (Align)
5410 return *Align;
5411 reportFatalUsageError("Invalid zero alignment argument");
5412 };
5413
5414 const DataLayout &DL = CI->getDataLayout();
5415 switch (NewFn->getIntrinsicID()) {
5416 case Intrinsic::masked_load:
5417 NewCall = Builder.CreateMaskedLoad(
5418 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5419 CI->getArgOperand(2), CI->getArgOperand(3));
5420 break;
5421 case Intrinsic::masked_gather:
5422 NewCall = Builder.CreateMaskedGather(
5423 CI->getType(), CI->getArgOperand(0),
5424 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5425 CI->getType()->getScalarType()),
5426 CI->getArgOperand(2), CI->getArgOperand(3));
5427 break;
5428 case Intrinsic::masked_store:
5429 NewCall = Builder.CreateMaskedStore(
5430 CI->getArgOperand(0), CI->getArgOperand(1),
5431 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5432 break;
5433 case Intrinsic::masked_scatter:
5434 NewCall = Builder.CreateMaskedScatter(
5435 CI->getArgOperand(0), CI->getArgOperand(1),
5436 DL.getValueOrABITypeAlignment(
5437 GetMaybeAlign(CI->getArgOperand(2)),
5438 CI->getArgOperand(0)->getType()->getScalarType()),
5439 CI->getArgOperand(3));
5440 break;
5441 default:
5442 llvm_unreachable("Unexpected intrinsic ID");
5443 }
5444 // Previous metadata is still valid.
5445 NewCall->copyMetadata(*CI);
5446 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5447 break;
5448 }
5449
5450 case Intrinsic::lifetime_start:
5451 case Intrinsic::lifetime_end: {
5452 if (CI->arg_size() != 2) {
5453 DefaultCase();
5454 return;
5455 }
5456
5457 Value *Ptr = CI->getArgOperand(1);
5458 // Try to strip pointer casts, such that the lifetime works on an alloca.
5459 Ptr = Ptr->stripPointerCasts();
5460 if (isa<AllocaInst>(Ptr)) {
5461 // Don't use NewFn, as we might have looked through an addrspacecast.
5462 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5463 NewCall = Builder.CreateLifetimeStart(Ptr);
5464 else
5465 NewCall = Builder.CreateLifetimeEnd(Ptr);
5466 break;
5467 }
5468
5469 // Otherwise remove the lifetime marker.
5470 CI->eraseFromParent();
5471 return;
5472 }
5473
5474 case Intrinsic::x86_avx512_vpdpbusd_128:
5475 case Intrinsic::x86_avx512_vpdpbusd_256:
5476 case Intrinsic::x86_avx512_vpdpbusd_512:
5477 case Intrinsic::x86_avx512_vpdpbusds_128:
5478 case Intrinsic::x86_avx512_vpdpbusds_256:
5479 case Intrinsic::x86_avx512_vpdpbusds_512:
5480 case Intrinsic::x86_avx2_vpdpbssd_128:
5481 case Intrinsic::x86_avx2_vpdpbssd_256:
5482 case Intrinsic::x86_avx10_vpdpbssd_512:
5483 case Intrinsic::x86_avx2_vpdpbssds_128:
5484 case Intrinsic::x86_avx2_vpdpbssds_256:
5485 case Intrinsic::x86_avx10_vpdpbssds_512:
5486 case Intrinsic::x86_avx2_vpdpbsud_128:
5487 case Intrinsic::x86_avx2_vpdpbsud_256:
5488 case Intrinsic::x86_avx10_vpdpbsud_512:
5489 case Intrinsic::x86_avx2_vpdpbsuds_128:
5490 case Intrinsic::x86_avx2_vpdpbsuds_256:
5491 case Intrinsic::x86_avx10_vpdpbsuds_512:
5492 case Intrinsic::x86_avx2_vpdpbuud_128:
5493 case Intrinsic::x86_avx2_vpdpbuud_256:
5494 case Intrinsic::x86_avx10_vpdpbuud_512:
5495 case Intrinsic::x86_avx2_vpdpbuuds_128:
5496 case Intrinsic::x86_avx2_vpdpbuuds_256:
5497 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5498 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5499 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5500 CI->getArgOperand(2)};
5501 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5502 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5503 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5504
5505 NewCall = Builder.CreateCall(NewFn, Args);
5506 break;
5507 }
5508 case Intrinsic::x86_avx512_vpdpwssd_128:
5509 case Intrinsic::x86_avx512_vpdpwssd_256:
5510 case Intrinsic::x86_avx512_vpdpwssd_512:
5511 case Intrinsic::x86_avx512_vpdpwssds_128:
5512 case Intrinsic::x86_avx512_vpdpwssds_256:
5513 case Intrinsic::x86_avx512_vpdpwssds_512:
5514 case Intrinsic::x86_avx2_vpdpwsud_128:
5515 case Intrinsic::x86_avx2_vpdpwsud_256:
5516 case Intrinsic::x86_avx10_vpdpwsud_512:
5517 case Intrinsic::x86_avx2_vpdpwsuds_128:
5518 case Intrinsic::x86_avx2_vpdpwsuds_256:
5519 case Intrinsic::x86_avx10_vpdpwsuds_512:
5520 case Intrinsic::x86_avx2_vpdpwusd_128:
5521 case Intrinsic::x86_avx2_vpdpwusd_256:
5522 case Intrinsic::x86_avx10_vpdpwusd_512:
5523 case Intrinsic::x86_avx2_vpdpwusds_128:
5524 case Intrinsic::x86_avx2_vpdpwusds_256:
5525 case Intrinsic::x86_avx10_vpdpwusds_512:
5526 case Intrinsic::x86_avx2_vpdpwuud_128:
5527 case Intrinsic::x86_avx2_vpdpwuud_256:
5528 case Intrinsic::x86_avx10_vpdpwuud_512:
5529 case Intrinsic::x86_avx2_vpdpwuuds_128:
5530 case Intrinsic::x86_avx2_vpdpwuuds_256:
5531 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5532 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5533 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5534 CI->getArgOperand(2)};
5535 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5536 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5537 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5538
5539 NewCall = Builder.CreateCall(NewFn, Args);
5540 break;
5541 }
  }
5542 assert(NewCall && "Should have either set this variable or returned through "
5543 "the default case");
5544 NewCall->takeName(CI);
5545 CI->replaceAllUsesWith(NewCall);
5546 CI->eraseFromParent();
5547}
5548
5549 void llvm::UpgradeCallsToIntrinsic(Function *F) {
5550 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5551
5552 // Check if this function should be upgraded and get the replacement function
5553 // if there is one.
5554 Function *NewFn;
5555 if (UpgradeIntrinsicFunction(F, NewFn)) {
5556 // Replace all users of the old function with the new function or new
5557 // instructions. This is not a range loop because the call is deleted.
5558 for (User *U : make_early_inc_range(F->users()))
5559 if (CallBase *CB = dyn_cast<CallBase>(U))
5560 UpgradeIntrinsicCall(CB, NewFn);
5561
5562 // Remove old function, no longer used, from the module.
5563 if (F != NewFn)
5564 F->eraseFromParent();
5565 }
5566}
5567
5568 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5569 const unsigned NumOperands = MD.getNumOperands();
5570 if (NumOperands == 0)
5571 return &MD; // Invalid, punt to a verifier error.
5572
5573 // Check if the tag uses struct-path aware TBAA format.
5574 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5575 return &MD;
5576
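  // A sketch of the upgrade: an old scalar tag such as !{!"int", !0} becomes
  // the struct-path tag !{!1, !1, i64 0} with !1 being the old node itself;
  // the three-operand form additionally carries its constant flag across.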
5577 auto &Context = MD.getContext();
5578 if (NumOperands == 3) {
5579 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5580 MDNode *ScalarType = MDNode::get(Context, Elts);
5581 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5582 Metadata *Elts2[] = {ScalarType, ScalarType,
5583 ConstantAsMetadata::get(
5584 Constant::getNullValue(Type::getInt64Ty(Context))),
5585 MD.getOperand(2)};
5586 return MDNode::get(Context, Elts2);
5587 }
5588 // Create a MDNode <MD, MD, offset 0>
5589 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5590 Type::getInt64Ty(Context)))};
5591 return MDNode::get(Context, Elts);
5592}
5593
5594 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5595 Instruction *&Temp) {
5596 if (Opc != Instruction::BitCast)
5597 return nullptr;
5598
5599 Temp = nullptr;
5600 Type *SrcTy = V->getType();
5601 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5602 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5603 LLVMContext &Context = V->getContext();
5604
5605 // We have no information about target data layout, so we assume that
5606 // the maximum pointer size is 64bit.
5607 Type *MidTy = Type::getInt64Ty(Context);
5608 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5609
5610 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5611 }
5612
5613 return nullptr;
5614}
5615
5616 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5617 if (Opc != Instruction::BitCast)
5618 return nullptr;
5619
5620 Type *SrcTy = C->getType();
5621 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5622 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5623 LLVMContext &Context = C->getContext();
5624
5625 // We have no information about target data layout, so we assume that
5626 // the maximum pointer size is 64bit.
5627 Type *MidTy = Type::getInt64Ty(Context);
5628
5629 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5630 DestTy);
5631 }
5632
5633 return nullptr;
5634}
5635
5636 /// Check the debug info version number; if it is out-dated, drop the debug
5637 /// info. Return true if the module is modified.
5638 bool llvm::UpgradeDebugInfo(Module &M) {
5639 if (DisableAutoUpgradeDebugInfo)
5640 return false;
5641
5642 llvm::TimeTraceScope timeScope("Upgrade debug info");
5643 // We need to get metadata before the module is verified (i.e., getModuleFlag
5644 // makes assumptions that we haven't verified yet). Carefully extract the flag
5645 // from the metadata.
5646 unsigned Version = 0;
5647 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5648 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5649 if (Flag->getNumOperands() < 3)
5650 return false;
5651 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5652 return K->getString() == "Debug Info Version";
5653 return false;
5654 });
5655 if (OpIt != ModFlags->op_end()) {
5656 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5657 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5658 Version = CI->getZExtValue();
5659 }
5660 }
5661
5662 if (Version == DEBUG_METADATA_VERSION) {
5663 bool BrokenDebugInfo = false;
5664 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5665 report_fatal_error("Broken module found, compilation aborted!");
5666 if (!BrokenDebugInfo)
5667 // Everything is ok.
5668 return false;
5669 else {
5670 // Diagnose malformed debug info.
5671 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5672 M.getContext().diagnose(Diag);
5673 }
5674 }
5675 bool Modified = StripDebugInfo(M);
5676 if (Modified && Version != DEBUG_METADATA_VERSION) {
5677 // Diagnose a version mismatch.
5678 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5679 M.getContext().diagnose(DiagVersion);
5680 }
5681 return Modified;
5682}
5683
5684static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5685 GlobalValue *GV, const Metadata *V) {
5686 Function *F = cast<Function>(GV);
5687
5688 constexpr StringLiteral DefaultValue = "1";
5689 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5690 unsigned Length = 0;
5691
5692 if (F->hasFnAttribute(Attr)) {
5693 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5694 // parse these elements placing them into Vect3
5695 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5696 for (; Length < 3 && !S.empty(); Length++) {
5697 auto [Part, Rest] = S.split(',');
5698 Vect3[Length] = Part.trim();
5699 S = Rest;
5700 }
5701 }
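  // e.g. merging a 'z' value of 2 into an existing "16,8" yields "16,8,2",
  // while merging an 'x' value of 32 into "16,8" yields "32,8".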
5702
5703 const unsigned Dim = DimC - 'x';
5704 assert(Dim < 3 && "Unexpected dim char");
5705
5706 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5707
5708 // Local variable required for the StringRef in Vect3 to point to.
5709 const std::string VStr = llvm::utostr(VInt);
5710 Vect3[Dim] = VStr;
5711 Length = std::max(Length, Dim + 1);
5712
5713 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5714 F->addFnAttr(Attr, NewAttr);
5715}
5716
5717static inline bool isXYZ(StringRef S) {
5718 return S == "x" || S == "y" || S == "z";
5719}
5720
5721 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5722 const Metadata *V) {
5723 if (K == "kernel") {
5724 if (!mdconst::extract<ConstantInt>(V)->isZero())
5725 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5726 return true;
5727 }
5728 if (K == "align") {
5729 // V is a bitfield specifying two 16-bit values: the alignment value is
5730 // specified in the low 16 bits, and the index in the high 16 bits. For the
5731 // index, 0 indicates the return value while higher values correspond to
5732 // each parameter (idx = param + 1).
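    // e.g. V == 0x20008 requests stackalign(8) at attribute index 2, i.e. on
    // the second parameter.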
5733 const uint64_t AlignIdxValuePair =
5734 mdconst::extract<ConstantInt>(V)->getZExtValue();
5735 const unsigned Idx = (AlignIdxValuePair >> 16);
5736 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5737 cast<Function>(GV)->addAttributeAtIndex(
5738 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5739 return true;
5740 }
5741 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5742 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5743 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5744 return true;
5745 }
5746 if (K == "minctasm") {
5747 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5748 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5749 return true;
5750 }
5751 if (K == "maxnreg") {
5752 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5753 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5754 return true;
5755 }
5756 if (K.consume_front("maxntid") && isXYZ(K)) {
5757 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5758 return true;
5759 }
5760 if (K.consume_front("reqntid") && isXYZ(K)) {
5761 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5762 return true;
5763 }
5764 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5765 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5766 return true;
5767 }
5768 if (K == "grid_constant") {
5769 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5770 for (const auto &Op : cast<MDNode>(V)->operands()) {
5771 // For some reason, the index is 1-based in the metadata. Good thing we're
5772 // able to auto-upgrade it!
5773 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5774 cast<Function>(GV)->addParamAttr(Index, Attr);
5775 }
5776 return true;
5777 }
5778
5779 return false;
5780}
5781
5782 void llvm::UpgradeNVVMAnnotations(Module &M) {
5783 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5784 if (!NamedMD)
5785 return;
5786
5787 SmallVector<MDNode *, 8> NewNodes;
5788 SmallPtrSet<const MDNode *, 8> SeenNodes;
5789 for (MDNode *MD : NamedMD->operands()) {
5790 if (!SeenNodes.insert(MD).second)
5791 continue;
5792
5793 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5794 if (!GV)
5795 continue;
5796
5797 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5798
5799 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5800 // Each nvvm.annotations metadata entry will be of the following form:
5801 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5802 // start index = 1, to skip the global variable key
5803 // increment = 2, to skip the value for each property-value pair
5804 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5805 MDString *K = cast<MDString>(MD->getOperand(j));
5806 const MDOperand &V = MD->getOperand(j + 1);
5807 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5808 if (!Upgraded)
5809 NewOperands.append({K, V});
5810 }
5811
5812 if (NewOperands.size() > 1)
5813 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5814 }
5815
5816 NamedMD->clearOperands();
5817 for (MDNode *N : NewNodes)
5818 NamedMD->addOperand(N);
5819}
5820
5821 /// This checks for the objc retain/release marker which should be upgraded.
5822 /// It returns true if the module is modified.
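/// For example, an old marker string of the form "instruction#comment"
/// (illustrative) is rewritten as "instruction;comment" before being stored
/// as a module flag.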
5823 static bool upgradeRetainReleaseMarker(Module &M) {
5824 bool Changed = false;
5825 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5826 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5827 if (ModRetainReleaseMarker) {
5828 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5829 if (Op) {
5830 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5831 if (ID) {
5832 SmallVector<StringRef, 4> ValueComp;
5833 ID->getString().split(ValueComp, "#");
5834 if (ValueComp.size() == 2) {
5835 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5836 ID = MDString::get(M.getContext(), NewValue);
5837 }
5838 M.addModuleFlag(Module::Error, MarkerKey, ID);
5839 M.eraseNamedMetadata(ModRetainReleaseMarker);
5840 Changed = true;
5841 }
5842 }
5843 }
5844 return Changed;
5845}
5846
5847 void llvm::UpgradeARCRuntime(Module &M) {
5848 // This lambda converts normal function calls to ARC runtime functions to
5849 // intrinsic calls.
5850 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5851 llvm::Intrinsic::ID IntrinsicFunc) {
5852 Function *Fn = M.getFunction(OldFunc);
5853
5854 if (!Fn)
5855 return;
5856
5857 Function *NewFn =
5858 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5859
5860 for (User *U : make_early_inc_range(Fn->users())) {
5861 CallInst *CI = dyn_cast<CallInst>(U);
5862 if (!CI || CI->getCalledFunction() != Fn)
5863 continue;
5864
5865 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5866 FunctionType *NewFuncTy = NewFn->getFunctionType();
5867 SmallVector<Value *, 4> Args;
5868
5869 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5870 // value to the return type of the old function.
5871 if (NewFuncTy->getReturnType() != CI->getType() &&
5872 !CastInst::castIsValid(Instruction::BitCast, CI,
5873 NewFuncTy->getReturnType()))
5874 continue;
5875
5876 bool InvalidCast = false;
5877
5878 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5879 Value *Arg = CI->getArgOperand(I);
5880
5881 // Bitcast argument to the parameter type of the new function if it's
5882 // not a variadic argument.
5883 if (I < NewFuncTy->getNumParams()) {
5884 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5885 // to the parameter type of the new function.
5886 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5887 NewFuncTy->getParamType(I))) {
5888 InvalidCast = true;
5889 break;
5890 }
5891 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5892 }
5893 Args.push_back(Arg);
5894 }
5895
5896 if (InvalidCast)
5897 continue;
5898
5899 // Create a call instruction that calls the new function.
5900 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5901 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5902 NewCall->takeName(CI);
5903
5904 // Bitcast the return value back to the type of the old call.
5905 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5906
5907 if (!CI->use_empty())
5908 CI->replaceAllUsesWith(NewRetVal);
5909 CI->eraseFromParent();
5910 }
5911
5912 if (Fn->use_empty())
5913 Fn->eraseFromParent();
5914 };
5915
5916 // Unconditionally convert a call to "clang.arc.use" to a call to
5917 // "llvm.objc.clang.arc.use".
5918 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5919
5920 // Upgrade the retain/release marker. If the marker did not need upgrading,
5921 // the module is either already new enough to contain the new intrinsics or
5922 // it is not ARC; either way the runtime calls need no upgrade.
5923 if (!upgradeRetainReleaseMarker(M))
5924 return;
5925
5926 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5927 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5928 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5929 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5930 {"objc_autoreleaseReturnValue",
5931 llvm::Intrinsic::objc_autoreleaseReturnValue},
5932 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5933 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5934 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5935 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5936 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5937 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5938 {"objc_release", llvm::Intrinsic::objc_release},
5939 {"objc_retain", llvm::Intrinsic::objc_retain},
5940 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5941 {"objc_retainAutoreleaseReturnValue",
5942 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5943 {"objc_retainAutoreleasedReturnValue",
5944 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5945 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5946 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5947 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5948 {"objc_unsafeClaimAutoreleasedReturnValue",
5949 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5950 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5951 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5952 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5953 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5954 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5955 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5956 {"objc_arc_annotation_topdown_bbstart",
5957 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5958 {"objc_arc_annotation_topdown_bbend",
5959 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5960 {"objc_arc_annotation_bottomup_bbstart",
5961 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5962 {"objc_arc_annotation_bottomup_bbend",
5963 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5964
5965 for (auto &I : RuntimeFuncs)
5966 UpgradeToIntrinsic(I.first, I.second);
5967}
5968
5969bool llvm::UpgradeModuleFlags(Module &M) {
5970 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5971 if (!ModFlags)
5972 return false;
5973
5974 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5975 bool HasSwiftVersionFlag = false;
5976 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5977 uint32_t SwiftABIVersion;
5978 auto Int8Ty = Type::getInt8Ty(M.getContext());
5979 auto Int32Ty = Type::getInt32Ty(M.getContext());
5980
5981 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5982 MDNode *Op = ModFlags->getOperand(I);
5983 if (Op->getNumOperands() != 3)
5984 continue;
5985 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5986 if (!ID)
5987 continue;
5988 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5989 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5990 Type::getInt32Ty(M.getContext()), B)),
5991 MDString::get(M.getContext(), ID->getString()),
5992 Op->getOperand(2)};
5993 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5994 Changed = true;
5995 };
5996
5997 if (ID->getString() == "Objective-C Image Info Version")
5998 HasObjCFlag = true;
5999 if (ID->getString() == "Objective-C Class Properties")
6000 HasClassProperties = true;
6001 // Upgrade PIC from Error/Max to Min.
6002 if (ID->getString() == "PIC Level") {
6003 if (auto *Behavior =
6004 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6005 uint64_t V = Behavior->getLimitedValue();
6006 if (V == Module::Error || V == Module::Max)
6007 SetBehavior(Module::Min);
6008 }
6009 }
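// For example, a flag !{i32 1, !"PIC Level", i32 2} (behavior Error == 1)
// becomes !{i32 8, !"PIC Level", i32 2} (behavior Min == 8).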
6010 // Upgrade "PIE Level" from Error to Max.
6011 if (ID->getString() == "PIE Level")
6012 if (auto *Behavior =
6013 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6014 if (Behavior->getLimitedValue() == Module::Error)
6015 SetBehavior(Module::Max);
6016
6017 // Upgrade branch protection and return address signing module flags. The
6018 // module flag behavior for these fields was Error and is now Min.
6019 if (ID->getString() == "branch-target-enforcement" ||
6020 ID->getString().starts_with("sign-return-address")) {
6021 if (auto *Behavior =
6022 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6023 if (Behavior->getLimitedValue() == Module::Error) {
6024 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6025 Metadata *Ops[3] = {
6026 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6027 Op->getOperand(1), Op->getOperand(2)};
6028 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6029 Changed = true;
6030 }
6031 }
6032 }
6033
6034 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
6035 // section name so that llvm-lto will not complain about mismatching
6036 // module flags that are functionally the same.
6037 if (ID->getString() == "Objective-C Image Info Section") {
6038 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6039 SmallVector<StringRef, 4> ValueComp;
6040 Value->getString().split(ValueComp, " ");
6041 if (ValueComp.size() != 1) {
6042 std::string NewValue;
6043 for (auto &S : ValueComp)
6044 NewValue += S.str();
6045 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6046 MDString::get(M.getContext(), NewValue)};
6047 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6048 Changed = true;
6049 }
6050 }
6051 }
6052
6053 // The IR upgrader turns an i32 "Objective-C Garbage Collection" flag into
6054 // an i8 value; if the higher bits are set, it adds module flags for Swift.
6055 if (ID->getString() == "Objective-C Garbage Collection") {
6056 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6057 if (Md) {
6058 assert(Md->getValue() && "Expected non-empty metadata");
6059 auto Type = Md->getValue()->getType();
6060 if (Type == Int8Ty)
6061 continue;
6062 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6063 if ((Val & 0xff) != Val) {
6064 HasSwiftVersionFlag = true;
6065 SwiftABIVersion = (Val & 0xff00) >> 8;
6066 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6067 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6068 }
6069 Metadata *Ops[3] = {
6070 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
6071 Op->getOperand(1),
6072 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
6073 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6074 Changed = true;
6075 }
6076 }
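// Layout of the legacy i32 value, as implied by the masks above:
//   bits 31..24  Swift major version
//   bits 23..16  Swift minor version
//   bits 15..8   Swift ABI version
//   bits 7..0    GC flags (the only byte kept in the upgraded i8 flag)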
6077
6078 if (ID->getString() == "amdgpu_code_object_version") {
6079 Metadata *Ops[3] = {
6080 Op->getOperand(0),
6081 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6082 Op->getOperand(2)};
6083 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6084 Changed = true;
6085 }
6086 }
6087
6088 // "Objective-C Class Properties" is recently added for Objective-C. We
6089 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6090 // flag of value 0, so we can correclty downgrade this flag when trying to
6091 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6092 // this module flag.
6093 if (HasObjCFlag && !HasClassProperties) {
6094 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6095 (uint32_t)0);
6096 Changed = true;
6097 }
6098
6099 if (HasSwiftVersionFlag) {
6100 M.addModuleFlag(Module::Error, "Swift ABI Version",
6101 SwiftABIVersion);
6102 M.addModuleFlag(Module::Error, "Swift Major Version",
6103 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6104 M.addModuleFlag(Module::Error, "Swift Minor Version",
6105 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6106 Changed = true;
6107 }
6108
6109 return Changed;
6110}
6111
6112void llvm::UpgradeSectionAttributes(Module &M) {
6113 auto TrimSpaces = [](StringRef Section) -> std::string {
6114 SmallVector<StringRef, 5> Components;
6115 Section.split(Components, ',');
6116
6117 SmallString<32> Buffer;
6118 raw_svector_ostream OS(Buffer);
6119
6120 for (auto Component : Components)
6121 OS << ',' << Component.trim();
6122
6123 return std::string(OS.str().substr(1));
6124 };
6125
6126 for (auto &GV : M.globals()) {
6127 if (!GV.hasSection())
6128 continue;
6129
6130 StringRef Section = GV.getSection();
6131
6132 if (!Section.starts_with("__DATA, __objc_catlist"))
6133 continue;
6134
6135 // e.g. "__DATA, __objc_catlist, regular, no_dead_strip" is canonicalized
6136 // to "__DATA,__objc_catlist,regular,no_dead_strip".
6137 GV.setSection(TrimSpaces(Section));
6138 }
6139}
6140
6141namespace {
6142// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6143// callsites within a function that did not also have the strictfp attribute.
6144// Since 10.0, if strict FP semantics are needed within a function, the
6145// function must have the strictfp attribute and all calls within the function
6146// must also have the strictfp attribute. This latter restriction is
6147// necessary to prevent unwanted libcall simplification when a function is
6148// being cloned (such as for inlining).
6149//
6150// The "dangling" strictfp attribute usage was only used to prevent constant
6151// folding and other libcall simplification. The nobuiltin attribute on the
6152// callsite has the same effect.
6153struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6154 StrictFPUpgradeVisitor() = default;
6155
6156 void visitCallBase(CallBase &Call) {
6157 if (!Call.isStrictFP())
6158 return;
6159 if (isa<ConstrainedFPIntrinsic>(&Call))
6160 return;
6161 // If we get here, the caller doesn't have the strictfp attribute
6162 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6163 Call.removeFnAttr(Attribute::StrictFP);
6164 Call.addFnAttr(Attribute::NoBuiltin);
6165 }
6166};
6167
6168/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6169struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6170 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6171 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6172
6173 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6174 if (!RMW.isFloatingPointOperation())
6175 return;
6176
6177 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6178 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6179 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6180 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6181 }
6182};
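// For example, inside a function carrying "amdgpu-unsafe-fp-atomics"="true",
// a floating-point operation such as
//   %old = atomicrmw fadd ptr %p, float %v seq_cst
// gains the three attachments set above:
//   !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0,
//   !amdgpu.ignore.denormal.mode !0
// where !0 = !{} is the empty metadata node.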
6183} // namespace
6184
6185void llvm::UpgradeFunctionAttributes(Function &F) {
6186 // If a function definition doesn't have the strictfp attribute,
6187 // convert any callsite strictfp attributes to nobuiltin.
6188 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6189 StrictFPUpgradeVisitor SFPV;
6190 SFPV.visit(F);
6191 }
6192
6193 // Remove all incompatible attributes from the function.
6194 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6195 F.getReturnType(), F.getAttributes().getRetAttrs()));
6196 for (auto &Arg : F.args())
6197 Arg.removeAttrs(
6198 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6199
6200 // Older versions of LLVM treated an "implicit-section-name" attribute
6201 // similarly to directly setting the section on a Function.
6202 if (Attribute A = F.getFnAttribute("implicit-section-name");
6203 A.isValid() && A.isStringAttribute()) {
6204 F.setSection(A.getValueAsString());
6205 F.removeFnAttr("implicit-section-name");
6206 }
6207
6208 if (!F.empty()) {
6209 // For some reason this is called twice, and the first time is before any
6210 // instructions are loaded into the body.
6211
6212 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6213 A.isValid()) {
6214
6215 if (A.getValueAsBool()) {
6216 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6217 Visitor.visit(F);
6218 }
6219
6220 // We will leave behind dead attribute uses on external declarations, but
6221 // clang never added these to declarations anyway.
6222 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6223 }
6224 }
6225}
6226
6227 // Set the function attribute if it is not already present.
6228static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6229 StringRef Value) {
6230 if (!F.hasFnAttribute(FnAttrName))
6231 F.addFnAttr(FnAttrName, Value);
6232}
6233
6234 // Add the function attribute if it is not present. If the attribute has the
6235 // value "false", remove it; if it has the value "true", reset it to a
6236 // valueless attribute.
6237static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6238 if (!F.hasFnAttribute(FnAttrName)) {
6239 if (Set)
6240 F.addFnAttr(FnAttrName);
6241 } else {
6242 auto A = F.getFnAttribute(FnAttrName);
6243 if ("false" == A.getValueAsString())
6244 F.removeFnAttr(FnAttrName);
6245 else if ("true" == A.getValueAsString()) {
6246 F.removeFnAttr(FnAttrName);
6247 F.addFnAttr(FnAttrName);
6248 }
6249 }
6250}
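// For example: a function lacking FnAttrName gains the valueless form only
// when Set is true; one carrying FnAttrName="false" has the attribute removed;
// and one carrying FnAttrName="true" has it replaced by the valueless form.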
6251
6252void llvm::copyModuleAttrToFunctions(Module &M) {
6253 Triple T(M.getTargetTriple());
6254 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6255 return;
6256
6257 uint64_t BTEValue = 0;
6258 uint64_t BPPLRValue = 0;
6259 uint64_t GCSValue = 0;
6260 uint64_t SRAValue = 0;
6261 uint64_t SRAALLValue = 0;
6262 uint64_t SRABKeyValue = 0;
6263
6264 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6265 if (ModFlags) {
6266 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6267 MDNode *Op = ModFlags->getOperand(I);
6268 if (Op->getNumOperands() != 3)
6269 continue;
6270
6271 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6272 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6273 if (!ID || !CI)
6274 continue;
6275
6276 StringRef IDStr = ID->getString();
6277 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6278 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6279 : IDStr == "guarded-control-stack" ? &GCSValue
6280 : IDStr == "sign-return-address" ? &SRAValue
6281 : IDStr == "sign-return-address-all" ? &SRAALLValue
6282 : IDStr == "sign-return-address-with-bkey"
6283 ? &SRABKeyValue
6284 : nullptr;
6285 if (!ValPtr)
6286 continue;
6287
6288 *ValPtr = CI->getZExtValue();
6289 if (*ValPtr == 2)
6290 return;
6291 }
6292 }
6293
6294 bool BTE = BTEValue == 1;
6295 bool BPPLR = BPPLRValue == 1;
6296 bool GCS = GCSValue == 1;
6297 bool SRA = SRAValue == 1;
6298
6299 StringRef SignTypeValue = "non-leaf";
6300 if (SRA && SRAALLValue == 1)
6301 SignTypeValue = "all";
6302
6303 StringRef SignKeyValue = "a_key";
6304 if (SRA && SRABKeyValue == 1)
6305 SignKeyValue = "b_key";
6306
6307 for (Function &F : M.getFunctionList()) {
6308 if (F.isDeclaration())
6309 continue;
6310
6311 if (SRA) {
6312 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6313 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6314 } else {
6315 if (auto A = F.getFnAttribute("sign-return-address");
6316 A.isValid() && "none" == A.getValueAsString()) {
6317 F.removeFnAttr("sign-return-address");
6318 F.removeFnAttr("sign-return-address-key");
6319 }
6320 }
6321 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6322 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6323 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6324 }
6325
6326 if (BTE)
6327 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6328 if (BPPLR)
6329 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6330 if (GCS)
6331 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6332 if (SRA) {
6333 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6334 if (SRAALLValue == 1)
6335 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6336 if (SRABKeyValue == 1)
6337 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6338 }
6339}
6340
6341static bool isOldLoopArgument(Metadata *MD) {
6342 auto *T = dyn_cast_or_null<MDTuple>(MD);
6343 if (!T)
6344 return false;
6345 if (T->getNumOperands() < 1)
6346 return false;
6347 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6348 if (!S)
6349 return false;
6350 return S->getString().starts_with("llvm.vectorizer.");
6351}
6352
6353static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6354 StringRef OldPrefix = "llvm.vectorizer.";
6355 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6356
6357 if (OldTag == "llvm.vectorizer.unroll")
6358 return MDString::get(C, "llvm.loop.interleave.count");
6359
6360 return MDString::get(
6361 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6362 .str());
6363}
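// For instance, "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width"
// and "llvm.vectorizer.enable" becomes "llvm.loop.vectorize.enable", while
// "llvm.vectorizer.unroll" is special-cased to "llvm.loop.interleave.count".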
6364
6365static Metadata *upgradeLoopArgument(Metadata *MD) {
6366 auto *T = dyn_cast_or_null<MDTuple>(MD);
6367 if (!T)
6368 return MD;
6369 if (T->getNumOperands() < 1)
6370 return MD;
6371 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6372 if (!OldTag)
6373 return MD;
6374 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6375 return MD;
6376
6377 // This has an old tag. Upgrade it.
6378 SmallVector<Metadata *, 8> Ops;
6379 Ops.reserve(T->getNumOperands());
6380 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6381 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6382 Ops.push_back(T->getOperand(I));
6383
6384 return MDTuple::get(T->getContext(), Ops);
6385}
6386
6387MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6388 auto *T = dyn_cast<MDTuple>(&N);
6389 if (!T)
6390 return &N;
6391
6392 if (none_of(T->operands(), isOldLoopArgument))
6393 return &N;
6394
6395 SmallVector<Metadata *, 4> Ops;
6396 Ops.reserve(T->getNumOperands());
6397 for (Metadata *MD : T->operands())
6398 Ops.push_back(upgradeLoopArgument(MD));
6399
6400 return MDTuple::get(T->getContext(), Ops);
6401}
6402
6403std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6404 Triple T(TT);
6405 // The only data layout upgrade needed for pre-GCN, SPIR, or SPIRV targets is
6406 // setting the address space of globals to 1. This does not apply to SPIRV Logical.
6407 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6408 !DL.contains("-G") && !DL.starts_with("G")) {
6409 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6410 }
6411
6412 if (T.isLoongArch64() || T.isRISCV64()) {
6413 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6414 auto I = DL.find("-n64-");
6415 if (I != StringRef::npos)
6416 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6417 return DL.str();
6418 }
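// e.g. the (hypothetical) layout "e-m:e-p:64:64-i64:64-n64-S128" becomes
// "e-m:e-p:64:64-i64:64-n32:64-S128"; only the "-n64-" component is rewritten.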
6419
6420 // AMDGPU data layout upgrades.
6421 std::string Res = DL.str();
6422 if (T.isAMDGPU()) {
6423 // Define address spaces for constants.
6424 if (!DL.contains("-G") && !DL.starts_with("G"))
6425 Res.append(Res.empty() ? "G1" : "-G1");
6426
6427 // AMDGCN data layout upgrades.
6428 if (T.isAMDGCN()) {
6429
6430 // Add missing non-integral declarations.
6431 // This goes before adding new address spaces to prevent incoherent string
6432 // values.
6433 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6434 Res.append("-ni:7:8:9");
6435 // Update ni:7 to ni:7:8:9.
6436 if (DL.ends_with("ni:7"))
6437 Res.append(":8:9");
6438 if (DL.ends_with("ni:7:8"))
6439 Res.append(":9");
6440
6441 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6442 // resources). An empty data layout has already been upgraded to G1 by now.
6443 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6444 Res.append("-p7:160:256:256:32");
6445 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6446 Res.append("-p8:128:128:128:48");
6447 constexpr StringRef OldP8("-p8:128:128-");
6448 if (DL.contains(OldP8))
6449 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6450 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6451 Res.append("-p9:192:256:256:32");
6452 }
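// e.g. a layout ending in "-ni:7" grows to "-ni:7:8:9", and a layout that
// says nothing about p7/p8/p9 gains the buffer address-space sizes appended
// above.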
6453
6454 // Upgrade the ELF mangling mode.
6455 if (!DL.contains("m:e"))
6456 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6457
6458 return Res;
6459 }
6460
6461 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6462 // If the datalayout matches the expected format, add pointer size address
6463 // spaces to the datalayout.
6464 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6465 if (!DL.contains(AddrSpaces)) {
6466 SmallVector<StringRef, 4> Groups;
6467 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6468 if (R.match(Res, &Groups))
6469 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6470 }
6471 };
6472
6473 // AArch64 data layout upgrades.
6474 if (T.isAArch64()) {
6475 // Add "-Fn32"
6476 if (!DL.empty() && !DL.contains("-Fn32"))
6477 Res.append("-Fn32");
6478 AddPtr32Ptr64AddrSpaces();
6479 return Res;
6480 }
6481
6482 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6483 T.isWasm()) {
6484 // Mips64 with the o32 ABI did not add "-i128:128".
6485 // Add "-i128:128" if it is missing.
6486 std::string I64 = "-i64:64";
6487 std::string I128 = "-i128:128";
6488 if (!StringRef(Res).contains(I128)) {
6489 size_t Pos = Res.find(I64);
6490 if (Pos != size_t(-1))
6491 Res.insert(Pos + I64.size(), I128);
6492 }
6493 }
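// e.g. the (hypothetical) layout "E-m:e-i64:64-n32:64" becomes
// "E-m:e-i64:64-i128:128-n32:64": "-i128:128" is spliced in immediately
// after the "-i64:64" component.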
6494
6495 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6496 size_t Pos = Res.find("-S128");
6497 if (Pos == StringRef::npos)
6498 Pos = Res.size();
6499 Res.insert(Pos, "-f64:32:64");
6500 }
6501
6502 if (!T.isX86())
6503 return Res;
6504
6505 AddPtr32Ptr64AddrSpaces();
6506
6507 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6508 // for i128 operations prior to this being reflected in the data layout, and
6509 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6510 // boundaries, so although this is a breaking change, the upgrade is expected
6511 // to fix more IR than it breaks.
6512 // Intel MCU is an exception and uses 4-byte-alignment.
6513 if (!T.isOSIAMCU()) {
6514 std::string I128 = "-i128:128";
6515 if (StringRef Ref = Res; !Ref.contains(I128)) {
6516 SmallVector<StringRef, 4> Groups;
6517 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6518 if (R.match(Res, &Groups))
6519 Res = (Groups[1] + I128 + Groups[3]).str();
6520 }
6521 }
6522
6523 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6524 // Raising the alignment is safe because Clang did not produce f80 values in
6525 // the MSVC environment before this upgrade was added.
6526 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6527 StringRef Ref = Res;
6528 auto I = Ref.find("-f80:32-");
6529 if (I != StringRef::npos)
6530 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6531 }
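// e.g. a 32-bit MSVC layout containing "-f80:32-" is rewritten to contain
// "-f80:128-" instead.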
6532
6533 return Res;
6534}
6535
6536void llvm::UpgradeAttributes(AttrBuilder &B) {
6537 StringRef FramePointer;
6538 Attribute A = B.getAttribute("no-frame-pointer-elim");
6539 if (A.isValid()) {
6540 // The value can be "true" or "false".
6541 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6542 B.removeAttribute("no-frame-pointer-elim");
6543 }
6544 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6545 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6546 if (FramePointer != "all")
6547 FramePointer = "non-leaf";
6548 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6549 }
6550 if (!FramePointer.empty())
6551 B.addAttribute("frame-pointer", FramePointer);
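// Summary of the mapping implemented above:
//   "no-frame-pointer-elim"="true"           -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"          -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" present -> "frame-pointer"="non-leaf",
//     unless "no-frame-pointer-elim"="true" already forced "all".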
6552
6553 A = B.getAttribute("null-pointer-is-valid");
6554 if (A.isValid()) {
6555 // The value can be "true" or "false".
6556 bool NullPointerIsValid = A.getValueAsString() == "true";
6557 B.removeAttribute("null-pointer-is-valid");
6558 if (NullPointerIsValid)
6559 B.addAttribute(Attribute::NullPointerIsValid);
6560 }
6561}
6562
6563void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6564 // clang.arc.attachedcall bundles are now required to have an operand.
6565 // If they don't, it's okay to drop them entirely: when there is an operand,
6566 // the "attachedcall" is meaningful and required, but without an operand,
6567 // it's just a marker NOP. Dropping it merely prevents an optimization.
6568 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6569 return OBD.getTag() == "clang.arc.attachedcall" &&
6570 OBD.inputs().empty();
6571 });
6572}