//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

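// Illustrative sketch (not part of the source): given an old declaration such
// as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// upgradePTESTIntrinsic() renames it to "llvm.x86.sse41.ptestc.old" and
// inserts the current declaration, which takes <2 x i64> operands; calls to
// the renamed function are remapped against NewFn later in the upgrade
// process.
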
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

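// The helpers above all follow the same shape; a minimal usage sketch
// (illustrative, assuming F is already known to be an x86 intrinsic):
//   Function *NewFn = nullptr;
//   if (upgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
//                                 NewFn)) {
//     // F is now "<old-name>.old" and NewFn is the current declaration;
//     // call sites are rewritten against NewFn by the caller.
//   }
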
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

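// Note on the contract used below (as the surrounding comments suggest):
// returning true while leaving NewFn == nullptr means the call itself must be
// rewritten later in UpgradeIntrinsicCall, whereas returning true with a
// non-null NewFn means the old function was renamed and calls can simply be
// redirected to the new declaration.
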
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
586 if (Name.consume_front("avx512.")) {
587 if (Name.consume_front("mask.cmp.")) {
588 // Added in 7.0
590 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
591 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
592 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
593 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
594 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
595 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
598 return upgradeX86MaskedFPCompare(F, ID, NewFn);
599 } else if (Name.starts_with("vpdpbusd.") ||
600 Name.starts_with("vpdpbusds.")) {
601 // Added in 21.1
603 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
604 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
605 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
606 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
607 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
608 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
611 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
612 } else if (Name.starts_with("vpdpwssd.") ||
613 Name.starts_with("vpdpwssds.")) {
614 // Added in 21.1
616 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
617 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
618 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
619 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
620 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
621 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
624 return upgradeX86MultiplyAddWords(F, ID, NewFn);
625 }
626 return false; // No other 'x86.avx512.*'.
627 }
628
629 if (Name.consume_front("avx2.")) {
630 if (Name.consume_front("vpdpb")) {
631 // Added in 21.1
633 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
634 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
635 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
636 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
637 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
638 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
639 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
640 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
641 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
642 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
643 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
644 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
647 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
648 } else if (Name.consume_front("vpdpw")) {
649 // Added in 21.1
651 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
652 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
653 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
654 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
655 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
656 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
657 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
658 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
659 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
660 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
661 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
662 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
665 return upgradeX86MultiplyAddWords(F, ID, NewFn);
666 }
667 return false; // No other 'x86.avx2.*'
668 }
669
670 if (Name.consume_front("avx10.")) {
671 if (Name.consume_front("vpdpb")) {
672 // Added in 21.1
674 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
675 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
676 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
677 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
678 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
679 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
682 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
683 } else if (Name.consume_front("vpdpw")) {
685 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
686 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
687 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
688 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
689 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
690 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
693 return upgradeX86MultiplyAddWords(F, ID, NewFn);
694 }
695 return false; // No other 'x86.avx10.*'
696 }
697
698 if (Name.consume_front("avx512bf16.")) {
699 // Added in 9.0
701 .Case("cvtne2ps2bf16.128",
702 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
703 .Case("cvtne2ps2bf16.256",
704 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
705 .Case("cvtne2ps2bf16.512",
706 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
707 .Case("mask.cvtneps2bf16.128",
708 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
709 .Case("cvtneps2bf16.256",
710 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
711 .Case("cvtneps2bf16.512",
712 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
715 return upgradeX86BF16Intrinsic(F, ID, NewFn);
716
717 // Added in 9.0
719 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
720 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
721 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
724 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
725 return false; // No other 'x86.avx512bf16.*'.
726 }
727
728 if (Name.consume_front("xop.")) {
730 if (Name.starts_with("vpermil2")) { // Added in 3.9
731 // Upgrade any XOP PERMIL2 index operand still using a float/double
732 // vector.
733 auto Idx = F->getFunctionType()->getParamType(2);
734 if (Idx->isFPOrFPVectorTy()) {
735 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
736 unsigned EltSize = Idx->getScalarSizeInBits();
737 if (EltSize == 64 && IdxSize == 128)
738 ID = Intrinsic::x86_xop_vpermil2pd;
739 else if (EltSize == 32 && IdxSize == 128)
740 ID = Intrinsic::x86_xop_vpermil2ps;
741 else if (EltSize == 64 && IdxSize == 256)
742 ID = Intrinsic::x86_xop_vpermil2pd_256;
743 else
744 ID = Intrinsic::x86_xop_vpermil2ps_256;
745 }
746 } else if (F->arg_size() == 2)
747 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
749 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
750 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
752
754 rename(F);
755 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
756 return true;
757 }
758 return false; // No other 'x86.xop.*'
759 }
760
761 if (Name == "seh.recoverfp") {
762 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
763 Intrinsic::eh_recoverfp);
764 return true;
765 }
766
767 return false;
768}
769
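// Name-matching sketch for the function above: an incoming declaration named
// "llvm.x86.sse41.ptestc" has already had its "llvm." prefix removed by the
// caller; consume_front("x86.") strips "x86.", consume_front("sse41.ptest")
// leaves "c", and the StringSwitch maps that remainder to
// Intrinsic::x86_sse41_ptestc.
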
// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Return true if
// an upgrade is needed.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

899 if (Name.consume_front("mve.")) {
900 // 'arm.mve.*'.
901 if (Name == "vctp64") {
902 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
903 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
904 // the function and deal with it below in UpgradeIntrinsicCall.
905 rename(F);
906 return true;
907 }
908 return false; // Not 'arm.mve.vctp64'.
909 }
910
911 if (Name.starts_with("vrintn.v")) {
913 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
914 return true;
915 }
916
917 // These too are changed to accept a v2i1 instead of the old v4i1.
918 if (Name.consume_back(".v4i1")) {
919 // 'arm.mve.*.v4i1'.
920 if (Name.consume_back(".predicated.v2i64.v4i32"))
921 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
922 return Name == "mull.int" || Name == "vqdmull";
923
924 if (Name.consume_back(".v2i64")) {
925 // 'arm.mve.*.v2i64.v4i1'
926 bool IsGather = Name.consume_front("vldr.gather.");
927 if (IsGather || Name.consume_front("vstr.scatter.")) {
928 if (Name.consume_front("base.")) {
929 // Optional 'wb.' prefix.
930 Name.consume_front("wb.");
931 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
932 // predicated.v2i64.v2i64.v4i1'.
933 return Name == "predicated.v2i64";
934 }
935
936 if (Name.consume_front("offset.predicated."))
937 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
938 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
939
940 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
941 return false;
942 }
943
944 return false; // No other 'arm.mve.*.v2i64.v4i1'.
945 }
946 return false; // No other 'arm.mve.*.v4i1'.
947 }
948 return false; // No other 'arm.mve.*'.
949 }
950
951 if (Name.consume_front("cde.vcx")) {
952 // 'arm.cde.vcx*'.
953 if (Name.consume_back(".predicated.v2i64.v4i1"))
954 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
955 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
956 Name == "3q" || Name == "3qa";
957
958 return false; // No other 'arm.cde.vcx*'.
959 }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

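// Regex-matching sketch for the 'arm.neon.vst*' path above (illustrative):
// for a declaration like "llvm.arm.neon.vst2.p0.v8i8", once "arm.neon." and
// "vst" are consumed, Name is "2.p0.v8i8"-style text ending in the vector
// suffix; vstRegex captures "2" in Groups[1], so the single-character group
// selects the StoreInts table, indexed by the declaration's argument count.
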
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use the shared_cluster address-space[AS=7]).
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    //     The last three parameters of the older version of these
    //     intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    //     The newer version reads as:
    //     arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    //     So, when the type of the [N-3]rd argument is not i1, it is the
    //     older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

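// Worked example of check (2) above (illustrative): an old declaration ends
// with (..., i64 %ch, i1 %mc_flag, i1 %ch_flag), so its [N-3]rd parameter is
// the i64 channel value, the i1 test fails, and the declaration is flagged
// for upgrade; the new form ends with (..., i1 %mc_flag, i1 %ch_flag,
// i32 %cta_group_flag), so its [N-3]rd parameter is i1 and nothing is done.
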
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

1170 if (Name.consume_front("fma.rn."))
1171 return StringSwitch<Intrinsic::ID>(Name)
1172 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1173 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1174 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
1175 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
1176 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
1177 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
1178 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
1179 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
1180 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1181 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1182 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
1183 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
1185
1186 if (Name.consume_front("fmax."))
1187 return StringSwitch<Intrinsic::ID>(Name)
1188 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1189 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1190 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1191 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1192 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1193 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1194 .Case("ftz.nan.xorsign.abs.bf16",
1195 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1196 .Case("ftz.nan.xorsign.abs.bf16x2",
1197 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1198 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1199 .Case("ftz.xorsign.abs.bf16x2",
1200 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1201 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1202 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1203 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1204 .Case("nan.xorsign.abs.bf16x2",
1205 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1206 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1207 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1209
1210 if (Name.consume_front("fmin."))
1211 return StringSwitch<Intrinsic::ID>(Name)
1212 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1213 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1214 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1215 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1216 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1217 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1218 .Case("ftz.nan.xorsign.abs.bf16",
1219 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1220 .Case("ftz.nan.xorsign.abs.bf16x2",
1221 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1222 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1223 .Case("ftz.xorsign.abs.bf16x2",
1224 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1225 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1226 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1227 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1228 .Case("nan.xorsign.abs.bf16x2",
1229 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1230 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1231 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1233
1234 if (Name.consume_front("neg."))
1235 return StringSwitch<Intrinsic::ID>(Name)
1236 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1237 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1239
1241}
1242
static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

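// Usage sketch (mirrors the checks further below): for an old name such as
// "nvvm.ptr.gen.to.shared.*" the caller first consumes "ptr.gen.to.", then
// this helper consumes the "shared" prefix; for "nvvm.ptr.shared.to.gen.*"
// the caller consumes "ptr.", this helper consumes "shared", and the
// remaining text must start with ".to.gen".
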
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (F->arg_size() == 7 &&
          F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
        // Legacy wmma iu8 intrinsic without the optional clamp operand.
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to the new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Upgrade the memory intrinsics (memcpy/memmove/memset) that take an
    // alignment parameter to the form that embeds the alignment as an
    // attribute of the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

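    // Illustrative IR sketch of this change: a 5-argument form such as
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)
    // loses its explicit alignment operand and becomes
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %d, i8* align 4 %s,
    //                                        i64 %n, i1 false)
    // (the operand rewriting itself happens when the calls are upgraded).
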
    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
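  // Sketch of the masked-intrinsic change above (illustrative, assuming the
  // historical 4-argument form): the old masked.load carried alignment as a
  // separate i32 operand, e.g.
  //   @llvm.masked.load.v4i32.p0(ptr %p, i32 4, <4 x i1> %m, <4 x i32> %pt)
  // the upgraded declaration is overloaded on the {result, pointer} types and
  // drops that operand, carrying the alignment as an attribute instead.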
1534 case 'n': {
1535 if (Name.consume_front("nvvm.")) {
1536 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1537 if (F->arg_size() == 1) {
1538 Intrinsic::ID IID =
1540 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1541 .Case("clz.i", Intrinsic::ctlz)
1542 .Case("popc.i", Intrinsic::ctpop)
1544 if (IID != Intrinsic::not_intrinsic) {
1545 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1546 {F->getReturnType()});
1547 return true;
1548 }
1549 }
1550
1551 // Check for nvvm intrinsics that need a return type adjustment.
1552 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1554 if (IID != Intrinsic::not_intrinsic) {
1555 NewFn = nullptr;
1556 return true;
1557 }
1558 }
1559
1560 // Upgrade Distributed Shared Memory Intrinsics
1562 if (IID != Intrinsic::not_intrinsic) {
1563 rename(F);
1564 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1565 return true;
1566 }
1567
1568 // Upgrade TMA copy G2S Intrinsics
1570 if (IID != Intrinsic::not_intrinsic) {
1571 rename(F);
1572 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1573 return true;
1574 }
1575
1576 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1577 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1578 //
1579 // TODO: We could add lohi.i2d.
1580 bool Expand = false;
1581 if (Name.consume_front("abs."))
1582 // nvvm.abs.{i,ii}
1583 Expand =
1584 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1585 else if (Name.consume_front("fabs."))
1586 // nvvm.fabs.{f,ftz.f,d}
1587 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1588 else if (Name.consume_front("ex2.approx."))
1589 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1590 Expand =
1591 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1592 else if (Name.consume_front("max.") || Name.consume_front("min."))
1593 // nvvm.{min,max}.{s,i,ll,us,ui,ull}
1594 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1595 Name == "ui" || Name == "ull";
1596 else if (Name.consume_front("atomic.load."))
1597 // nvvm.atomic.load.add.{f32,f64}.p
1598 // nvvm.atomic.load.{inc,dec}.32.p
1599 Expand = StringSwitch<bool>(Name)
1600 .StartsWith("add.f32.p", true)
1601 .StartsWith("add.f64.p", true)
1602 .StartsWith("inc.32.p", true)
1603 .StartsWith("dec.32.p", true)
1604 .Default(false);
1605 else if (Name.consume_front("bitcast."))
1606 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1607 Expand =
1608 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1609 else if (Name.consume_front("rotate."))
1610 // nvvm.rotate.{b32,b64,right.b64}
1611 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1612 else if (Name.consume_front("ptr.gen.to."))
1613 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1614 Expand = consumeNVVMPtrAddrSpace(Name);
1615 else if (Name.consume_front("ptr."))
1616 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1617 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1618 else if (Name.consume_front("ldg.global."))
1619 // nvvm.ldg.global.{i,p,f}
1620 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1621 Name.starts_with("p."));
1622 else
1623 Expand = StringSwitch<bool>(Name)
1624 .Case("barrier0", true)
1625 .Case("barrier.n", true)
1626 .Case("barrier.sync.cnt", true)
1627 .Case("barrier.sync", true)
1628 .Case("barrier", true)
1629 .Case("bar.sync", true)
1630 .Case("barrier0.popc", true)
1631 .Case("barrier0.and", true)
1632 .Case("barrier0.or", true)
1633 .Case("clz.ll", true)
1634 .Case("popc.ll", true)
1635 .Case("h2f", true)
1636 .Case("swap.lo.hi.b64", true)
1637 .Case("tanh.approx.f32", true)
1638 .Default(false);
1639
1640 if (Expand) {
1641 NewFn = nullptr;
1642 return true;
1643 }
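// Returning true with NewFn left null tells the caller that the call itself
// must be rewritten. As an illustration, a call to llvm.nvvm.max.i is later
// expanded to the plain compare-and-select idiom:
//   %cmp = icmp sge i32 %a, %b
//   %max = select i1 %cmp, i32 %a, i32 %b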
1644 break; // No other 'nvvm.*'.
1645 }
1646 break;
1647 }
1648 case 'o':
1649 if (Name.starts_with("objectsize.")) {
1650 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1651 if (F->arg_size() == 2 || F->arg_size() == 3) {
1652 rename(F);
1653 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1654 Intrinsic::objectsize, Tys);
1655 return true;
1656 }
1657 }
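// Illustrative example: a two-argument call such as
//   %sz = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false)
// is re-declared here and later completed by UpgradeIntrinsicCall with the
// missing nullunknown/dynamic booleans (defaulted to false).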
1658 break;
1659
1660 case 'p':
1661 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1662 rename(F);
1663 NewFn = Intrinsic::getOrInsertDeclaration(
1664 F->getParent(), Intrinsic::ptr_annotation,
1665 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1666 return true;
1667 }
1668 break;
1669
1670 case 'r': {
1671 if (Name.consume_front("riscv.")) {
1672 Intrinsic::ID ID;
1673 ID = StringSwitch<Intrinsic::ID>(Name)
1674 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1675 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1676 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1677 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1678 .Default(Intrinsic::not_intrinsic);
1679 if (ID != Intrinsic::not_intrinsic) {
1680 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1681 rename(F);
1682 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1683 return true;
1684 }
1685 break; // No other applicable upgrades.
1686 }
1687
1688 ID = StringSwitch<Intrinsic::ID>(Name)
1689 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1690 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1691 .Default(Intrinsic::not_intrinsic);
1692 if (ID != Intrinsic::not_intrinsic) {
1693 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1694 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1695 rename(F);
1696 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1697 return true;
1698 }
1699 break; // No other applicable upgrades.
1700 }
1701
1702 ID = StringSwitch<Intrinsic::ID>(Name)
1703 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1704 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1705 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1706 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1707 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1708 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1709 .Default(Intrinsic::not_intrinsic);
1710 if (ID != Intrinsic::not_intrinsic) {
1711 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1712 rename(F);
1713 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1714 return true;
1715 }
1716 break; // No other applicable upgrades.
1717 }
1718 break; // No other 'riscv.*' intrinsics
1719 }
1720 } break;
1721
1722 case 's':
1723 if (Name == "stackprotectorcheck") {
1724 NewFn = nullptr;
1725 return true;
1726 }
1727 break;
1728
1729 case 't':
1730 if (Name == "thread.pointer") {
1731 NewFn = Intrinsic::getOrInsertDeclaration(
1732 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1733 return true;
1734 }
1735 break;
1736
1737 case 'v': {
1738 if (Name == "var.annotation" && F->arg_size() == 4) {
1739 rename(F);
1740 NewFn = Intrinsic::getOrInsertDeclaration(
1741 F->getParent(), Intrinsic::var_annotation,
1742 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1743 return true;
1744 }
1745 break;
1746 }
1747
1748 case 'w':
1749 if (Name.consume_front("wasm.")) {
1750 Intrinsic::ID ID =
1751 StringSwitch<Intrinsic::ID>(Name)
1752 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1753 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1754 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1755 .Default(Intrinsic::not_intrinsic);
1756 if (ID != Intrinsic::not_intrinsic) {
1757 rename(F);
1758 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1759 F->getReturnType());
1760 return true;
1761 }
1762
1763 if (Name.consume_front("dot.i8x16.i7x16.")) {
1764 ID = StringSwitch<Intrinsic::ID>(Name)
1765 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1766 .Case("add.signed",
1767 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1768 .Default(Intrinsic::not_intrinsic);
1769 if (ID != Intrinsic::not_intrinsic) {
1770 rename(F);
1771 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1772 return true;
1773 }
1774 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1775 }
1776 break; // No other 'wasm.*'.
1777 }
1778 break;
1779
1780 case 'x':
1781 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1782 return true;
1783 }
1784
1785 auto *ST = dyn_cast<StructType>(F->getReturnType());
1786 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1787 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1788 // Replace return type with literal non-packed struct. Only do this for
1789 // intrinsics declared to return a struct, not for intrinsics with
1790 // overloaded return type, in which case the exact struct type will be
1791 // mangled into the name.
1792 SmallVector<Intrinsic::IITDescriptor> Desc;
1793 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1794 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1795 auto *FT = F->getFunctionType();
1796 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1797 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1798 std::string Name = F->getName().str();
1799 rename(F);
1800 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1801 Name, F->getParent());
1802
1803 // The new function may also need remangling.
1804 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1805 NewFn = *Result;
1806 return true;
1807 }
1808 }
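// Illustrative example (hypothetical intrinsic name): a declaration like
//   %pair = type { i32, i32 }
//   declare %pair @llvm.some.op(i32)
// is recreated as
//   declare { i32, i32 } @llvm.some.op(i32)
// so the identified struct type is replaced by the equivalent literal one.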
1809
1810 // Remangle our intrinsic since we upgrade the mangling
1811 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1812 if (Result != std::nullopt) {
1813 NewFn = *Result;
1814 return true;
1815 }
1816
1817 // This may not belong here. This function is effectively being overloaded
1818 // to both detect an intrinsic which needs upgrading, and to provide the
1819 // upgraded form of the intrinsic. We should perhaps have two separate
1820 // functions for this.
1821 return false;
1822}
1823
1824 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1825 bool CanUpgradeDebugIntrinsicsToRecords) {
1826 NewFn = nullptr;
1827 bool Upgraded =
1828 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1829
1830 // Upgrade intrinsic attributes. This does not change the function.
1831 if (NewFn)
1832 F = NewFn;
1833 if (Intrinsic::ID id = F->getIntrinsicID()) {
1834 // Only do this if the intrinsic signature is valid.
1835 SmallVector<Type *> OverloadTys;
1836 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1837 F->setAttributes(
1838 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1839 }
1840 return Upgraded;
1841}
1842
1843 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1844 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1845 GV->getName() == "llvm.global_dtors")) ||
1846 !GV->hasInitializer())
1847 return nullptr;
1848 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1849 if (!ATy)
1850 return nullptr;
1851 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1852 if (!STy || STy->getNumElements() != 2)
1853 return nullptr;
1854
1855 LLVMContext &C = GV->getContext();
1856 IRBuilder<> IRB(C);
1857 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1858 IRB.getPtrTy());
1859 Constant *Init = GV->getInitializer();
1860 unsigned N = Init->getNumOperands();
1861 std::vector<Constant *> NewCtors(N);
1862 for (unsigned i = 0; i != N; ++i) {
1863 auto Ctor = cast<Constant>(Init->getOperand(i));
1864 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1865 Ctor->getAggregateElement(1),
1866 Constant::getNullValue(IRB.getPtrTy()));
1867 }
1868 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1869
1870 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1871 NewInit, GV->getName());
1872}
1873
1874// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1875// to byte shuffles.
1876 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1877 unsigned Shift) {
1878 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1879 unsigned NumElts = ResultTy->getNumElements() * 8;
1880
1881 // Bitcast from a 64-bit element type to a byte element type.
1882 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1883 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1884
1885 // We'll be shuffling in zeroes.
1886 Value *Res = Constant::getNullValue(VecTy);
1887
1888 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1889 // we'll just return the zero vector.
1890 if (Shift < 16) {
1891 int Idxs[64];
1892 // 256/512-bit version is split into 2/4 16-byte lanes.
1893 for (unsigned l = 0; l != NumElts; l += 16)
1894 for (unsigned i = 0; i != 16; ++i) {
1895 unsigned Idx = NumElts + i - Shift;
1896 if (Idx < NumElts)
1897 Idx -= NumElts - 16; // end of lane, switch operand.
1898 Idxs[l + i] = Idx + l;
1899 }
1900
1901 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1902 }
1903
1904 // Bitcast back to a 64-bit element type.
1905 return Builder.CreateBitCast(Res, ResultTy, "cast");
1906}
1907
1908// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1909// to byte shuffles.
1910 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1911 unsigned Shift) {
1912 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1913 unsigned NumElts = ResultTy->getNumElements() * 8;
1914
1915 // Bitcast from a 64-bit element type to a byte element type.
1916 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1917 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1918
1919 // We'll be shuffling in zeroes.
1920 Value *Res = Constant::getNullValue(VecTy);
1921
1922 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1923 // we'll just return the zero vector.
1924 if (Shift < 16) {
1925 int Idxs[64];
1926 // 256/512-bit version is split into 2/4 16-byte lanes.
1927 for (unsigned l = 0; l != NumElts; l += 16)
1928 for (unsigned i = 0; i != 16; ++i) {
1929 unsigned Idx = i + Shift;
1930 if (Idx >= 16)
1931 Idx += NumElts - 16; // end of lane, switch operand.
1932 Idxs[l + i] = Idx + l;
1933 }
1934
1935 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1936 }
1937
1938 // Bitcast back to a 64-bit element type.
1939 return Builder.CreateBitCast(Res, ResultTy, "cast");
1940}
1941
1942static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1943 unsigned NumElts) {
1944 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1945 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1946 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1947 Mask = Builder.CreateBitCast(Mask, MaskTy);
1948
1949 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1950 // i8 and we need to extract down to the right number of elements.
1951 if (NumElts <= 4) {
1952 int Indices[4];
1953 for (unsigned i = 0; i != NumElts; ++i)
1954 Indices[i] = i;
1955 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1956 "extract");
1957 }
1958
1959 return Mask;
1960}
1961
1962static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1963 Value *Op1) {
1964 // If the mask is all ones just emit the first operation.
1965 if (const auto *C = dyn_cast<Constant>(Mask))
1966 if (C->isAllOnesValue())
1967 return Op0;
1968
1969 Mask = getX86MaskVec(Builder, Mask,
1970 cast<FixedVectorType>(Op0->getType())->getNumElements());
1971 return Builder.CreateSelect(Mask, Op0, Op1);
1972}
1973
1974static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1975 Value *Op1) {
1976 // If the mask is all ones just emit the first operation.
1977 if (const auto *C = dyn_cast<Constant>(Mask))
1978 if (C->isAllOnesValue())
1979 return Op0;
1980
1981 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1982 Mask->getType()->getIntegerBitWidth());
1983 Mask = Builder.CreateBitCast(Mask, MaskTy);
1984 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1985 return Builder.CreateSelect(Mask, Op0, Op1);
1986}
1987
1988// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1989 // PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
1990 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1991 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1992 Value *Op1, Value *Shift,
1993 Value *Passthru, Value *Mask,
1994 bool IsVALIGN) {
1995 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1996
1997 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1998 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1999 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2000 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2001
2002 // Mask the immediate for VALIGN.
2003 if (IsVALIGN)
2004 ShiftVal &= (NumElts - 1);
2005
2006 // If palignr is shifting the pair of vectors more than the size of two
2007 // lanes, emit zero.
2008 if (ShiftVal >= 32)
2009 return llvm::Constant::getNullValue(Op0->getType());
2010
2011 // If palignr is shifting the pair of input vectors more than one lane,
2012 // but less than two lanes, convert to shifting in zeroes.
2013 if (ShiftVal > 16) {
2014 ShiftVal -= 16;
2015 Op1 = Op0;
2016 Op0 = llvm::Constant::getNullValue(Op0->getType());
2017 }
2018
2019 int Indices[64];
2020 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2021 for (unsigned l = 0; l < NumElts; l += 16) {
2022 for (unsigned i = 0; i != 16; ++i) {
2023 unsigned Idx = ShiftVal + i;
2024 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2025 Idx += NumElts - 16; // End of lane, switch operand.
2026 Indices[l + i] = Idx + l;
2027 }
2028 }
2029
2030 Value *Align = Builder.CreateShuffleVector(
2031 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2032
2033 return emitX86Select(Builder, Mask, Align, Passthru);
2034}
2035
2036 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2037 bool ZeroMask, bool IndexForm) {
2038 Type *Ty = CI.getType();
2039 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2040 unsigned EltWidth = Ty->getScalarSizeInBits();
2041 bool IsFloat = Ty->isFPOrFPVectorTy();
2042 Intrinsic::ID IID;
2043 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2044 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2045 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2046 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2047 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2048 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2049 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2050 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2051 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2052 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2053 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2054 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2055 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2056 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2057 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2058 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2059 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2060 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2061 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2062 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2063 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2064 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2065 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2066 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2067 else if (VecWidth == 128 && EltWidth == 16)
2068 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2069 else if (VecWidth == 256 && EltWidth == 16)
2070 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2071 else if (VecWidth == 512 && EltWidth == 16)
2072 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2073 else if (VecWidth == 128 && EltWidth == 8)
2074 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2075 else if (VecWidth == 256 && EltWidth == 8)
2076 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2077 else if (VecWidth == 512 && EltWidth == 8)
2078 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2079 else
2080 llvm_unreachable("Unexpected intrinsic");
2081
2082 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2083 CI.getArgOperand(2) };
2084
2085 // If this isn't index form we need to swap operand 0 and 1.
2086 if (!IndexForm)
2087 std::swap(Args[0], Args[1]);
2088
2089 Value *V = Builder.CreateIntrinsic(IID, Args);
2090 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2091 : Builder.CreateBitCast(CI.getArgOperand(1),
2092 Ty);
2093 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2094}
2095
2096 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2097 Intrinsic::ID IID) {
2098 Type *Ty = CI.getType();
2099 Value *Op0 = CI.getOperand(0);
2100 Value *Op1 = CI.getOperand(1);
2101 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2102
2103 if (CI.arg_size() == 4) { // For masked intrinsics.
2104 Value *VecSrc = CI.getOperand(2);
2105 Value *Mask = CI.getOperand(3);
2106 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2107 }
2108 return Res;
2109}
2110
2111 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2112 bool IsRotateRight) {
2113 Type *Ty = CI.getType();
2114 Value *Src = CI.getArgOperand(0);
2115 Value *Amt = CI.getArgOperand(1);
2116
2117 // The amount may be a scalar immediate, in which case we create a splat vector.
2118 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2119 // we only care about the lowest log2 bits anyway.
2120 if (Amt->getType() != Ty) {
2121 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2122 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2123 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2124 }
2125
2126 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2127 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2128
2129 if (CI.arg_size() == 4) { // For masked intrinsics.
2130 Value *VecSrc = CI.getOperand(2);
2131 Value *Mask = CI.getOperand(3);
2132 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2133 }
2134 return Res;
2135}
2136
2137static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2138 bool IsSigned) {
2139 Type *Ty = CI.getType();
2140 Value *LHS = CI.getArgOperand(0);
2141 Value *RHS = CI.getArgOperand(1);
2142
2143 CmpInst::Predicate Pred;
2144 switch (Imm) {
2145 case 0x0:
2146 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2147 break;
2148 case 0x1:
2149 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2150 break;
2151 case 0x2:
2152 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2153 break;
2154 case 0x3:
2155 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2156 break;
2157 case 0x4:
2158 Pred = ICmpInst::ICMP_EQ;
2159 break;
2160 case 0x5:
2161 Pred = ICmpInst::ICMP_NE;
2162 break;
2163 case 0x6:
2164 return Constant::getNullValue(Ty); // FALSE
2165 case 0x7:
2166 return Constant::getAllOnesValue(Ty); // TRUE
2167 default:
2168 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2169 }
2170
2171 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2172 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2173 return Ext;
2174}
2175
2176 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2177 bool IsShiftRight, bool ZeroMask) {
2178 Type *Ty = CI.getType();
2179 Value *Op0 = CI.getArgOperand(0);
2180 Value *Op1 = CI.getArgOperand(1);
2181 Value *Amt = CI.getArgOperand(2);
2182
2183 if (IsShiftRight)
2184 std::swap(Op0, Op1);
2185
2186 // The amount may be a scalar immediate, in which case we create a splat vector.
2187 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2188 // we only care about the lowest log2 bits anyway.
2189 if (Amt->getType() != Ty) {
2190 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2191 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2192 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2193 }
2194
2195 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2196 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2197
2198 unsigned NumArgs = CI.arg_size();
2199 if (NumArgs >= 4) { // For masked intrinsics.
2200 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2201 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2202 CI.getArgOperand(0);
2203 Value *Mask = CI.getOperand(NumArgs - 1);
2204 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2205 }
2206 return Res;
2207}
2208
2209 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2210 Value *Mask, bool Aligned) {
2211 const Align Alignment =
2212 Aligned
2213 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2214 : Align(1);
2215
2216 // If the mask is all ones just emit a regular store.
2217 if (const auto *C = dyn_cast<Constant>(Mask))
2218 if (C->isAllOnesValue())
2219 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2220
2221 // Convert the mask from an integer type to a vector of i1.
2222 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2223 Mask = getX86MaskVec(Builder, Mask, NumElts);
2224 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2225}
2226
2227 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2228 Value *Passthru, Value *Mask, bool Aligned) {
2229 Type *ValTy = Passthru->getType();
2230 const Align Alignment =
2231 Aligned
2232 ? Align(
2233 ValTy->getPrimitiveSizeInBits().getFixedValue() /
2234 8)
2235 : Align(1);
2236
2237 // If the mask is all ones just emit a regular load.
2238 if (const auto *C = dyn_cast<Constant>(Mask))
2239 if (C->isAllOnesValue())
2240 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2241
2242 // Convert the mask from an integer type to a vector of i1.
2243 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2244 Mask = getX86MaskVec(Builder, Mask, NumElts);
2245 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2246}
2247
2248static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2249 Type *Ty = CI.getType();
2250 Value *Op0 = CI.getArgOperand(0);
2251 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2252 {Op0, Builder.getInt1(false)});
2253 if (CI.arg_size() == 3)
2254 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2255 return Res;
2256}
2257
2258static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2259 Type *Ty = CI.getType();
2260
2261 // Arguments have a vXi32 type so cast to vXi64.
2262 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2263 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2264
2265 if (IsSigned) {
2266 // Shift left then arithmetic shift right.
2267 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2268 LHS = Builder.CreateShl(LHS, ShiftAmt);
2269 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2270 RHS = Builder.CreateShl(RHS, ShiftAmt);
2271 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2272 } else {
2273 // Clear the upper bits.
2274 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2275 LHS = Builder.CreateAnd(LHS, Mask);
2276 RHS = Builder.CreateAnd(RHS, Mask);
2277 }
2278
2279 Value *Res = Builder.CreateMul(LHS, RHS);
2280
2281 if (CI.arg_size() == 4)
2282 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2283
2284 return Res;
2285}
2286
2287 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2288 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2289 Value *Mask) {
2290 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2291 if (Mask) {
2292 const auto *C = dyn_cast<Constant>(Mask);
2293 if (!C || !C->isAllOnesValue())
2294 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2295 }
2296
2297 if (NumElts < 8) {
2298 int Indices[8];
2299 for (unsigned i = 0; i != NumElts; ++i)
2300 Indices[i] = i;
2301 for (unsigned i = NumElts; i != 8; ++i)
2302 Indices[i] = NumElts + i % NumElts;
2303 Vec = Builder.CreateShuffleVector(Vec,
2304 Constant::getNullValue(Vec->getType()),
2305 Indices);
2306 }
2307 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2308}
2309
2310 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2311 unsigned CC, bool Signed) {
2312 Value *Op0 = CI.getArgOperand(0);
2313 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2314
2315 Value *Cmp;
2316 if (CC == 3) {
2317 Cmp = Constant::getNullValue(
2318 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2319 } else if (CC == 7) {
2320 Cmp = Constant::getAllOnesValue(
2321 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2322 } else {
2323 ICmpInst::Predicate Pred;
2324 switch (CC) {
2325 default: llvm_unreachable("Unknown condition code");
2326 case 0: Pred = ICmpInst::ICMP_EQ; break;
2327 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2328 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2329 case 4: Pred = ICmpInst::ICMP_NE; break;
2330 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2331 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2332 }
2333 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2334 }
2335
2336 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2337
2338 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2339}
2340
2341// Replace a masked intrinsic with an older unmasked intrinsic.
2342 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2343 Intrinsic::ID IID) {
2344 Value *Rep =
2345 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2346 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2347}
2348
2349 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2350 Value* A = CI.getArgOperand(0);
2351 Value* B = CI.getArgOperand(1);
2352 Value* Src = CI.getArgOperand(2);
2353 Value* Mask = CI.getArgOperand(3);
2354
2355 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2356 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2357 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2358 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2359 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2360 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2361}
2362
2363 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2364 Value* Op = CI.getArgOperand(0);
2365 Type* ReturnOp = CI.getType();
2366 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2367 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2368 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2369}
2370
2371// Replace intrinsic with unmasked version and a select.
2372 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2373 CallBase &CI, Value *&Rep) {
2374 Name = Name.substr(12); // Remove avx512.mask.
2375
2376 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2377 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2378 Intrinsic::ID IID;
2379 if (Name.starts_with("max.p")) {
2380 if (VecWidth == 128 && EltWidth == 32)
2381 IID = Intrinsic::x86_sse_max_ps;
2382 else if (VecWidth == 128 && EltWidth == 64)
2383 IID = Intrinsic::x86_sse2_max_pd;
2384 else if (VecWidth == 256 && EltWidth == 32)
2385 IID = Intrinsic::x86_avx_max_ps_256;
2386 else if (VecWidth == 256 && EltWidth == 64)
2387 IID = Intrinsic::x86_avx_max_pd_256;
2388 else
2389 llvm_unreachable("Unexpected intrinsic");
2390 } else if (Name.starts_with("min.p")) {
2391 if (VecWidth == 128 && EltWidth == 32)
2392 IID = Intrinsic::x86_sse_min_ps;
2393 else if (VecWidth == 128 && EltWidth == 64)
2394 IID = Intrinsic::x86_sse2_min_pd;
2395 else if (VecWidth == 256 && EltWidth == 32)
2396 IID = Intrinsic::x86_avx_min_ps_256;
2397 else if (VecWidth == 256 && EltWidth == 64)
2398 IID = Intrinsic::x86_avx_min_pd_256;
2399 else
2400 llvm_unreachable("Unexpected intrinsic");
2401 } else if (Name.starts_with("pshuf.b.")) {
2402 if (VecWidth == 128)
2403 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2404 else if (VecWidth == 256)
2405 IID = Intrinsic::x86_avx2_pshuf_b;
2406 else if (VecWidth == 512)
2407 IID = Intrinsic::x86_avx512_pshuf_b_512;
2408 else
2409 llvm_unreachable("Unexpected intrinsic");
2410 } else if (Name.starts_with("pmul.hr.sw.")) {
2411 if (VecWidth == 128)
2412 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2413 else if (VecWidth == 256)
2414 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2415 else if (VecWidth == 512)
2416 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2417 else
2418 llvm_unreachable("Unexpected intrinsic");
2419 } else if (Name.starts_with("pmulh.w.")) {
2420 if (VecWidth == 128)
2421 IID = Intrinsic::x86_sse2_pmulh_w;
2422 else if (VecWidth == 256)
2423 IID = Intrinsic::x86_avx2_pmulh_w;
2424 else if (VecWidth == 512)
2425 IID = Intrinsic::x86_avx512_pmulh_w_512;
2426 else
2427 llvm_unreachable("Unexpected intrinsic");
2428 } else if (Name.starts_with("pmulhu.w.")) {
2429 if (VecWidth == 128)
2430 IID = Intrinsic::x86_sse2_pmulhu_w;
2431 else if (VecWidth == 256)
2432 IID = Intrinsic::x86_avx2_pmulhu_w;
2433 else if (VecWidth == 512)
2434 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2435 else
2436 llvm_unreachable("Unexpected intrinsic");
2437 } else if (Name.starts_with("pmaddw.d.")) {
2438 if (VecWidth == 128)
2439 IID = Intrinsic::x86_sse2_pmadd_wd;
2440 else if (VecWidth == 256)
2441 IID = Intrinsic::x86_avx2_pmadd_wd;
2442 else if (VecWidth == 512)
2443 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2444 else
2445 llvm_unreachable("Unexpected intrinsic");
2446 } else if (Name.starts_with("pmaddubs.w.")) {
2447 if (VecWidth == 128)
2448 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2449 else if (VecWidth == 256)
2450 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2451 else if (VecWidth == 512)
2452 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2453 else
2454 llvm_unreachable("Unexpected intrinsic");
2455 } else if (Name.starts_with("packsswb.")) {
2456 if (VecWidth == 128)
2457 IID = Intrinsic::x86_sse2_packsswb_128;
2458 else if (VecWidth == 256)
2459 IID = Intrinsic::x86_avx2_packsswb;
2460 else if (VecWidth == 512)
2461 IID = Intrinsic::x86_avx512_packsswb_512;
2462 else
2463 llvm_unreachable("Unexpected intrinsic");
2464 } else if (Name.starts_with("packssdw.")) {
2465 if (VecWidth == 128)
2466 IID = Intrinsic::x86_sse2_packssdw_128;
2467 else if (VecWidth == 256)
2468 IID = Intrinsic::x86_avx2_packssdw;
2469 else if (VecWidth == 512)
2470 IID = Intrinsic::x86_avx512_packssdw_512;
2471 else
2472 llvm_unreachable("Unexpected intrinsic");
2473 } else if (Name.starts_with("packuswb.")) {
2474 if (VecWidth == 128)
2475 IID = Intrinsic::x86_sse2_packuswb_128;
2476 else if (VecWidth == 256)
2477 IID = Intrinsic::x86_avx2_packuswb;
2478 else if (VecWidth == 512)
2479 IID = Intrinsic::x86_avx512_packuswb_512;
2480 else
2481 llvm_unreachable("Unexpected intrinsic");
2482 } else if (Name.starts_with("packusdw.")) {
2483 if (VecWidth == 128)
2484 IID = Intrinsic::x86_sse41_packusdw;
2485 else if (VecWidth == 256)
2486 IID = Intrinsic::x86_avx2_packusdw;
2487 else if (VecWidth == 512)
2488 IID = Intrinsic::x86_avx512_packusdw_512;
2489 else
2490 llvm_unreachable("Unexpected intrinsic");
2491 } else if (Name.starts_with("vpermilvar.")) {
2492 if (VecWidth == 128 && EltWidth == 32)
2493 IID = Intrinsic::x86_avx_vpermilvar_ps;
2494 else if (VecWidth == 128 && EltWidth == 64)
2495 IID = Intrinsic::x86_avx_vpermilvar_pd;
2496 else if (VecWidth == 256 && EltWidth == 32)
2497 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2498 else if (VecWidth == 256 && EltWidth == 64)
2499 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2500 else if (VecWidth == 512 && EltWidth == 32)
2501 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2502 else if (VecWidth == 512 && EltWidth == 64)
2503 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2504 else
2505 llvm_unreachable("Unexpected intrinsic");
2506 } else if (Name == "cvtpd2dq.256") {
2507 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2508 } else if (Name == "cvtpd2ps.256") {
2509 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2510 } else if (Name == "cvttpd2dq.256") {
2511 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2512 } else if (Name == "cvttps2dq.128") {
2513 IID = Intrinsic::x86_sse2_cvttps2dq;
2514 } else if (Name == "cvttps2dq.256") {
2515 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2516 } else if (Name.starts_with("permvar.")) {
2517 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2518 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2519 IID = Intrinsic::x86_avx2_permps;
2520 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2521 IID = Intrinsic::x86_avx2_permd;
2522 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2523 IID = Intrinsic::x86_avx512_permvar_df_256;
2524 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2525 IID = Intrinsic::x86_avx512_permvar_di_256;
2526 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2527 IID = Intrinsic::x86_avx512_permvar_sf_512;
2528 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2529 IID = Intrinsic::x86_avx512_permvar_si_512;
2530 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2531 IID = Intrinsic::x86_avx512_permvar_df_512;
2532 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2533 IID = Intrinsic::x86_avx512_permvar_di_512;
2534 else if (VecWidth == 128 && EltWidth == 16)
2535 IID = Intrinsic::x86_avx512_permvar_hi_128;
2536 else if (VecWidth == 256 && EltWidth == 16)
2537 IID = Intrinsic::x86_avx512_permvar_hi_256;
2538 else if (VecWidth == 512 && EltWidth == 16)
2539 IID = Intrinsic::x86_avx512_permvar_hi_512;
2540 else if (VecWidth == 128 && EltWidth == 8)
2541 IID = Intrinsic::x86_avx512_permvar_qi_128;
2542 else if (VecWidth == 256 && EltWidth == 8)
2543 IID = Intrinsic::x86_avx512_permvar_qi_256;
2544 else if (VecWidth == 512 && EltWidth == 8)
2545 IID = Intrinsic::x86_avx512_permvar_qi_512;
2546 else
2547 llvm_unreachable("Unexpected intrinsic");
2548 } else if (Name.starts_with("dbpsadbw.")) {
2549 if (VecWidth == 128)
2550 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2551 else if (VecWidth == 256)
2552 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2553 else if (VecWidth == 512)
2554 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2555 else
2556 llvm_unreachable("Unexpected intrinsic");
2557 } else if (Name.starts_with("pmultishift.qb.")) {
2558 if (VecWidth == 128)
2559 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2560 else if (VecWidth == 256)
2561 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2562 else if (VecWidth == 512)
2563 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2564 else
2565 llvm_unreachable("Unexpected intrinsic");
2566 } else if (Name.starts_with("conflict.")) {
2567 if (Name[9] == 'd' && VecWidth == 128)
2568 IID = Intrinsic::x86_avx512_conflict_d_128;
2569 else if (Name[9] == 'd' && VecWidth == 256)
2570 IID = Intrinsic::x86_avx512_conflict_d_256;
2571 else if (Name[9] == 'd' && VecWidth == 512)
2572 IID = Intrinsic::x86_avx512_conflict_d_512;
2573 else if (Name[9] == 'q' && VecWidth == 128)
2574 IID = Intrinsic::x86_avx512_conflict_q_128;
2575 else if (Name[9] == 'q' && VecWidth == 256)
2576 IID = Intrinsic::x86_avx512_conflict_q_256;
2577 else if (Name[9] == 'q' && VecWidth == 512)
2578 IID = Intrinsic::x86_avx512_conflict_q_512;
2579 else
2580 llvm_unreachable("Unexpected intrinsic");
2581 } else if (Name.starts_with("pavg.")) {
2582 if (Name[5] == 'b' && VecWidth == 128)
2583 IID = Intrinsic::x86_sse2_pavg_b;
2584 else if (Name[5] == 'b' && VecWidth == 256)
2585 IID = Intrinsic::x86_avx2_pavg_b;
2586 else if (Name[5] == 'b' && VecWidth == 512)
2587 IID = Intrinsic::x86_avx512_pavg_b_512;
2588 else if (Name[5] == 'w' && VecWidth == 128)
2589 IID = Intrinsic::x86_sse2_pavg_w;
2590 else if (Name[5] == 'w' && VecWidth == 256)
2591 IID = Intrinsic::x86_avx2_pavg_w;
2592 else if (Name[5] == 'w' && VecWidth == 512)
2593 IID = Intrinsic::x86_avx512_pavg_w_512;
2594 else
2595 llvm_unreachable("Unexpected intrinsic");
2596 } else
2597 return false;
2598
2599 SmallVector<Value *, 4> Args(CI.args());
2600 Args.pop_back();
2601 Args.pop_back();
2602 Rep = Builder.CreateIntrinsic(IID, Args);
2603 unsigned NumArgs = CI.arg_size();
2604 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2605 CI.getArgOperand(NumArgs - 2));
2606 return true;
2607}
2608
2609 /// Upgrade the comment in a call to inline asm that represents an objc
2610 /// retain/release marker.
2611void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2612 size_t Pos;
2613 if (AsmStr->find("mov\tfp") == 0 &&
2614 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2615 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2616 AsmStr->replace(Pos, 1, ";");
2617 }
2618}
2619
2620 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2621 Function *F, IRBuilder<> &Builder) {
2622 Value *Rep = nullptr;
2623
2624 if (Name == "abs.i" || Name == "abs.ll") {
2625 Value *Arg = CI->getArgOperand(0);
2626 Value *Neg = Builder.CreateNeg(Arg, "neg");
2627 Value *Cmp = Builder.CreateICmpSGE(
2628 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2629 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2630 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2631 Type *Ty = (Name == "abs.bf16")
2632 ? Builder.getBFloatTy()
2633 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2634 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2635 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2636 Rep = Builder.CreateBitCast(Abs, CI->getType());
2637 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2638 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2639 : Intrinsic::nvvm_fabs;
2640 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2641 } else if (Name.consume_front("ex2.approx.")) {
2642 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2643 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2644 : Intrinsic::nvvm_ex2_approx;
2645 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2646 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2647 Name.starts_with("atomic.load.add.f64.p")) {
2648 Value *Ptr = CI->getArgOperand(0);
2649 Value *Val = CI->getArgOperand(1);
2650 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2651 AtomicOrdering::SequentiallyConsistent);
2652 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2653 Name.starts_with("atomic.load.dec.32.p")) {
2654 Value *Ptr = CI->getArgOperand(0);
2655 Value *Val = CI->getArgOperand(1);
2656 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2657 : AtomicRMWInst::UDecWrap;
2658 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2659 AtomicOrdering::SequentiallyConsistent);
2660 } else if (Name.consume_front("max.") &&
2661 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2662 Name == "ui" || Name == "ull")) {
2663 Value *Arg0 = CI->getArgOperand(0);
2664 Value *Arg1 = CI->getArgOperand(1);
2665 Value *Cmp = Name.starts_with("u")
2666 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2667 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2668 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2669 } else if (Name.consume_front("min.") &&
2670 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2671 Name == "ui" || Name == "ull")) {
2672 Value *Arg0 = CI->getArgOperand(0);
2673 Value *Arg1 = CI->getArgOperand(1);
2674 Value *Cmp = Name.starts_with("u")
2675 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2676 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2677 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2678 } else if (Name == "clz.ll") {
2679 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2680 Value *Arg = CI->getArgOperand(0);
2681 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2682 {Arg, Builder.getFalse()},
2683 /*FMFSource=*/nullptr, "ctlz");
2684 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2685 } else if (Name == "popc.ll") {
2686 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2687 // i64.
2688 Value *Arg = CI->getArgOperand(0);
2689 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2690 Arg, /*FMFSource=*/nullptr, "ctpop");
2691 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2692 } else if (Name == "h2f") {
2693 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2694 {Builder.getFloatTy()}, CI->getArgOperand(0),
2695 /*FMFSource=*/nullptr, "h2f");
2696 } else if (Name.consume_front("bitcast.") &&
2697 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2698 Name == "d2ll")) {
2699 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2700 } else if (Name == "rotate.b32") {
2701 Value *Arg = CI->getOperand(0);
2702 Value *ShiftAmt = CI->getOperand(1);
2703 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2704 {Arg, Arg, ShiftAmt});
2705 } else if (Name == "rotate.b64") {
2706 Type *Int64Ty = Builder.getInt64Ty();
2707 Value *Arg = CI->getOperand(0);
2708 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2709 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2710 {Arg, Arg, ZExtShiftAmt});
2711 } else if (Name == "rotate.right.b64") {
2712 Type *Int64Ty = Builder.getInt64Ty();
2713 Value *Arg = CI->getOperand(0);
2714 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2715 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2716 {Arg, Arg, ZExtShiftAmt});
2717 } else if (Name == "swap.lo.hi.b64") {
2718 Type *Int64Ty = Builder.getInt64Ty();
2719 Value *Arg = CI->getOperand(0);
2720 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2721 {Arg, Arg, Builder.getInt64(32)});
2722 } else if ((Name.consume_front("ptr.gen.to.") &&
2723 consumeNVVMPtrAddrSpace(Name)) ||
2724 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2725 Name.starts_with(".to.gen"))) {
2726 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2727 } else if (Name.consume_front("ldg.global")) {
2728 Value *Ptr = CI->getArgOperand(0);
2729 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2730 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2731 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2732 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2733 MDNode *MD = MDNode::get(Builder.getContext(), {});
2734 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2735 return LD;
2736 } else if (Name == "tanh.approx.f32") {
2737 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2738 FastMathFlags FMF;
2739 FMF.setApproxFunc();
2740 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2741 FMF);
2742 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2743 Value *Arg =
2744 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2745 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2746 {}, {Arg});
2747 } else if (Name == "barrier") {
2748 Rep = Builder.CreateIntrinsic(
2749 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2750 {CI->getArgOperand(0), CI->getArgOperand(1)});
2751 } else if (Name == "barrier.sync") {
2752 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2753 {CI->getArgOperand(0)});
2754 } else if (Name == "barrier.sync.cnt") {
2755 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2756 {CI->getArgOperand(0), CI->getArgOperand(1)});
2757 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2758 Name == "barrier0.or") {
2759 Value *C = CI->getArgOperand(0);
2760 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2761
2762 Intrinsic::ID IID =
2763 StringSwitch<Intrinsic::ID>(Name)
2764 .Case("barrier0.popc",
2765 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2766 .Case("barrier0.and",
2767 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2768 .Case("barrier0.or",
2769 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2770 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2771 Rep = Builder.CreateZExt(Bar, CI->getType());
2772 } else {
2773 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2774 if (IID != Intrinsic::not_intrinsic &&
2775 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2776 rename(F);
2777 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2778 SmallVector<Value *, 2> Args;
2779 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2780 Value *Arg = CI->getArgOperand(I);
2781 Type *OldType = Arg->getType();
2782 Type *NewType = NewFn->getArg(I)->getType();
2783 Args.push_back(
2784 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2785 ? Builder.CreateBitCast(Arg, NewType)
2786 : Arg);
2787 }
2788 Rep = Builder.CreateCall(NewFn, Args);
2789 if (F->getReturnType()->isIntegerTy())
2790 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2791 }
2792 }
2793
2794 return Rep;
2795}
2796
2797 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2798 IRBuilder<> &Builder) {
2799 LLVMContext &C = F->getContext();
2800 Value *Rep = nullptr;
2801
2802 if (Name.starts_with("sse4a.movnt.")) {
2803 SmallVector<Metadata *, 1> Elts;
2804 Elts.push_back(
2805 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2806 MDNode *Node = MDNode::get(C, Elts);
2807
2808 Value *Arg0 = CI->getArgOperand(0);
2809 Value *Arg1 = CI->getArgOperand(1);
2810
2811 // Nontemporal (unaligned) store of the 0'th element of the float/double
2812 // vector.
2813 Value *Extract =
2814 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2815
2816 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2817 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2818 } else if (Name.starts_with("avx.movnt.") ||
2819 Name.starts_with("avx512.storent.")) {
2820 SmallVector<Metadata *, 1> Elts;
2821 Elts.push_back(
2822 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2823 MDNode *Node = MDNode::get(C, Elts);
2824
2825 Value *Arg0 = CI->getArgOperand(0);
2826 Value *Arg1 = CI->getArgOperand(1);
2827
2828 StoreInst *SI = Builder.CreateAlignedStore(
2829 Arg1, Arg0,
2830 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2831 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2832 } else if (Name == "sse2.storel.dq") {
2833 Value *Arg0 = CI->getArgOperand(0);
2834 Value *Arg1 = CI->getArgOperand(1);
2835
2836 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2837 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2838 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2839 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2840 } else if (Name.starts_with("sse.storeu.") ||
2841 Name.starts_with("sse2.storeu.") ||
2842 Name.starts_with("avx.storeu.")) {
2843 Value *Arg0 = CI->getArgOperand(0);
2844 Value *Arg1 = CI->getArgOperand(1);
2845 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2846 } else if (Name == "avx512.mask.store.ss") {
2847 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2848 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2849 Mask, false);
2850 } else if (Name.starts_with("avx512.mask.store")) {
2851 // "avx512.mask.storeu." or "avx512.mask.store."
2852 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2853 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2854 CI->getArgOperand(2), Aligned);
2855 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2856 // Upgrade packed integer vector compare intrinsics to compare instructions.
2857 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2858 bool CmpEq = Name[9] == 'e';
2859 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2860 CI->getArgOperand(0), CI->getArgOperand(1));
2861 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2862 } else if (Name.starts_with("avx512.broadcastm")) {
2863 Type *ExtTy = Type::getInt32Ty(C);
2864 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2865 ExtTy = Type::getInt64Ty(C);
2866 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2867 ExtTy->getPrimitiveSizeInBits();
2868 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2869 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2870 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2871 Value *Vec = CI->getArgOperand(0);
2872 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2873 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2874 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2875 } else if (Name.starts_with("avx.sqrt.p") ||
2876 Name.starts_with("sse2.sqrt.p") ||
2877 Name.starts_with("sse.sqrt.p")) {
2878 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2879 {CI->getArgOperand(0)});
2880 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2881 if (CI->arg_size() == 4 &&
2882 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2883 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2884 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2885 : Intrinsic::x86_avx512_sqrt_pd_512;
2886
2887 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2888 Rep = Builder.CreateIntrinsic(IID, Args);
2889 } else {
2890 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2891 {CI->getArgOperand(0)});
2892 }
2893 Rep =
2894 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2895 } else if (Name.starts_with("avx512.ptestm") ||
2896 Name.starts_with("avx512.ptestnm")) {
2897 Value *Op0 = CI->getArgOperand(0);
2898 Value *Op1 = CI->getArgOperand(1);
2899 Value *Mask = CI->getArgOperand(2);
2900 Rep = Builder.CreateAnd(Op0, Op1);
2901 llvm::Type *Ty = Op0->getType();
2902 Value *Zero = llvm::Constant::getNullValue(Ty);
2903 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2904 ? ICmpInst::ICMP_NE
2905 : ICmpInst::ICMP_EQ;
2906 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2907 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2908 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2909 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2910 ->getNumElements();
2911 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2912 Rep =
2913 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2914 } else if (Name.starts_with("avx512.kunpck")) {
2915 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2916 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2917 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2918 int Indices[64];
2919 for (unsigned i = 0; i != NumElts; ++i)
2920 Indices[i] = i;
2921
2922 // First extract half of each vector. This gives better codegen than
2923 // doing it in a single shuffle.
2924 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2925 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2926 // Concat the vectors.
2927 // NOTE: Operands have to be swapped to match intrinsic definition.
2928 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2929 Rep = Builder.CreateBitCast(Rep, CI->getType());
2930 } else if (Name == "avx512.kand.w") {
2931 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2932 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2933 Rep = Builder.CreateAnd(LHS, RHS);
2934 Rep = Builder.CreateBitCast(Rep, CI->getType());
2935 } else if (Name == "avx512.kandn.w") {
2936 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2937 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2938 LHS = Builder.CreateNot(LHS);
2939 Rep = Builder.CreateAnd(LHS, RHS);
2940 Rep = Builder.CreateBitCast(Rep, CI->getType());
2941 } else if (Name == "avx512.kor.w") {
2942 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2943 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2944 Rep = Builder.CreateOr(LHS, RHS);
2945 Rep = Builder.CreateBitCast(Rep, CI->getType());
2946 } else if (Name == "avx512.kxor.w") {
2947 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2948 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2949 Rep = Builder.CreateXor(LHS, RHS);
2950 Rep = Builder.CreateBitCast(Rep, CI->getType());
2951 } else if (Name == "avx512.kxnor.w") {
2952 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2953 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2954 LHS = Builder.CreateNot(LHS);
2955 Rep = Builder.CreateXor(LHS, RHS);
2956 Rep = Builder.CreateBitCast(Rep, CI->getType());
2957 } else if (Name == "avx512.knot.w") {
2958 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2959 Rep = Builder.CreateNot(Rep);
2960 Rep = Builder.CreateBitCast(Rep, CI->getType());
2961 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2962 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2963 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2964 Rep = Builder.CreateOr(LHS, RHS);
2965 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2966 Value *C;
2967 if (Name[14] == 'c')
2968 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2969 else
2970 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2971 Rep = Builder.CreateICmpEQ(Rep, C);
2972 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2973 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2974 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2975 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2976 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2977 Type *I32Ty = Type::getInt32Ty(C);
2978 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2979 ConstantInt::get(I32Ty, 0));
2980 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2981 ConstantInt::get(I32Ty, 0));
2982 Value *EltOp;
2983 if (Name.contains(".add."))
2984 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2985 else if (Name.contains(".sub."))
2986 EltOp = Builder.CreateFSub(Elt0, Elt1);
2987 else if (Name.contains(".mul."))
2988 EltOp = Builder.CreateFMul(Elt0, Elt1);
2989 else
2990 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2991 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2992 ConstantInt::get(I32Ty, 0));
2993 } else if (Name.starts_with("avx512.mask.pcmp")) {
2994 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2995 bool CmpEq = Name[16] == 'e';
2996 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2997 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2998 Type *OpTy = CI->getArgOperand(0)->getType();
2999 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3000 Intrinsic::ID IID;
3001 switch (VecWidth) {
3002 default:
3003 llvm_unreachable("Unexpected intrinsic");
3004 case 128:
3005 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3006 break;
3007 case 256:
3008 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3009 break;
3010 case 512:
3011 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3012 break;
3013 }
3014
3015 Rep =
3016 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3017 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3018 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3019 Type *OpTy = CI->getArgOperand(0)->getType();
3020 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3021 unsigned EltWidth = OpTy->getScalarSizeInBits();
3022 Intrinsic::ID IID;
3023 if (VecWidth == 128 && EltWidth == 32)
3024 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3025 else if (VecWidth == 256 && EltWidth == 32)
3026 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3027 else if (VecWidth == 512 && EltWidth == 32)
3028 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3029 else if (VecWidth == 128 && EltWidth == 64)
3030 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3031 else if (VecWidth == 256 && EltWidth == 64)
3032 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3033 else if (VecWidth == 512 && EltWidth == 64)
3034 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3035 else
3036 llvm_unreachable("Unexpected intrinsic");
3037
3038 Rep =
3039 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3040 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3041 } else if (Name.starts_with("avx512.cmp.p")) {
3042 SmallVector<Value *, 4> Args(CI->args());
3043 Type *OpTy = Args[0]->getType();
3044 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3045 unsigned EltWidth = OpTy->getScalarSizeInBits();
3046 Intrinsic::ID IID;
3047 if (VecWidth == 128 && EltWidth == 32)
3048 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3049 else if (VecWidth == 256 && EltWidth == 32)
3050 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3051 else if (VecWidth == 512 && EltWidth == 32)
3052 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3053 else if (VecWidth == 128 && EltWidth == 64)
3054 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3055 else if (VecWidth == 256 && EltWidth == 64)
3056 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3057 else if (VecWidth == 512 && EltWidth == 64)
3058 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3059 else
3060 llvm_unreachable("Unexpected intrinsic");
3061
3062 Value *Mask = Constant::getAllOnesValue(Builder.getIntNTy(VecWidth / EltWidth));
3063 if (VecWidth == 512)
3064 std::swap(Mask, Args.back());
3065 Args.push_back(Mask);
3066
3067 Rep = Builder.CreateIntrinsic(IID, Args);
3068 } else if (Name.starts_with("avx512.mask.cmp.")) {
3069 // Integer compare intrinsics.
3070 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3071 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3072 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3073 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3074 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3075 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3076 Name.starts_with("avx512.cvtw2mask.") ||
3077 Name.starts_with("avx512.cvtd2mask.") ||
3078 Name.starts_with("avx512.cvtq2mask.")) {
3079 Value *Op = CI->getArgOperand(0);
3080 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3081 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3082 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3083 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3084 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3085 Name.starts_with("avx512.mask.pabs")) {
3086 Rep = upgradeAbs(Builder, *CI);
3087 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3088 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3089 Name.starts_with("avx512.mask.pmaxs")) {
3090 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3091 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3092 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3093 Name.starts_with("avx512.mask.pmaxu")) {
3094 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3095 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3096 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3097 Name.starts_with("avx512.mask.pmins")) {
3098 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3099 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3100 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3101 Name.starts_with("avx512.mask.pminu")) {
3102 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3103 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3104 Name == "avx512.pmulu.dq.512" ||
3105 Name.starts_with("avx512.mask.pmulu.dq.")) {
3106 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3107 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3108 Name == "avx512.pmul.dq.512" ||
3109 Name.starts_with("avx512.mask.pmul.dq.")) {
3110 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3111 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3112 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3113 Rep =
3114 Builder.CreateSIToFP(CI->getArgOperand(1),
3115 cast<VectorType>(CI->getType())->getElementType());
3116 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3117 } else if (Name == "avx512.cvtusi2sd") {
3118 Rep =
3119 Builder.CreateUIToFP(CI->getArgOperand(1),
3120 cast<VectorType>(CI->getType())->getElementType());
3121 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3122 } else if (Name == "sse2.cvtss2sd") {
3123 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3124 Rep = Builder.CreateFPExt(
3125 Rep, cast<VectorType>(CI->getType())->getElementType());
3126 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3127 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3128 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3129 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3130 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3131 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3132 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3133 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3134 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3135 Name == "avx512.mask.cvtqq2ps.256" ||
3136 Name == "avx512.mask.cvtqq2ps.512" ||
3137 Name == "avx512.mask.cvtuqq2ps.256" ||
3138 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3139 Name == "avx.cvt.ps2.pd.256" ||
3140 Name == "avx512.mask.cvtps2pd.128" ||
3141 Name == "avx512.mask.cvtps2pd.256") {
3142 auto *DstTy = cast<FixedVectorType>(CI->getType());
3143 Rep = CI->getArgOperand(0);
3144 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3145
3146 unsigned NumDstElts = DstTy->getNumElements();
3147 if (NumDstElts < SrcTy->getNumElements()) {
3148 assert(NumDstElts == 2 && "Unexpected vector size");
3149 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3150 }
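    // E.g. sse2.cvtdq2pd reads only the low two i32 elements of its v4i32
    // source, so the shuffle above first narrows the source to <2 x i32>.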
3151
3152 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3153 bool IsUnsigned = Name.contains("cvtu");
3154 if (IsPS2PD)
3155 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3156 else if (CI->arg_size() == 4 &&
3157 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3158 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3159 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3160 : Intrinsic::x86_avx512_sitofp_round;
3161 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3162 {Rep, CI->getArgOperand(3)});
3163 } else {
3164 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3165 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3166 }
3167
3168 if (CI->arg_size() >= 3)
3169 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3170 CI->getArgOperand(1));
3171 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3172 Name.starts_with("vcvtph2ps.")) {
3173 auto *DstTy = cast<FixedVectorType>(CI->getType());
3174 Rep = CI->getArgOperand(0);
3175 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3176 unsigned NumDstElts = DstTy->getNumElements();
3177 if (NumDstElts != SrcTy->getNumElements()) {
3178 assert(NumDstElts == 4 && "Unexpected vector size");
3179 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3180 }
3181 Rep = Builder.CreateBitCast(
3182 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3183 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3184 if (CI->arg_size() >= 3)
3185 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3186 CI->getArgOperand(1));
3187 } else if (Name.starts_with("avx512.mask.load")) {
3188 // "avx512.mask.loadu." or "avx512.mask.load."
3189 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3190 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3191 CI->getArgOperand(2), Aligned);
3192 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3193 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3194 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3195 ResultTy->getNumElements());
3196
3197 Rep = Builder.CreateIntrinsic(
3198 Intrinsic::masked_expandload, ResultTy,
3199 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3200 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3201 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3202 Value *MaskVec =
3203 getX86MaskVec(Builder, CI->getArgOperand(2),
3204 cast<FixedVectorType>(ResultTy)->getNumElements());
3205
3206 Rep = Builder.CreateIntrinsic(
3207 Intrinsic::masked_compressstore, ResultTy,
3208 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3209 } else if (Name.starts_with("avx512.mask.compress.") ||
3210 Name.starts_with("avx512.mask.expand.")) {
3211 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3212
3213 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3214 ResultTy->getNumElements());
3215
3216 bool IsCompress = Name[12] == 'c';
3217 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3218 : Intrinsic::x86_avx512_mask_expand;
3219 Rep = Builder.CreateIntrinsic(
3220 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3221 } else if (Name.starts_with("xop.vpcom")) {
3222 bool IsSigned;
3223 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3224 Name.ends_with("uq"))
3225 IsSigned = false;
3226 else if (Name.ends_with("b") || Name.ends_with("w") ||
3227 Name.ends_with("d") || Name.ends_with("q"))
3228 IsSigned = true;
3229 else
3230 llvm_unreachable("Unknown suffix");
3231
3232 unsigned Imm;
3233 if (CI->arg_size() == 3) {
3234 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3235 } else {
3236 Name = Name.substr(9); // strip off "xop.vpcom"
3237 if (Name.starts_with("lt"))
3238 Imm = 0;
3239 else if (Name.starts_with("le"))
3240 Imm = 1;
3241 else if (Name.starts_with("gt"))
3242 Imm = 2;
3243 else if (Name.starts_with("ge"))
3244 Imm = 3;
3245 else if (Name.starts_with("eq"))
3246 Imm = 4;
3247 else if (Name.starts_with("ne"))
3248 Imm = 5;
3249 else if (Name.starts_with("false"))
3250 Imm = 6;
3251 else if (Name.starts_with("true"))
3252 Imm = 7;
3253 else
3254 llvm_unreachable("Unknown condition");
3255 }
3256
3257 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3258 } else if (Name.starts_with("xop.vpcmov")) {
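    // vpcmov is a bitwise select: each result bit is taken from the first
    // operand where the selector bit is set and from the second operand where
    // it is clear, i.e. (Op0 & Sel) | (Op1 & ~Sel), as built below.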
3259 Value *Sel = CI->getArgOperand(2);
3260 Value *NotSel = Builder.CreateNot(Sel);
3261 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3262 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3263 Rep = Builder.CreateOr(Sel0, Sel1);
3264 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3265 Name.starts_with("avx512.mask.prol")) {
3266 Rep = upgradeX86Rotate(Builder, *CI, false);
3267 } else if (Name.starts_with("avx512.pror") ||
3268 Name.starts_with("avx512.mask.pror")) {
3269 Rep = upgradeX86Rotate(Builder, *CI, true);
3270 } else if (Name.starts_with("avx512.vpshld.") ||
3271 Name.starts_with("avx512.mask.vpshld") ||
3272 Name.starts_with("avx512.maskz.vpshld")) {
3273 bool ZeroMask = Name[11] == 'z';
3274 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3275 } else if (Name.starts_with("avx512.vpshrd.") ||
3276 Name.starts_with("avx512.mask.vpshrd") ||
3277 Name.starts_with("avx512.maskz.vpshrd")) {
3278 bool ZeroMask = Name[11] == 'z';
3279 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3280 } else if (Name == "sse42.crc32.64.8") {
3281 Value *Trunc0 =
3282 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3283 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3284 {Trunc0, CI->getArgOperand(1)});
3285 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3286 } else if (Name.starts_with("avx.vbroadcast.s") ||
3287 Name.starts_with("avx512.vbroadcast.s")) {
3288 // Replace broadcasts with a series of insertelements.
3289 auto *VecTy = cast<FixedVectorType>(CI->getType());
3290 Type *EltTy = VecTy->getElementType();
3291 unsigned EltNum = VecTy->getNumElements();
3292 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3293 Type *I32Ty = Type::getInt32Ty(C);
3294 Rep = PoisonValue::get(VecTy);
3295 for (unsigned I = 0; I < EltNum; ++I)
3296 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3297 } else if (Name.starts_with("sse41.pmovsx") ||
3298 Name.starts_with("sse41.pmovzx") ||
3299 Name.starts_with("avx2.pmovsx") ||
3300 Name.starts_with("avx2.pmovzx") ||
3301 Name.starts_with("avx512.mask.pmovsx") ||
3302 Name.starts_with("avx512.mask.pmovzx")) {
3303 auto *DstTy = cast<FixedVectorType>(CI->getType());
3304 unsigned NumDstElts = DstTy->getNumElements();
3305
3306 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3307 SmallVector<int, 8> ShuffleMask(NumDstElts);
3308 for (unsigned i = 0; i != NumDstElts; ++i)
3309 ShuffleMask[i] = i;
3310
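    // E.g. sse41.pmovsxbw keeps the low eight i8 lanes of its v16i8 source
    // (mask <0..7>) and then sign-extends them to v8i16.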
3311 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3312
3313 bool DoSext = Name.contains("pmovsx");
3314 Rep =
3315 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3316 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3317 if (CI->arg_size() == 3)
3318 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3319 CI->getArgOperand(1));
3320 } else if (Name == "avx512.mask.pmov.qd.256" ||
3321 Name == "avx512.mask.pmov.qd.512" ||
3322 Name == "avx512.mask.pmov.wb.256" ||
3323 Name == "avx512.mask.pmov.wb.512") {
3324 Type *Ty = CI->getArgOperand(1)->getType();
3325 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3326 Rep =
3327 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3328 } else if (Name.starts_with("avx.vbroadcastf128") ||
3329 Name == "avx2.vbroadcasti128") {
3330 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3331 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3332 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3333 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3334 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3335 if (NumSrcElts == 2)
3336 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3337 else
3338 Rep = Builder.CreateShuffleVector(Load,
3339 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3340 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3341 Name.starts_with("avx512.mask.shuf.f")) {
3342 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3343 Type *VT = CI->getType();
3344 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3345 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3346 unsigned ControlBitsMask = NumLanes - 1;
3347 unsigned NumControlBits = NumLanes / 2;
3348 SmallVector<int, 8> ShuffleMask(0);
3349
3350 for (unsigned l = 0; l != NumLanes; ++l) {
3351 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3352 // We actually need the other source.
3353 if (l >= NumLanes / 2)
3354 LaneMask += NumLanes;
3355 for (unsigned i = 0; i != NumElementsInLane; ++i)
3356 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3357 }
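    // E.g. a 512-bit shuf.i32x4 with Imm = 0x1B picks lanes 3 and 2 of the
    // first source for the low half and lanes 1 and 0 of the second source
    // for the high half: mask <12..15, 8..11, 20..23, 16..19>.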
3358 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3359 CI->getArgOperand(1), ShuffleMask);
3360 Rep =
3361 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3362 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3363 Name.starts_with("avx512.mask.broadcasti")) {
3364 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3365 ->getNumElements();
3366 unsigned NumDstElts =
3367 cast<FixedVectorType>(CI->getType())->getNumElements();
3368
3369 SmallVector<int, 8> ShuffleMask(NumDstElts);
3370 for (unsigned i = 0; i != NumDstElts; ++i)
3371 ShuffleMask[i] = i % NumSrcElts;
3372
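    // E.g. broadcasting a v4f32 into a v16f32 repeats the four source
    // elements: <0, 1, 2, 3, 0, 1, 2, 3, ...>.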
3373 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3374 CI->getArgOperand(0), ShuffleMask);
3375 Rep =
3376 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3377 } else if (Name.starts_with("avx2.pbroadcast") ||
3378 Name.starts_with("avx2.vbroadcast") ||
3379 Name.starts_with("avx512.pbroadcast") ||
3380 Name.starts_with("avx512.mask.broadcast.s")) {
3381 // Replace vp?broadcasts with a vector shuffle.
3382 Value *Op = CI->getArgOperand(0);
3383 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3384 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
 3385    SmallVector<int, 8> M;
 3386    ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
 3387    Rep = Builder.CreateShuffleVector(Op, M);
3388
3389 if (CI->arg_size() == 3)
3390 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3391 CI->getArgOperand(1));
3392 } else if (Name.starts_with("sse2.padds.") ||
3393 Name.starts_with("avx2.padds.") ||
3394 Name.starts_with("avx512.padds.") ||
3395 Name.starts_with("avx512.mask.padds.")) {
3396 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3397 } else if (Name.starts_with("sse2.psubs.") ||
3398 Name.starts_with("avx2.psubs.") ||
3399 Name.starts_with("avx512.psubs.") ||
3400 Name.starts_with("avx512.mask.psubs.")) {
3401 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3402 } else if (Name.starts_with("sse2.paddus.") ||
3403 Name.starts_with("avx2.paddus.") ||
3404 Name.starts_with("avx512.mask.paddus.")) {
3405 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3406 } else if (Name.starts_with("sse2.psubus.") ||
3407 Name.starts_with("avx2.psubus.") ||
3408 Name.starts_with("avx512.mask.psubus.")) {
3409 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3410 } else if (Name.starts_with("avx512.mask.palignr.")) {
3411 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3412 CI->getArgOperand(1), CI->getArgOperand(2),
3413 CI->getArgOperand(3), CI->getArgOperand(4),
3414 false);
3415 } else if (Name.starts_with("avx512.mask.valign.")) {
 3416    Rep = upgradeX86ALIGNIntrinsics(
 3417        Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3418 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3419 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3420 // 128/256-bit shift left specified in bits.
3421 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3422 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3423 Shift / 8); // Shift is in bits.
3424 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3425 // 128/256-bit shift right specified in bits.
3426 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3427 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3428 Shift / 8); // Shift is in bits.
3429 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3430 Name == "avx512.psll.dq.512") {
3431 // 128/256/512-bit shift left specified in bytes.
3432 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3433 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3434 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3435 Name == "avx512.psrl.dq.512") {
3436 // 128/256/512-bit shift right specified in bytes.
3437 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3438 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3439 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3440 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3441 Name.starts_with("avx2.pblendd.")) {
3442 Value *Op0 = CI->getArgOperand(0);
3443 Value *Op1 = CI->getArgOperand(1);
3444 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3445 auto *VecTy = cast<FixedVectorType>(CI->getType());
3446 unsigned NumElts = VecTy->getNumElements();
3447
3448 SmallVector<int, 16> Idxs(NumElts);
3449 for (unsigned i = 0; i != NumElts; ++i)
3450 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3451
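    // E.g. a v4f32 blend with Imm = 0x5 (bits 0 and 2 set) yields the mask
    // <4, 1, 6, 3>: lanes 0 and 2 come from Op1, the rest from Op0.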
3452 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3453 } else if (Name.starts_with("avx.vinsertf128.") ||
3454 Name == "avx2.vinserti128" ||
3455 Name.starts_with("avx512.mask.insert")) {
3456 Value *Op0 = CI->getArgOperand(0);
3457 Value *Op1 = CI->getArgOperand(1);
3458 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3459 unsigned DstNumElts =
3460 cast<FixedVectorType>(CI->getType())->getNumElements();
3461 unsigned SrcNumElts =
3462 cast<FixedVectorType>(Op1->getType())->getNumElements();
3463 unsigned Scale = DstNumElts / SrcNumElts;
3464
3465 // Mask off the high bits of the immediate value; hardware ignores those.
3466 Imm = Imm % Scale;
3467
3468 // Extend the second operand into a vector the size of the destination.
3469 SmallVector<int, 8> Idxs(DstNumElts);
3470 for (unsigned i = 0; i != SrcNumElts; ++i)
3471 Idxs[i] = i;
3472 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3473 Idxs[i] = SrcNumElts;
3474 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3475
3476 // Insert the second operand into the first operand.
3477
3478 // Note that there is no guarantee that instruction lowering will actually
3479 // produce a vinsertf128 instruction for the created shuffles. In
3480 // particular, the 0 immediate case involves no lane changes, so it can
3481 // be handled as a blend.
3482
3483 // Example of shuffle mask for 32-bit elements:
3484 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3485 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3486
 3487    // First fill with the identity mask.
3488 for (unsigned i = 0; i != DstNumElts; ++i)
3489 Idxs[i] = i;
3490 // Then replace the elements where we need to insert.
3491 for (unsigned i = 0; i != SrcNumElts; ++i)
3492 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3493 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3494
3495 // If the intrinsic has a mask operand, handle that.
3496 if (CI->arg_size() == 5)
3497 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3498 CI->getArgOperand(3));
3499 } else if (Name.starts_with("avx.vextractf128.") ||
3500 Name == "avx2.vextracti128" ||
3501 Name.starts_with("avx512.mask.vextract")) {
3502 Value *Op0 = CI->getArgOperand(0);
3503 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3504 unsigned DstNumElts =
3505 cast<FixedVectorType>(CI->getType())->getNumElements();
3506 unsigned SrcNumElts =
3507 cast<FixedVectorType>(Op0->getType())->getNumElements();
3508 unsigned Scale = SrcNumElts / DstNumElts;
3509
3510 // Mask off the high bits of the immediate value; hardware ignores those.
3511 Imm = Imm % Scale;
3512
3513 // Get indexes for the subvector of the input vector.
3514 SmallVector<int, 8> Idxs(DstNumElts);
3515 for (unsigned i = 0; i != DstNumElts; ++i) {
3516 Idxs[i] = i + (Imm * DstNumElts);
3517 }
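    // E.g. extracting the upper half (Imm = 1) of a v8f32 source into a v4f32
    // destination yields the mask <4, 5, 6, 7>.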
3518 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3519
3520 // If the intrinsic has a mask operand, handle that.
3521 if (CI->arg_size() == 4)
3522 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3523 CI->getArgOperand(2));
3524 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3525 Name.starts_with("avx512.mask.perm.di.")) {
3526 Value *Op0 = CI->getArgOperand(0);
3527 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3528 auto *VecTy = cast<FixedVectorType>(CI->getType());
3529 unsigned NumElts = VecTy->getNumElements();
3530
3531 SmallVector<int, 8> Idxs(NumElts);
3532 for (unsigned i = 0; i != NumElts; ++i)
3533 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3534
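    // E.g. Imm = 0x1B (0b00011011) reverses each group of four elements:
    // for a v8f64 input the mask is <3, 2, 1, 0, 7, 6, 5, 4>.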
3535 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3536
3537 if (CI->arg_size() == 4)
3538 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3539 CI->getArgOperand(2));
3540 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3541 // The immediate permute control byte looks like this:
3542 // [1:0] - select 128 bits from sources for low half of destination
3543 // [2] - ignore
3544 // [3] - zero low half of destination
3545 // [5:4] - select 128 bits from sources for high half of destination
3546 // [6] - ignore
3547 // [7] - zero high half of destination
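    //
    // E.g. Imm = 0x21 on 8 x float operands selects the high 128 bits of the
    // first source for the low half and the low 128 bits of the second source
    // for the high half, giving the mask <4, 5, 6, 7, 8, 9, 10, 11>.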
3548
3549 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3550
3551 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3552 unsigned HalfSize = NumElts / 2;
3553 SmallVector<int, 8> ShuffleMask(NumElts);
3554
3555 // Determine which operand(s) are actually in use for this instruction.
3556 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3557 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3558
3559 // If needed, replace operands based on zero mask.
3560 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3561 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3562
3563 // Permute low half of result.
3564 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3565 for (unsigned i = 0; i < HalfSize; ++i)
3566 ShuffleMask[i] = StartIndex + i;
3567
3568 // Permute high half of result.
3569 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3570 for (unsigned i = 0; i < HalfSize; ++i)
3571 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3572
3573 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3574
3575 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3576 Name.starts_with("avx512.mask.vpermil.p") ||
3577 Name.starts_with("avx512.mask.pshuf.d.")) {
3578 Value *Op0 = CI->getArgOperand(0);
3579 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3580 auto *VecTy = cast<FixedVectorType>(CI->getType());
3581 unsigned NumElts = VecTy->getNumElements();
3582 // Calculate the size of each index in the immediate.
3583 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3584 unsigned IdxMask = ((1 << IdxSize) - 1);
3585
3586 SmallVector<int, 8> Idxs(NumElts);
 3587    // Look up the bits for this element, wrapping around the immediate every
 3588    // 8 bits. Elements are grouped into sets of 2 or 4 elements so we need
 3589    // to offset by the first index of each group.
3590 for (unsigned i = 0; i != NumElts; ++i)
3591 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3592
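    // E.g. a pshuf.d of a v4i32 with Imm = 0x1B produces the reversal
    // <3, 2, 1, 0>.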
3593 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3594
3595 if (CI->arg_size() == 4)
3596 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3597 CI->getArgOperand(2));
3598 } else if (Name == "sse2.pshufl.w" ||
3599 Name.starts_with("avx512.mask.pshufl.w.")) {
3600 Value *Op0 = CI->getArgOperand(0);
3601 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3602 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3603
3604 SmallVector<int, 16> Idxs(NumElts);
3605 for (unsigned l = 0; l != NumElts; l += 8) {
3606 for (unsigned i = 0; i != 4; ++i)
3607 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3608 for (unsigned i = 4; i != 8; ++i)
3609 Idxs[i + l] = i + l;
3610 }
3611
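    // E.g. pshufl.w with Imm = 0x1B reverses the low four words of each
    // 128-bit lane: for a v8i16 input the mask is <3, 2, 1, 0, 4, 5, 6, 7>.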
3612 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3613
3614 if (CI->arg_size() == 4)
3615 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3616 CI->getArgOperand(2));
3617 } else if (Name == "sse2.pshufh.w" ||
3618 Name.starts_with("avx512.mask.pshufh.w.")) {
3619 Value *Op0 = CI->getArgOperand(0);
3620 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3621 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3622
3623 SmallVector<int, 16> Idxs(NumElts);
3624 for (unsigned l = 0; l != NumElts; l += 8) {
3625 for (unsigned i = 0; i != 4; ++i)
3626 Idxs[i + l] = i + l;
3627 for (unsigned i = 0; i != 4; ++i)
3628 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3629 }
3630
3631 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3632
3633 if (CI->arg_size() == 4)
3634 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3635 CI->getArgOperand(2));
3636 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3637 Value *Op0 = CI->getArgOperand(0);
3638 Value *Op1 = CI->getArgOperand(1);
3639 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3640 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3641
3642 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3643 unsigned HalfLaneElts = NumLaneElts / 2;
3644
3645 SmallVector<int, 16> Idxs(NumElts);
3646 for (unsigned i = 0; i != NumElts; ++i) {
3647 // Base index is the starting element of the lane.
3648 Idxs[i] = i - (i % NumLaneElts);
 3649      // If we are halfway through the lane, switch to the other source.
3650 if ((i % NumLaneElts) >= HalfLaneElts)
3651 Idxs[i] += NumElts;
 3652      // Now select the specific element by adding HalfLaneElts bits from
 3653      // the immediate, wrapping around the immediate every 8 bits.
3654 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3655 }
3656
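    // E.g. shufps with Imm = 0x1B on v4f32 operands yields <3, 2, 5, 4>:
    // two elements selected from Op0, then two from Op1.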
3657 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3658
3659 Rep =
3660 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3661 } else if (Name.starts_with("avx512.mask.movddup") ||
3662 Name.starts_with("avx512.mask.movshdup") ||
3663 Name.starts_with("avx512.mask.movsldup")) {
3664 Value *Op0 = CI->getArgOperand(0);
3665 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3666 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3667
3668 unsigned Offset = 0;
3669 if (Name.starts_with("avx512.mask.movshdup."))
3670 Offset = 1;
3671
3672 SmallVector<int, 16> Idxs(NumElts);
3673 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3674 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3675 Idxs[i + l + 0] = i + l + Offset;
3676 Idxs[i + l + 1] = i + l + Offset;
3677 }
3678
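    // E.g. movsldup on a v4f32 yields <0, 0, 2, 2>, movshdup yields
    // <1, 1, 3, 3>, and movddup on a v2f64 yields <0, 0>.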
3679 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3680
3681 Rep =
3682 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3683 } else if (Name.starts_with("avx512.mask.punpckl") ||
3684 Name.starts_with("avx512.mask.unpckl.")) {
3685 Value *Op0 = CI->getArgOperand(0);
3686 Value *Op1 = CI->getArgOperand(1);
3687 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3688 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3689
3690 SmallVector<int, 64> Idxs(NumElts);
3691 for (int l = 0; l != NumElts; l += NumLaneElts)
3692 for (int i = 0; i != NumLaneElts; ++i)
3693 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3694
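    // E.g. for a v4i32 this interleaves the low halves of the two sources:
    // <0, 4, 1, 5>.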
3695 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3696
3697 Rep =
3698 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3699 } else if (Name.starts_with("avx512.mask.punpckh") ||
3700 Name.starts_with("avx512.mask.unpckh.")) {
3701 Value *Op0 = CI->getArgOperand(0);
3702 Value *Op1 = CI->getArgOperand(1);
3703 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3704 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3705
3706 SmallVector<int, 64> Idxs(NumElts);
3707 for (int l = 0; l != NumElts; l += NumLaneElts)
3708 for (int i = 0; i != NumLaneElts; ++i)
3709 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3710
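    // E.g. for a v4i32 this interleaves the high halves of the two sources:
    // <2, 6, 3, 7>.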
3711 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3712
3713 Rep =
3714 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3715 } else if (Name.starts_with("avx512.mask.and.") ||
3716 Name.starts_with("avx512.mask.pand.")) {
3717 VectorType *FTy = cast<VectorType>(CI->getType());
 3718    VectorType *ITy = VectorType::getInteger(FTy);
 3719    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3720 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3721 Rep = Builder.CreateBitCast(Rep, FTy);
3722 Rep =
3723 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3724 } else if (Name.starts_with("avx512.mask.andn.") ||
3725 Name.starts_with("avx512.mask.pandn.")) {
3726 VectorType *FTy = cast<VectorType>(CI->getType());
 3727    VectorType *ITy = VectorType::getInteger(FTy);
 3728    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3729 Rep = Builder.CreateAnd(Rep,
3730 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3731 Rep = Builder.CreateBitCast(Rep, FTy);
3732 Rep =
3733 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3734 } else if (Name.starts_with("avx512.mask.or.") ||
3735 Name.starts_with("avx512.mask.por.")) {
3736 VectorType *FTy = cast<VectorType>(CI->getType());
 3737    VectorType *ITy = VectorType::getInteger(FTy);
 3738    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3739 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3740 Rep = Builder.CreateBitCast(Rep, FTy);
3741 Rep =
3742 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3743 } else if (Name.starts_with("avx512.mask.xor.") ||
3744 Name.starts_with("avx512.mask.pxor.")) {
3745 VectorType *FTy = cast<VectorType>(CI->getType());
 3746    VectorType *ITy = VectorType::getInteger(FTy);
 3747    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3748 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3749 Rep = Builder.CreateBitCast(Rep, FTy);
3750 Rep =
3751 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3752 } else if (Name.starts_with("avx512.mask.padd.")) {
3753 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3754 Rep =
3755 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3756 } else if (Name.starts_with("avx512.mask.psub.")) {
3757 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3758 Rep =
3759 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3760 } else if (Name.starts_with("avx512.mask.pmull.")) {
3761 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3762 Rep =
3763 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3764 } else if (Name.starts_with("avx512.mask.add.p")) {
3765 if (Name.ends_with(".512")) {
3766 Intrinsic::ID IID;
3767 if (Name[17] == 's')
3768 IID = Intrinsic::x86_avx512_add_ps_512;
3769 else
3770 IID = Intrinsic::x86_avx512_add_pd_512;
3771
3772 Rep = Builder.CreateIntrinsic(
3773 IID,
3774 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3775 } else {
3776 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3777 }
3778 Rep =
3779 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3780 } else if (Name.starts_with("avx512.mask.div.p")) {
3781 if (Name.ends_with(".512")) {
3782 Intrinsic::ID IID;
3783 if (Name[17] == 's')
3784 IID = Intrinsic::x86_avx512_div_ps_512;
3785 else
3786 IID = Intrinsic::x86_avx512_div_pd_512;
3787
3788 Rep = Builder.CreateIntrinsic(
3789 IID,
3790 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3791 } else {
3792 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3793 }
3794 Rep =
3795 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3796 } else if (Name.starts_with("avx512.mask.mul.p")) {
3797 if (Name.ends_with(".512")) {
3798 Intrinsic::ID IID;
3799 if (Name[17] == 's')
3800 IID = Intrinsic::x86_avx512_mul_ps_512;
3801 else
3802 IID = Intrinsic::x86_avx512_mul_pd_512;
3803
3804 Rep = Builder.CreateIntrinsic(
3805 IID,
3806 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3807 } else {
3808 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3809 }
3810 Rep =
3811 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3812 } else if (Name.starts_with("avx512.mask.sub.p")) {
3813 if (Name.ends_with(".512")) {
3814 Intrinsic::ID IID;
3815 if (Name[17] == 's')
3816 IID = Intrinsic::x86_avx512_sub_ps_512;
3817 else
3818 IID = Intrinsic::x86_avx512_sub_pd_512;
3819
3820 Rep = Builder.CreateIntrinsic(
3821 IID,
3822 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3823 } else {
3824 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3825 }
3826 Rep =
3827 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3828 } else if ((Name.starts_with("avx512.mask.max.p") ||
3829 Name.starts_with("avx512.mask.min.p")) &&
3830 Name.drop_front(18) == ".512") {
3831 bool IsDouble = Name[17] == 'd';
3832 bool IsMin = Name[13] == 'i';
3833 static const Intrinsic::ID MinMaxTbl[2][2] = {
3834 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3835 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3836 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3837
3838 Rep = Builder.CreateIntrinsic(
3839 IID,
3840 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3841 Rep =
3842 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3843 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3844 Rep =
3845 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3846 {CI->getArgOperand(0), Builder.getInt1(false)});
3847 Rep =
3848 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3849 } else if (Name.starts_with("avx512.mask.psll")) {
3850 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3851 bool IsVariable = Name[16] == 'v';
3852 char Size = Name[16] == '.' ? Name[17]
3853 : Name[17] == '.' ? Name[18]
3854 : Name[18] == '.' ? Name[19]
3855 : Name[20];
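    // E.g. Size is Name[17] for "avx512.mask.psll.d.128", Name[18] for
    // "avx512.mask.pslli.d", Name[19] for "avx512.mask.psllv2.di", and
    // Name[20] for "avx512.mask.psllv16.hi". The same layout applies to the
    // psrl and psra branches below.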
3856
3857 Intrinsic::ID IID;
3858 if (IsVariable && Name[17] != '.') {
3859 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3860 IID = Intrinsic::x86_avx2_psllv_q;
3861 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3862 IID = Intrinsic::x86_avx2_psllv_q_256;
3863 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3864 IID = Intrinsic::x86_avx2_psllv_d;
3865 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3866 IID = Intrinsic::x86_avx2_psllv_d_256;
3867 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3868 IID = Intrinsic::x86_avx512_psllv_w_128;
3869 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3870 IID = Intrinsic::x86_avx512_psllv_w_256;
3871 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3872 IID = Intrinsic::x86_avx512_psllv_w_512;
3873 else
3874 llvm_unreachable("Unexpected size");
3875 } else if (Name.ends_with(".128")) {
3876 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3877 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3878 : Intrinsic::x86_sse2_psll_d;
3879 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3880 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3881 : Intrinsic::x86_sse2_psll_q;
3882 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3883 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3884 : Intrinsic::x86_sse2_psll_w;
3885 else
3886 llvm_unreachable("Unexpected size");
3887 } else if (Name.ends_with(".256")) {
3888 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3889 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3890 : Intrinsic::x86_avx2_psll_d;
3891 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3892 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3893 : Intrinsic::x86_avx2_psll_q;
3894 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3895 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3896 : Intrinsic::x86_avx2_psll_w;
3897 else
3898 llvm_unreachable("Unexpected size");
3899 } else {
3900 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3901 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3902 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3903 : Intrinsic::x86_avx512_psll_d_512;
3904 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3905 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3906 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3907 : Intrinsic::x86_avx512_psll_q_512;
3908 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3909 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3910 : Intrinsic::x86_avx512_psll_w_512;
3911 else
3912 llvm_unreachable("Unexpected size");
3913 }
3914
3915 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3916 } else if (Name.starts_with("avx512.mask.psrl")) {
3917 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3918 bool IsVariable = Name[16] == 'v';
3919 char Size = Name[16] == '.' ? Name[17]
3920 : Name[17] == '.' ? Name[18]
3921 : Name[18] == '.' ? Name[19]
3922 : Name[20];
3923
3924 Intrinsic::ID IID;
3925 if (IsVariable && Name[17] != '.') {
3926 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3927 IID = Intrinsic::x86_avx2_psrlv_q;
3928 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3929 IID = Intrinsic::x86_avx2_psrlv_q_256;
3930 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3931 IID = Intrinsic::x86_avx2_psrlv_d;
3932 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3933 IID = Intrinsic::x86_avx2_psrlv_d_256;
3934 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3935 IID = Intrinsic::x86_avx512_psrlv_w_128;
3936 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3937 IID = Intrinsic::x86_avx512_psrlv_w_256;
3938 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3939 IID = Intrinsic::x86_avx512_psrlv_w_512;
3940 else
3941 llvm_unreachable("Unexpected size");
3942 } else if (Name.ends_with(".128")) {
3943 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3944 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3945 : Intrinsic::x86_sse2_psrl_d;
3946 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3947 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3948 : Intrinsic::x86_sse2_psrl_q;
3949 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3950 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3951 : Intrinsic::x86_sse2_psrl_w;
3952 else
3953 llvm_unreachable("Unexpected size");
3954 } else if (Name.ends_with(".256")) {
3955 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3956 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3957 : Intrinsic::x86_avx2_psrl_d;
3958 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3959 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3960 : Intrinsic::x86_avx2_psrl_q;
3961 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3962 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3963 : Intrinsic::x86_avx2_psrl_w;
3964 else
3965 llvm_unreachable("Unexpected size");
3966 } else {
3967 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3968 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3969 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3970 : Intrinsic::x86_avx512_psrl_d_512;
3971 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3972 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3973 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3974 : Intrinsic::x86_avx512_psrl_q_512;
 3975      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3976 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3977 : Intrinsic::x86_avx512_psrl_w_512;
3978 else
3979 llvm_unreachable("Unexpected size");
3980 }
3981
3982 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3983 } else if (Name.starts_with("avx512.mask.psra")) {
3984 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3985 bool IsVariable = Name[16] == 'v';
3986 char Size = Name[16] == '.' ? Name[17]
3987 : Name[17] == '.' ? Name[18]
3988 : Name[18] == '.' ? Name[19]
3989 : Name[20];
3990
3991 Intrinsic::ID IID;
3992 if (IsVariable && Name[17] != '.') {
3993 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3994 IID = Intrinsic::x86_avx2_psrav_d;
3995 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3996 IID = Intrinsic::x86_avx2_psrav_d_256;
3997 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3998 IID = Intrinsic::x86_avx512_psrav_w_128;
3999 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4000 IID = Intrinsic::x86_avx512_psrav_w_256;
4001 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4002 IID = Intrinsic::x86_avx512_psrav_w_512;
4003 else
4004 llvm_unreachable("Unexpected size");
4005 } else if (Name.ends_with(".128")) {
4006 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4007 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4008 : Intrinsic::x86_sse2_psra_d;
4009 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4010 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4011 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4012 : Intrinsic::x86_avx512_psra_q_128;
4013 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4014 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4015 : Intrinsic::x86_sse2_psra_w;
4016 else
4017 llvm_unreachable("Unexpected size");
4018 } else if (Name.ends_with(".256")) {
4019 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4020 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4021 : Intrinsic::x86_avx2_psra_d;
4022 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4023 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4024 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4025 : Intrinsic::x86_avx512_psra_q_256;
4026 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4027 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4028 : Intrinsic::x86_avx2_psra_w;
4029 else
4030 llvm_unreachable("Unexpected size");
4031 } else {
4032 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4033 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4034 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4035 : Intrinsic::x86_avx512_psra_d_512;
4036 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4037 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4038 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4039 : Intrinsic::x86_avx512_psra_q_512;
4040 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4041 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4042 : Intrinsic::x86_avx512_psra_w_512;
4043 else
4044 llvm_unreachable("Unexpected size");
4045 }
4046
4047 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4048 } else if (Name.starts_with("avx512.mask.move.s")) {
4049 Rep = upgradeMaskedMove(Builder, *CI);
4050 } else if (Name.starts_with("avx512.cvtmask2")) {
4051 Rep = upgradeMaskToInt(Builder, *CI);
4052 } else if (Name.ends_with(".movntdqa")) {
 4053    MDNode *Node = MDNode::get(
 4054        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4055
4056 LoadInst *LI = Builder.CreateAlignedLoad(
4057 CI->getType(), CI->getArgOperand(0),
 4058        Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
 4059    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4060 Rep = LI;
4061 } else if (Name.starts_with("fma.vfmadd.") ||
4062 Name.starts_with("fma.vfmsub.") ||
4063 Name.starts_with("fma.vfnmadd.") ||
4064 Name.starts_with("fma.vfnmsub.")) {
4065 bool NegMul = Name[6] == 'n';
4066 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4067 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4068
4069 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4070 CI->getArgOperand(2)};
4071
4072 if (IsScalar) {
4073 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4074 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4075 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4076 }
4077
4078 if (NegMul && !IsScalar)
4079 Ops[0] = Builder.CreateFNeg(Ops[0]);
4080 if (NegMul && IsScalar)
4081 Ops[1] = Builder.CreateFNeg(Ops[1]);
4082 if (NegAcc)
4083 Ops[2] = Builder.CreateFNeg(Ops[2]);
4084
4085 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4086
4087 if (IsScalar)
4088 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4089 } else if (Name.starts_with("fma4.vfmadd.s")) {
4090 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4091 CI->getArgOperand(2)};
4092
4093 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4094 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4095 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4096
4097 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4098
4099 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4100 Rep, (uint64_t)0);
4101 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4102 Name.starts_with("avx512.maskz.vfmadd.s") ||
4103 Name.starts_with("avx512.mask3.vfmadd.s") ||
4104 Name.starts_with("avx512.mask3.vfmsub.s") ||
4105 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4106 bool IsMask3 = Name[11] == '3';
4107 bool IsMaskZ = Name[11] == 'z';
 4108    // Drop the "avx512.mask|mask3|maskz." prefix to simplify the checks below.
4109 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4110 bool NegMul = Name[2] == 'n';
4111 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4112
4113 Value *A = CI->getArgOperand(0);
4114 Value *B = CI->getArgOperand(1);
4115 Value *C = CI->getArgOperand(2);
4116
4117 if (NegMul && (IsMask3 || IsMaskZ))
4118 A = Builder.CreateFNeg(A);
4119 if (NegMul && !(IsMask3 || IsMaskZ))
4120 B = Builder.CreateFNeg(B);
4121 if (NegAcc)
4122 C = Builder.CreateFNeg(C);
4123
4124 A = Builder.CreateExtractElement(A, (uint64_t)0);
4125 B = Builder.CreateExtractElement(B, (uint64_t)0);
4126 C = Builder.CreateExtractElement(C, (uint64_t)0);
4127
4128 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4129 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4130 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4131
4132 Intrinsic::ID IID;
4133 if (Name.back() == 'd')
4134 IID = Intrinsic::x86_avx512_vfmadd_f64;
4135 else
4136 IID = Intrinsic::x86_avx512_vfmadd_f32;
4137 Rep = Builder.CreateIntrinsic(IID, Ops);
4138 } else {
4139 Rep = Builder.CreateFMA(A, B, C);
4140 }
4141
4142 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4143 : IsMask3 ? C
4144 : A;
4145
4146 // For Mask3 with NegAcc, we need to create a new extractelement that
4147 // avoids the negation above.
4148 if (NegAcc && IsMask3)
4149 PassThru =
4150 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4151
4152 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4153 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4154 (uint64_t)0);
4155 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4156 Name.starts_with("avx512.mask.vfnmadd.p") ||
4157 Name.starts_with("avx512.mask.vfnmsub.p") ||
4158 Name.starts_with("avx512.mask3.vfmadd.p") ||
4159 Name.starts_with("avx512.mask3.vfmsub.p") ||
4160 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4161 Name.starts_with("avx512.maskz.vfmadd.p")) {
4162 bool IsMask3 = Name[11] == '3';
4163 bool IsMaskZ = Name[11] == 'z';
 4165    // Drop the "avx512.mask|mask3|maskz." prefix to simplify the checks below.
4165 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4166 bool NegMul = Name[2] == 'n';
4167 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4168
4169 Value *A = CI->getArgOperand(0);
4170 Value *B = CI->getArgOperand(1);
4171 Value *C = CI->getArgOperand(2);
4172
4173 if (NegMul && (IsMask3 || IsMaskZ))
4174 A = Builder.CreateFNeg(A);
4175 if (NegMul && !(IsMask3 || IsMaskZ))
4176 B = Builder.CreateFNeg(B);
4177 if (NegAcc)
4178 C = Builder.CreateFNeg(C);
4179
4180 if (CI->arg_size() == 5 &&
4181 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4182 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4183 Intrinsic::ID IID;
 4184      // Check the character before ".512" in the string.
4185 if (Name[Name.size() - 5] == 's')
4186 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4187 else
4188 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4189
4190 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4191 } else {
4192 Rep = Builder.CreateFMA(A, B, C);
4193 }
4194
4195 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4196 : IsMask3 ? CI->getArgOperand(2)
4197 : CI->getArgOperand(0);
4198
4199 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4200 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4201 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4202 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4203 Intrinsic::ID IID;
4204 if (VecWidth == 128 && EltWidth == 32)
4205 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4206 else if (VecWidth == 256 && EltWidth == 32)
4207 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4208 else if (VecWidth == 128 && EltWidth == 64)
4209 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4210 else if (VecWidth == 256 && EltWidth == 64)
4211 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4212 else
4213 llvm_unreachable("Unexpected intrinsic");
4214
4215 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4216 CI->getArgOperand(2)};
4217 Ops[2] = Builder.CreateFNeg(Ops[2]);
4218 Rep = Builder.CreateIntrinsic(IID, Ops);
4219 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4220 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4221 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4222 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4223 bool IsMask3 = Name[11] == '3';
4224 bool IsMaskZ = Name[11] == 'z';
 4226    // Drop the "avx512.mask|mask3|maskz." prefix to simplify the checks below.
4226 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4227 bool IsSubAdd = Name[3] == 's';
4228 if (CI->arg_size() == 5) {
4229 Intrinsic::ID IID;
 4230      // Check the character before ".512" in the string.
4231 if (Name[Name.size() - 5] == 's')
4232 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4233 else
4234 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4235
4236 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4237 CI->getArgOperand(2), CI->getArgOperand(4)};
4238 if (IsSubAdd)
4239 Ops[2] = Builder.CreateFNeg(Ops[2]);
4240
4241 Rep = Builder.CreateIntrinsic(IID, Ops);
4242 } else {
4243 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4244
4245 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4246 CI->getArgOperand(2)};
4247
 4248      Function *FMA = Intrinsic::getOrInsertDeclaration(
 4249          CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4250 Value *Odd = Builder.CreateCall(FMA, Ops);
4251 Ops[2] = Builder.CreateFNeg(Ops[2]);
4252 Value *Even = Builder.CreateCall(FMA, Ops);
4253
4254 if (IsSubAdd)
4255 std::swap(Even, Odd);
4256
4257 SmallVector<int, 32> Idxs(NumElts);
4258 for (int i = 0; i != NumElts; ++i)
4259 Idxs[i] = i + (i % 2) * NumElts;
4260
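      // E.g. for four elements the mask is <0, 5, 2, 7>: even lanes come
      // from the first shuffle operand (Even) and odd lanes from the second
      // (Odd).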
4261 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4262 }
4263
4264 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4265 : IsMask3 ? CI->getArgOperand(2)
4266 : CI->getArgOperand(0);
4267
4268 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4269 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4270 Name.starts_with("avx512.maskz.pternlog.")) {
4271 bool ZeroMask = Name[11] == 'z';
4272 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4273 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4274 Intrinsic::ID IID;
4275 if (VecWidth == 128 && EltWidth == 32)
4276 IID = Intrinsic::x86_avx512_pternlog_d_128;
4277 else if (VecWidth == 256 && EltWidth == 32)
4278 IID = Intrinsic::x86_avx512_pternlog_d_256;
4279 else if (VecWidth == 512 && EltWidth == 32)
4280 IID = Intrinsic::x86_avx512_pternlog_d_512;
4281 else if (VecWidth == 128 && EltWidth == 64)
4282 IID = Intrinsic::x86_avx512_pternlog_q_128;
4283 else if (VecWidth == 256 && EltWidth == 64)
4284 IID = Intrinsic::x86_avx512_pternlog_q_256;
4285 else if (VecWidth == 512 && EltWidth == 64)
4286 IID = Intrinsic::x86_avx512_pternlog_q_512;
4287 else
4288 llvm_unreachable("Unexpected intrinsic");
4289
4290 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4291 CI->getArgOperand(2), CI->getArgOperand(3)};
4292 Rep = Builder.CreateIntrinsic(IID, Args);
4293 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4294 : CI->getArgOperand(0);
4295 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4296 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4297 Name.starts_with("avx512.maskz.vpmadd52")) {
4298 bool ZeroMask = Name[11] == 'z';
4299 bool High = Name[20] == 'h' || Name[21] == 'h';
4300 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4301 Intrinsic::ID IID;
4302 if (VecWidth == 128 && !High)
4303 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4304 else if (VecWidth == 256 && !High)
4305 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4306 else if (VecWidth == 512 && !High)
4307 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4308 else if (VecWidth == 128 && High)
4309 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4310 else if (VecWidth == 256 && High)
4311 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4312 else if (VecWidth == 512 && High)
4313 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4314 else
4315 llvm_unreachable("Unexpected intrinsic");
4316
4317 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4318 CI->getArgOperand(2)};
4319 Rep = Builder.CreateIntrinsic(IID, Args);
4320 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4321 : CI->getArgOperand(0);
4322 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4323 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4324 Name.starts_with("avx512.mask.vpermt2var.") ||
4325 Name.starts_with("avx512.maskz.vpermt2var.")) {
4326 bool ZeroMask = Name[11] == 'z';
4327 bool IndexForm = Name[17] == 'i';
4328 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4329 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4330 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4331 Name.starts_with("avx512.mask.vpdpbusds.") ||
4332 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4333 bool ZeroMask = Name[11] == 'z';
4334 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4335 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4336 Intrinsic::ID IID;
4337 if (VecWidth == 128 && !IsSaturating)
4338 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4339 else if (VecWidth == 256 && !IsSaturating)
4340 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4341 else if (VecWidth == 512 && !IsSaturating)
4342 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4343 else if (VecWidth == 128 && IsSaturating)
4344 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4345 else if (VecWidth == 256 && IsSaturating)
4346 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4347 else if (VecWidth == 512 && IsSaturating)
4348 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4349 else
4350 llvm_unreachable("Unexpected intrinsic");
4351
4352 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4353 CI->getArgOperand(2)};
4354
 4355    // Input argument types were incorrectly set to vectors of i32 before, but
 4356    // they should be vectors of i8. Insert bitcasts when encountering the old
 4357    // types.
4358 if (Args[1]->getType()->isVectorTy() &&
4359 cast<VectorType>(Args[1]->getType())
4360 ->getElementType()
4361 ->isIntegerTy(32) &&
4362 Args[2]->getType()->isVectorTy() &&
4363 cast<VectorType>(Args[2]->getType())
4364 ->getElementType()
4365 ->isIntegerTy(32)) {
4366 Type *NewArgType = nullptr;
4367 if (VecWidth == 128)
4368 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4369 else if (VecWidth == 256)
4370 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4371 else if (VecWidth == 512)
4372 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4373 else
4374 llvm_unreachable("Unexpected vector bit width");
4375
4376 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4377 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4378 }
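// E.g. (sketch): for the 128-bit form, operands that still carry the old
// <4 x i32> type are retyped with plain bitcasts before the call:
//   %b8 = bitcast <4 x i32> %b to <16 x i8>
//   %c8 = bitcast <4 x i32> %c to <16 x i8>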
4379
4380 Rep = Builder.CreateIntrinsic(IID, Args);
4381 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4382 : CI->getArgOperand(0);
4383 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4384 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4385 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4386 Name.starts_with("avx512.mask.vpdpwssds.") ||
4387 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4388 bool ZeroMask = Name[11] == 'z';
4389 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4390 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4391 Intrinsic::ID IID;
4392 if (VecWidth == 128 && !IsSaturating)
4393 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4394 else if (VecWidth == 256 && !IsSaturating)
4395 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4396 else if (VecWidth == 512 && !IsSaturating)
4397 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4398 else if (VecWidth == 128 && IsSaturating)
4399 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4400 else if (VecWidth == 256 && IsSaturating)
4401 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4402 else if (VecWidth == 512 && IsSaturating)
4403 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4404 else
4405 llvm_unreachable("Unexpected intrinsic");
4406
4407 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4408 CI->getArgOperand(2)};
4409
4410 // Input argument types were incorrectly set to vectors of i32 before, but
4411 // they should be vectors of i16. Insert a bitcast when encountering the old
4412 // types.
4413 if (Args[1]->getType()->isVectorTy() &&
4414 cast<VectorType>(Args[1]->getType())
4415 ->getElementType()
4416 ->isIntegerTy(32) &&
4417 Args[2]->getType()->isVectorTy() &&
4418 cast<VectorType>(Args[2]->getType())
4419 ->getElementType()
4420 ->isIntegerTy(32)) {
4421 Type *NewArgType = nullptr;
4422 if (VecWidth == 128)
4423 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4424 else if (VecWidth == 256)
4425 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4426 else if (VecWidth == 512)
4427 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4428 else
4429 llvm_unreachable("Unexpected vector bit width");
4430
4431 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4432 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4433 }
4434
4435 Rep = Builder.CreateIntrinsic(IID, Args);
4436 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4437 : CI->getArgOperand(0);
4438 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4439 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4440 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4441 Name == "subborrow.u32" || Name == "subborrow.u64") {
4442 Intrinsic::ID IID;
4443 if (Name[0] == 'a' && Name.back() == '2')
4444 IID = Intrinsic::x86_addcarry_32;
4445 else if (Name[0] == 'a' && Name.back() == '4')
4446 IID = Intrinsic::x86_addcarry_64;
4447 else if (Name[0] == 's' && Name.back() == '2')
4448 IID = Intrinsic::x86_subborrow_32;
4449 else if (Name[0] == 's' && Name.back() == '4')
4450 IID = Intrinsic::x86_subborrow_64;
4451 else
4452 llvm_unreachable("Unexpected intrinsic");
4453
4454 // Make a call with 3 operands.
4455 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4456 CI->getArgOperand(2)};
4457 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4458
4459 // Extract the second result and store it.
4460 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4461 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4462 // Replace the original call result with the first result of the new call.
4463 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4464
4465 CI->replaceAllUsesWith(CF);
4466 Rep = nullptr;
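// Sketch of the upgrade (value names illustrative): the legacy form
//   %cf = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, ptr %out)
// becomes a struct-returning call plus an explicit store:
//   %pair = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
//   %sum = extractvalue { i8, i32 } %pair, 1
//   store i32 %sum, ptr %out, align 1
//   %cf = extractvalue { i8, i32 } %pair, 0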
4467 } else if (Name.starts_with("avx512.mask.") &&
4468 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4469 // Rep will be updated by the call in the condition.
4470 }
4471
4472 return Rep;
4473}
4474
4475static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4476 Function *F, IRBuilder<> &Builder) {
4477 if (Name.starts_with("neon.bfcvt")) {
4478 if (Name.starts_with("neon.bfcvtn2")) {
4479 SmallVector<int, 32> LoMask(4);
4480 std::iota(LoMask.begin(), LoMask.end(), 0);
4481 SmallVector<int, 32> ConcatMask(8);
4482 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4483 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4484 Value *Trunc =
4485 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4486 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4487 } else if (Name.starts_with("neon.bfcvtn")) {
4488 SmallVector<int, 32> ConcatMask(8);
4489 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4490 Type *V4BF16 =
4491 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4492 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4494 return Builder.CreateShuffleVector(
4495 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4496 } else {
4497 return Builder.CreateFPTrunc(CI->getOperand(0),
4498 Type::getBFloatTy(F->getContext()));
4499 }
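// E.g. (a sketch): "neon.bfcvtn" on <4 x float> %x becomes a plain fptrunc
// widened back to the full register width with zeroes in the high half:
//   %t = fptrunc <4 x float> %x to <4 x bfloat>
//   %r = shufflevector <4 x bfloat> %t, <4 x bfloat> zeroinitializer, ...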
4500 } else if (Name.starts_with("sve.fcvt")) {
4501 Intrinsic::ID NewID =
4502 StringSwitch<Intrinsic::ID>(Name)
4503 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4504 .Case("sve.fcvtnt.bf16f32",
4505 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4506 .Default(Intrinsic::not_intrinsic);
4507 if (NewID == Intrinsic::not_intrinsic)
4508 llvm_unreachable("Unhandled Intrinsic!");
4509
4510 SmallVector<Value *, 3> Args(CI->args());
4511
4512 // The original intrinsics incorrectly used a predicate based on the
4513 // smallest element type rather than the largest.
4514 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4515 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4516
4517 if (Args[1]->getType() != BadPredTy)
4518 llvm_unreachable("Unexpected predicate type!");
4519
4520 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4521 BadPredTy, Args[1]);
4522 Args[1] = Builder.CreateIntrinsic(
4523 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
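// The two calls above round-trip the predicate through the generic svbool
// type: the old <vscale x 8 x i1> operand is widened to svbool and
// reinterpreted as <vscale x 4 x i1>, the lane count the f32-based
// intrinsic actually expects.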
4524
4525 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4526 CI->getName());
4527 }
4528
4529 llvm_unreachable("Unhandled Intrinsic!");
4530}
4531
4532static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4533 IRBuilder<> &Builder) {
4534 if (Name == "mve.vctp64.old") {
4535 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4536 // correct type.
4537 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4538 CI->getArgOperand(0),
4539 /*FMFSource=*/nullptr, CI->getName());
4540 Value *C1 = Builder.CreateIntrinsic(
4541 Intrinsic::arm_mve_pred_v2i,
4542 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4543 return Builder.CreateIntrinsic(
4544 Intrinsic::arm_mve_pred_i2v,
4545 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
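// Roughly (a sketch; the exact mangled names may differ):
//   %p = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %p)
//   %r = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)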
4546 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4547 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4548 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4549 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4550 Name ==
4551 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4552 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4553 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4554 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4555 Name ==
4556 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4557 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4558 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4559 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4560 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4561 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4562 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4563 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4564 std::vector<Type *> Tys;
4565 unsigned ID = CI->getIntrinsicID();
4566 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4567 switch (ID) {
4568 case Intrinsic::arm_mve_mull_int_predicated:
4569 case Intrinsic::arm_mve_vqdmull_predicated:
4570 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4571 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4572 break;
4573 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4574 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4575 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4576 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4577 V2I1Ty};
4578 break;
4579 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4580 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4581 CI->getOperand(1)->getType(), V2I1Ty};
4582 break;
4583 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4584 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4585 CI->getOperand(2)->getType(), V2I1Ty};
4586 break;
4587 case Intrinsic::arm_cde_vcx1q_predicated:
4588 case Intrinsic::arm_cde_vcx1qa_predicated:
4589 case Intrinsic::arm_cde_vcx2q_predicated:
4590 case Intrinsic::arm_cde_vcx2qa_predicated:
4591 case Intrinsic::arm_cde_vcx3q_predicated:
4592 case Intrinsic::arm_cde_vcx3qa_predicated:
4593 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4594 break;
4595 default:
4596 llvm_unreachable("Unhandled Intrinsic!");
4597 }
4598
4599 std::vector<Value *> Ops;
4600 for (Value *Op : CI->args()) {
4601 Type *Ty = Op->getType();
4602 if (Ty->getScalarSizeInBits() == 1) {
4603 Value *C1 = Builder.CreateIntrinsic(
4604 Intrinsic::arm_mve_pred_v2i,
4605 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4606 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4607 }
4608 Ops.push_back(Op);
4609 }
4610
4611 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4612 CI->getName());
4613 }
4614 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4615}
4616
4617// These are expected to have the arguments:
4618// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4619//
4620// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4621//
4623 Function *F, IRBuilder<> &Builder) {
4624 if (CI->arg_size() == 7 &&
4625 F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4626 // Legacy WMMA IU8 intrinsic lacked the optional clamp operand. Append
4627 // clamp=false for compatibility.
4628
4629 SmallVector<Value *, 8> Args(CI->args().begin(), CI->args().end());
4630 Args.push_back(Builder.getFalse());
4631
4632 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4633 F->getParent(), Intrinsic::amdgcn_wmma_i32_16x16x64_iu8,
4634 {CI->getArgOperand(4)->getType(), CI->getArgOperand(1)->getType()});
4635
4637 CI->getOperandBundlesAsDefs(Bundles);
4638
4639 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4640 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4641 NewCall->setCallingConv(CI->getCallingConv());
4642 NewCall->setAttributes(CI->getAttributes());
4643 NewCall->setDebugLoc(CI->getDebugLoc());
4644 NewCall->copyMetadata(*CI);
4645 return NewCall;
4646 }
4647
4648 AtomicRMWInst::BinOp RMWOp =
4649 StringSwitch<AtomicRMWInst::BinOp>(Name)
4650 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4651 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4652 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4653 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4654 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4655 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4656 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4657 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4658 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4659 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4660 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4661 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4662 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4663
4664 unsigned NumOperands = CI->getNumOperands();
4665 if (NumOperands < 3) // Malformed bitcode.
4666 return nullptr;
4667
4668 Value *Ptr = CI->getArgOperand(0);
4669 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4670 if (!PtrTy) // Malformed.
4671 return nullptr;
4672
4673 Value *Val = CI->getArgOperand(1);
4674 if (Val->getType() != CI->getType()) // Malformed.
4675 return nullptr;
4676
4677 ConstantInt *OrderArg = nullptr;
4678 bool IsVolatile = false;
4679
4680 // These should have 5 arguments (plus the callee). A separate version of the
4681 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4682 if (NumOperands > 3)
4683 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4684
4685 // Ignore scope argument at 3
4686
4687 if (NumOperands > 5) {
4688 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4689 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4690 }
4691
4692 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4693 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4694 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4695 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4696 Order = AtomicOrdering::SequentiallyConsistent;
4697
4698 LLVMContext &Ctx = F->getContext();
4699
4700 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4701 Type *RetTy = CI->getType();
4702 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4703 if (VT->getElementType()->isIntegerTy(16)) {
4704 VectorType *AsBF16 =
4705 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4706 Val = Builder.CreateBitCast(Val, AsBF16);
4707 }
4708 }
4709
4710 // The scope argument never really worked correctly. Use agent as the most
4711 // conservative option, which should still always produce the instruction.
4712 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4713 AtomicRMWInst *RMW =
4714 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
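// E.g. (a sketch): a legacy "amdgcn.ds.fadd" call on float ends up as
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v
//            syncscope("agent") seq_cst, align 4
// with the ordering and volatility recovered from the old immediate
// arguments decoded above.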
4715
4716 unsigned AddrSpace = PtrTy->getAddressSpace();
4717 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4718 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4719 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4720 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4721 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4722 }
4723
4724 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4725 MDBuilder MDB(F->getContext());
4726 MDNode *RangeNotPrivate =
4727 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4728 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4729 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4730 }
4731
4732 if (IsVolatile)
4733 RMW->setVolatile(true);
4734
4735 return Builder.CreateBitCast(RMW, RetTy);
4736}
4737
4738/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4739/// plain MDNode, as it's the verifier's job to check these are the correct
4740/// types later.
4741static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4742 if (Op < CI->arg_size()) {
4743 if (MetadataAsValue *MAV =
4744 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4745 Metadata *MD = MAV->getMetadata();
4746 return dyn_cast_if_present<MDNode>(MD);
4747 }
4748 }
4749 return nullptr;
4750}
4751
4752/// Helper to unwrap MetadataAsValue operands that wrap plain Metadata, such as
4753/// the Value field.
4753static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4754 if (Op < CI->arg_size())
4755 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4756 return MAV->getMetadata();
4757 return nullptr;
4758}
4759
4760static MDNode *getDebugLocSafe(const Instruction *I) {
4761 // The MDNode attached to this instruction might not be the correct type,
4762 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4763 return I->getDebugLoc().getAsMDNode();
4764}
4765
4766/// Convert debug intrinsic calls to non-instruction debug records.
4767/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4768/// \p CI - The debug intrinsic call.
4769static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4770 DbgRecord *DR = nullptr;
4771 if (Name == "label") {
4773 CI->getDebugLoc());
4774 } else if (Name == "assign") {
4777 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4778 unwrapMAVMetadataOp(CI, 4),
4779 /*The address is a Value ref, it will be stored as a Metadata */
4780 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4781 } else if (Name == "declare") {
4784 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4785 getDebugLocSafe(CI));
4786 } else if (Name == "addr") {
4787 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4788 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4789 // Don't try to add something to the expression if it's not an expression.
4790 // Instead, allow the verifier to fail later.
4791 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4792 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4793 }
4794 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4795 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4796 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4797 getDebugLocSafe(CI));
4798 } else if (Name == "value") {
4799 // An old version of dbg.value had an extra offset argument.
4800 unsigned VarOp = 1;
4801 unsigned ExprOp = 2;
4802 if (CI->arg_size() == 4) {
4803 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4804 // Nonzero offset dbg.values get dropped without a replacement.
4805 if (!Offset || !Offset->isZeroValue())
4806 return;
4807 VarOp = 2;
4808 ExprOp = 3;
4809 }
4810 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4811 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4812 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4813 nullptr, getDebugLocSafe(CI));
4814 }
4815 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4816 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4817}
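// For example (a sketch), a call such as
//   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
//                             metadata !DIExpression())
// is rebuilt as the equivalent non-instruction debug record:
//   #dbg_value(i32 %x, !10, !DIExpression(), !loc)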
4818
4819/// Upgrade a call to an old intrinsic. All argument and return casting must be
4820/// provided to seamlessly integrate with existing context.
4821void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4822 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4823 // checks that the callee's function type matches. It's likely we need to handle
4824 // type changes here.
4825 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4826 if (!F)
4827 return;
4828
4829 LLVMContext &C = CI->getContext();
4830 IRBuilder<> Builder(C);
4831 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4832
4833 if (!NewFn) {
4834 // Get the Function's name.
4835 StringRef Name = F->getName();
4836
4837 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4838 Name = Name.substr(5);
4839
4840 bool IsX86 = Name.consume_front("x86.");
4841 bool IsNVVM = Name.consume_front("nvvm.");
4842 bool IsAArch64 = Name.consume_front("aarch64.");
4843 bool IsARM = Name.consume_front("arm.");
4844 bool IsAMDGCN = Name.consume_front("amdgcn.");
4845 bool IsDbg = Name.consume_front("dbg.");
4846 Value *Rep = nullptr;
4847
4848 if (!IsX86 && Name == "stackprotectorcheck") {
4849 Rep = nullptr;
4850 } else if (IsNVVM) {
4851 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4852 } else if (IsX86) {
4853 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4854 } else if (IsAArch64) {
4855 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4856 } else if (IsARM) {
4857 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4858 } else if (IsAMDGCN) {
4859 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4860 } else if (IsDbg) {
4861 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4862 } else {
4863 llvm_unreachable("Unknown function for CallBase upgrade.");
4864 }
4865
4866 if (Rep)
4867 CI->replaceAllUsesWith(Rep);
4868 CI->eraseFromParent();
4869 return;
4870 }
4871
4872 const auto &DefaultCase = [&]() -> void {
4873 if (F == NewFn)
4874 return;
4875
4876 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4877 // Handle generic mangling change.
4878 assert(
4879 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4880 "Unknown function for CallBase upgrade and isn't just a name change");
4881 CI->setCalledFunction(NewFn);
4882 return;
4883 }
4884
4885 // This must be an upgrade from a named to a literal struct.
4886 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4887 assert(OldST != NewFn->getReturnType() &&
4888 "Return type must have changed");
4889 assert(OldST->getNumElements() ==
4890 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4891 "Must have same number of elements");
4892
4893 SmallVector<Value *> Args(CI->args());
4894 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4895 NewCI->setAttributes(CI->getAttributes());
4896 Value *Res = PoisonValue::get(OldST);
4897 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4898 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4899 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4900 }
4901 CI->replaceAllUsesWith(Res);
4902 CI->eraseFromParent();
4903 return;
4904 }
4905
4906 // We're probably about to produce something invalid. Let the verifier catch
4907 // it instead of dying here.
4908 CI->setCalledOperand(
4909 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4910 return;
4911 };
4912 CallInst *NewCall = nullptr;
4913 switch (NewFn->getIntrinsicID()) {
4914 default: {
4915 DefaultCase();
4916 return;
4917 }
4918 case Intrinsic::arm_neon_vst1:
4919 case Intrinsic::arm_neon_vst2:
4920 case Intrinsic::arm_neon_vst3:
4921 case Intrinsic::arm_neon_vst4:
4922 case Intrinsic::arm_neon_vst2lane:
4923 case Intrinsic::arm_neon_vst3lane:
4924 case Intrinsic::arm_neon_vst4lane: {
4925 SmallVector<Value *, 4> Args(CI->args());
4926 NewCall = Builder.CreateCall(NewFn, Args);
4927 break;
4928 }
4929 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4930 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4931 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4932 LLVMContext &Ctx = F->getParent()->getContext();
4933 SmallVector<Value *, 4> Args(CI->args());
4934 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4935 cast<ConstantInt>(Args[3])->getZExtValue());
4936 NewCall = Builder.CreateCall(NewFn, Args);
4937 break;
4938 }
4939 case Intrinsic::aarch64_sve_ld3_sret:
4940 case Intrinsic::aarch64_sve_ld4_sret:
4941 case Intrinsic::aarch64_sve_ld2_sret: {
4942 StringRef Name = F->getName();
4943 Name = Name.substr(5);
4944 unsigned N = StringSwitch<unsigned>(Name)
4945 .StartsWith("aarch64.sve.ld2", 2)
4946 .StartsWith("aarch64.sve.ld3", 3)
4947 .StartsWith("aarch64.sve.ld4", 4)
4948 .Default(0);
4949 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4950 unsigned MinElts = RetTy->getMinNumElements() / N;
4951 SmallVector<Value *, 2> Args(CI->args());
4952 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4953 Value *Ret = llvm::PoisonValue::get(RetTy);
4954 for (unsigned I = 0; I < N; I++) {
4955 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4956 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4957 }
4958 NewCall = dyn_cast<CallInst>(Ret);
4959 break;
4960 }
4961
4962 case Intrinsic::coro_end: {
4963 SmallVector<Value *, 3> Args(CI->args());
4964 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4965 NewCall = Builder.CreateCall(NewFn, Args);
4966 break;
4967 }
4968
4969 case Intrinsic::vector_extract: {
4970 StringRef Name = F->getName();
4971 Name = Name.substr(5); // Strip llvm
4972 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4973 DefaultCase();
4974 return;
4975 }
4976 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4977 unsigned MinElts = RetTy->getMinNumElements();
4978 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4979 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4980 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
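// Sketch: "aarch64.sve.tuple.get" with constant index I on a tuple whose
// parts are <vscale x 4 x i32> becomes llvm.vector.extract at element
// offset I * 4.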
4981 break;
4982 }
4983
4984 case Intrinsic::vector_insert: {
4985 StringRef Name = F->getName();
4986 Name = Name.substr(5);
4987 if (!Name.starts_with("aarch64.sve.tuple")) {
4988 DefaultCase();
4989 return;
4990 }
4991 if (Name.starts_with("aarch64.sve.tuple.set")) {
4992 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4993 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4994 Value *NewIdx =
4995 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4996 NewCall = Builder.CreateCall(
4997 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4998 break;
4999 }
5000 if (Name.starts_with("aarch64.sve.tuple.create")) {
5001 unsigned N = StringSwitch<unsigned>(Name)
5002 .StartsWith("aarch64.sve.tuple.create2", 2)
5003 .StartsWith("aarch64.sve.tuple.create3", 3)
5004 .StartsWith("aarch64.sve.tuple.create4", 4)
5005 .Default(0);
5006 assert(N > 1 && "Create is expected to be between 2-4");
5007 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5008 Value *Ret = llvm::PoisonValue::get(RetTy);
5009 unsigned MinElts = RetTy->getMinNumElements() / N;
5010 for (unsigned I = 0; I < N; I++) {
5011 Value *V = CI->getArgOperand(I);
5012 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5013 }
5014 NewCall = dyn_cast<CallInst>(Ret);
5015 }
5016 break;
5017 }
5018
5019 case Intrinsic::arm_neon_bfdot:
5020 case Intrinsic::arm_neon_bfmmla:
5021 case Intrinsic::arm_neon_bfmlalb:
5022 case Intrinsic::arm_neon_bfmlalt:
5023 case Intrinsic::aarch64_neon_bfdot:
5024 case Intrinsic::aarch64_neon_bfmmla:
5025 case Intrinsic::aarch64_neon_bfmlalb:
5026 case Intrinsic::aarch64_neon_bfmlalt: {
5027 SmallVector<Value *, 3> Args;
5028 assert(CI->arg_size() == 3 &&
5029 "Mismatch between function args and call args");
5030 size_t OperandWidth =
5031 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
5032 assert((OperandWidth == 64 || OperandWidth == 128) &&
5033 "Unexpected operand width");
5034 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5035 auto Iter = CI->args().begin();
5036 Args.push_back(*Iter++);
5037 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5038 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5039 NewCall = Builder.CreateCall(NewFn, Args);
5040 break;
5041 }
5042
5043 case Intrinsic::bitreverse:
5044 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5045 break;
5046
5047 case Intrinsic::ctlz:
5048 case Intrinsic::cttz: {
5049 if (CI->arg_size() != 1) {
5050 DefaultCase();
5051 return;
5052 }
5053
5054 NewCall =
5055 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5056 break;
5057 }
5058
5059 case Intrinsic::objectsize: {
5060 Value *NullIsUnknownSize =
5061 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5062 Value *Dynamic =
5063 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5064 NewCall = Builder.CreateCall(
5065 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5066 break;
5067 }
5068
5069 case Intrinsic::ctpop:
5070 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5071 break;
5072
5073 case Intrinsic::convert_from_fp16:
5074 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5075 break;
5076
5077 case Intrinsic::dbg_value: {
5078 StringRef Name = F->getName();
5079 Name = Name.substr(5); // Strip llvm.
5080 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5081 if (Name.starts_with("dbg.addr")) {
5083 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5084 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5085 NewCall =
5086 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5087 MetadataAsValue::get(C, Expr)});
5088 break;
5089 }
5090
5091 // Upgrade from the old version that had an extra offset argument.
5092 assert(CI->arg_size() == 4);
5093 // Drop nonzero offsets instead of attempting to upgrade them.
5094 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
5095 if (Offset->isZeroValue()) {
5096 NewCall = Builder.CreateCall(
5097 NewFn,
5098 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5099 break;
5100 }
5101 CI->eraseFromParent();
5102 return;
5103 }
5104
5105 case Intrinsic::ptr_annotation:
5106 // Upgrade from versions that lacked the annotation attribute argument.
5107 if (CI->arg_size() != 4) {
5108 DefaultCase();
5109 return;
5110 }
5111
5112 // Create a new call with an added null annotation attribute argument.
5113 NewCall = Builder.CreateCall(
5114 NewFn,
5115 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5116 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5117 NewCall->takeName(CI);
5118 CI->replaceAllUsesWith(NewCall);
5119 CI->eraseFromParent();
5120 return;
5121
5122 case Intrinsic::var_annotation:
5123 // Upgrade from versions that lacked the annotation attribute argument.
5124 if (CI->arg_size() != 4) {
5125 DefaultCase();
5126 return;
5127 }
5128 // Create a new call with an added null annotation attribute argument.
5129 NewCall = Builder.CreateCall(
5130 NewFn,
5131 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5132 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5133 NewCall->takeName(CI);
5134 CI->replaceAllUsesWith(NewCall);
5135 CI->eraseFromParent();
5136 return;
5137
5138 case Intrinsic::riscv_aes32dsi:
5139 case Intrinsic::riscv_aes32dsmi:
5140 case Intrinsic::riscv_aes32esi:
5141 case Intrinsic::riscv_aes32esmi:
5142 case Intrinsic::riscv_sm4ks:
5143 case Intrinsic::riscv_sm4ed: {
5144 // The last argument to these intrinsics used to be i8 and changed to i32.
5145 // The type overload for sm4ks and sm4ed was removed.
5146 Value *Arg2 = CI->getArgOperand(2);
5147 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5148 return;
5149
5150 Value *Arg0 = CI->getArgOperand(0);
5151 Value *Arg1 = CI->getArgOperand(1);
5152 if (CI->getType()->isIntegerTy(64)) {
5153 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5154 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5155 }
5156
5157 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5158 cast<ConstantInt>(Arg2)->getZExtValue());
5159
5160 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5161 Value *Res = NewCall;
5162 if (Res->getType() != CI->getType())
5163 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5164 NewCall->takeName(CI);
5165 CI->replaceAllUsesWith(Res);
5166 CI->eraseFromParent();
5167 return;
5168 }
5169 case Intrinsic::nvvm_mapa_shared_cluster: {
5170 // Create a new call with the correct address space.
5171 NewCall =
5172 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5173 Value *Res = NewCall;
5174 Res = Builder.CreateAddrSpaceCast(
5175 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5176 NewCall->takeName(CI);
5177 CI->replaceAllUsesWith(Res);
5178 CI->eraseFromParent();
5179 return;
5180 }
5181 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5182 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5183 // Create a new call with the correct address space.
5184 SmallVector<Value *, 4> Args(CI->args());
5185 Args[0] = Builder.CreateAddrSpaceCast(
5186 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5187
5188 NewCall = Builder.CreateCall(NewFn, Args);
5189 NewCall->takeName(CI);
5190 CI->replaceAllUsesWith(NewCall);
5191 CI->eraseFromParent();
5192 return;
5193 }
5194 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5195 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5196 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5197 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5198 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5199 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5200 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5201 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5202 SmallVector<Value *, 16> Args(CI->args());
5203
5204 // Create AddrSpaceCast to shared_cluster if needed.
5205 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5206 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5207 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5208 Args[0] = Builder.CreateAddrSpaceCast(
5209 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5210
5211 // Attach the flag argument for cta_group, with a
5212 // default value of 0. This handles case (2) in
5213 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5214 size_t NumArgs = CI->arg_size();
5215 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5216 if (!FlagArg->getType()->isIntegerTy(1))
5217 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5218
5219 NewCall = Builder.CreateCall(NewFn, Args);
5220 NewCall->takeName(CI);
5221 CI->replaceAllUsesWith(NewCall);
5222 CI->eraseFromParent();
5223 return;
5224 }
5225 case Intrinsic::riscv_sha256sig0:
5226 case Intrinsic::riscv_sha256sig1:
5227 case Intrinsic::riscv_sha256sum0:
5228 case Intrinsic::riscv_sha256sum1:
5229 case Intrinsic::riscv_sm3p0:
5230 case Intrinsic::riscv_sm3p1: {
5231 // The last argument to these intrinsics used to be i8 and changed to i32.
5232 // The type overload for sm4ks and sm4ed was removed.
5233 if (!CI->getType()->isIntegerTy(64))
5234 return;
5235
5236 Value *Arg =
5237 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5238
5239 NewCall = Builder.CreateCall(NewFn, Arg);
5240 Value *Res =
5241 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5242 NewCall->takeName(CI);
5243 CI->replaceAllUsesWith(Res);
5244 CI->eraseFromParent();
5245 return;
5246 }
5247
5248 case Intrinsic::x86_xop_vfrcz_ss:
5249 case Intrinsic::x86_xop_vfrcz_sd:
5250 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5251 break;
5252
5253 case Intrinsic::x86_xop_vpermil2pd:
5254 case Intrinsic::x86_xop_vpermil2ps:
5255 case Intrinsic::x86_xop_vpermil2pd_256:
5256 case Intrinsic::x86_xop_vpermil2ps_256: {
5257 SmallVector<Value *, 4> Args(CI->args());
5258 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5259 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5260 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5261 NewCall = Builder.CreateCall(NewFn, Args);
5262 break;
5263 }
5264
5265 case Intrinsic::x86_sse41_ptestc:
5266 case Intrinsic::x86_sse41_ptestz:
5267 case Intrinsic::x86_sse41_ptestnzc: {
5268 // The arguments for these intrinsics used to be v4f32, and changed
5269 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5270 // So, the only thing required is a bitcast for both arguments.
5271 // First, check the arguments have the old type.
5272 Value *Arg0 = CI->getArgOperand(0);
5273 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5274 return;
5275
5276 // Old intrinsic, add bitcasts
5277 Value *Arg1 = CI->getArgOperand(1);
5278
5279 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5280
5281 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5282 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5283
5284 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5285 break;
5286 }
5287
5288 case Intrinsic::x86_rdtscp: {
5289 // This used to take 1 argument. If we have no arguments, it is already
5290 // upgraded.
5291 if (CI->getNumOperands() == 0)
5292 return;
5293
5294 NewCall = Builder.CreateCall(NewFn);
5295 // Extract the second result and store it.
5296 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5297 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5298 // Replace the original call result with the first result of the new call.
5299 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5300
5301 NewCall->takeName(CI);
5302 CI->replaceAllUsesWith(TSC);
5303 CI->eraseFromParent();
5304 return;
5305 }
5306
5307 case Intrinsic::x86_sse41_insertps:
5308 case Intrinsic::x86_sse41_dppd:
5309 case Intrinsic::x86_sse41_dpps:
5310 case Intrinsic::x86_sse41_mpsadbw:
5311 case Intrinsic::x86_avx_dp_ps_256:
5312 case Intrinsic::x86_avx2_mpsadbw: {
5313 // Need to truncate the last argument from i32 to i8 -- this argument models
5314 // an inherently 8-bit immediate operand to these x86 instructions.
5315 SmallVector<Value *, 4> Args(CI->args());
5316
5317 // Replace the last argument with a trunc.
5318 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
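// E.g. (sketch): the former i32 immediate is simply narrowed in place:
//   %imm8 = trunc i32 %imm to i8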
5319 NewCall = Builder.CreateCall(NewFn, Args);
5320 break;
5321 }
5322
5323 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5324 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5325 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5326 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5327 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5328 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5329 SmallVector<Value *, 4> Args(CI->args());
5330 unsigned NumElts =
5331 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5332 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5333
5334 NewCall = Builder.CreateCall(NewFn, Args);
5335 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5336
5337 NewCall->takeName(CI);
5338 CI->replaceAllUsesWith(Res);
5339 CI->eraseFromParent();
5340 return;
5341 }
5342
5343 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5344 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5345 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5346 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5347 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5348 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5349 SmallVector<Value *, 4> Args(CI->args());
5350 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5351 if (NewFn->getIntrinsicID() ==
5352 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5353 Args[1] = Builder.CreateBitCast(
5354 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5355
5356 NewCall = Builder.CreateCall(NewFn, Args);
5357 Value *Res = Builder.CreateBitCast(
5358 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5359
5360 NewCall->takeName(CI);
5361 CI->replaceAllUsesWith(Res);
5362 CI->eraseFromParent();
5363 return;
5364 }
5365 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5366 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5367 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5368 SmallVector<Value *, 4> Args(CI->args());
5369 unsigned NumElts =
5370 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5371 Args[1] = Builder.CreateBitCast(
5372 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5373 Args[2] = Builder.CreateBitCast(
5374 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5375
5376 NewCall = Builder.CreateCall(NewFn, Args);
5377 break;
5378 }
5379
5380 case Intrinsic::thread_pointer: {
5381 NewCall = Builder.CreateCall(NewFn, {});
5382 break;
5383 }
5384
5385 case Intrinsic::memcpy:
5386 case Intrinsic::memmove:
5387 case Intrinsic::memset: {
5388 // We have to make sure that the call signature is what we're expecting.
5389 // We only want to change the old signatures by removing the alignment arg:
5390 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5391 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5392 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5393 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5394 // Note: i8*'s in the above can be any pointer type
5395 if (CI->arg_size() != 5) {
5396 DefaultCase();
5397 return;
5398 }
5399 // Remove alignment argument (3), and add alignment attributes to the
5400 // dest/src pointers.
5401 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5402 CI->getArgOperand(2), CI->getArgOperand(4)};
5403 NewCall = Builder.CreateCall(NewFn, Args);
5404 AttributeList OldAttrs = CI->getAttributes();
5405 AttributeList NewAttrs = AttributeList::get(
5406 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5407 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5408 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5409 NewCall->setAttributes(NewAttrs);
5410 auto *MemCI = cast<MemIntrinsic>(NewCall);
5411 // All mem intrinsics support dest alignment.
5412 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5413 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5414 // Memcpy/Memmove also support source alignment.
5415 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5416 MTI->setSourceAlignment(Align->getMaybeAlignValue());
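// Roughly (a sketch, shown with opaque-pointer syntax):
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 16, i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 16 %d, ptr align 16 %s,
//                                    i64 %n, i1 false)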
5417 break;
5418 }
5419
5420 case Intrinsic::masked_load:
5421 case Intrinsic::masked_gather:
5422 case Intrinsic::masked_store:
5423 case Intrinsic::masked_scatter: {
5424 if (CI->arg_size() != 4) {
5425 DefaultCase();
5426 return;
5427 }
5428
5429 auto GetMaybeAlign = [](Value *Op) {
5430 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5431 uint64_t Val = CI->getZExtValue();
5432 if (Val == 0)
5433 return MaybeAlign();
5434 if (isPowerOf2_64(Val))
5435 return MaybeAlign(Val);
5436 }
5437 reportFatalUsageError("Invalid alignment argument");
5438 };
5439 auto GetAlign = [&](Value *Op) {
5440 MaybeAlign Align = GetMaybeAlign(Op);
5441 if (Align)
5442 return *Align;
5443 reportFatalUsageError("Invalid zero alignment argument");
5444 };
5445
5446 const DataLayout &DL = CI->getDataLayout();
5447 switch (NewFn->getIntrinsicID()) {
5448 case Intrinsic::masked_load:
5449 NewCall = Builder.CreateMaskedLoad(
5450 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5451 CI->getArgOperand(2), CI->getArgOperand(3));
5452 break;
5453 case Intrinsic::masked_gather:
5454 NewCall = Builder.CreateMaskedGather(
5455 CI->getType(), CI->getArgOperand(0),
5456 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5457 CI->getType()->getScalarType()),
5458 CI->getArgOperand(2), CI->getArgOperand(3));
5459 break;
5460 case Intrinsic::masked_store:
5461 NewCall = Builder.CreateMaskedStore(
5462 CI->getArgOperand(0), CI->getArgOperand(1),
5463 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5464 break;
5465 case Intrinsic::masked_scatter:
5466 NewCall = Builder.CreateMaskedScatter(
5467 CI->getArgOperand(0), CI->getArgOperand(1),
5468 DL.getValueOrABITypeAlignment(
5469 GetMaybeAlign(CI->getArgOperand(2)),
5470 CI->getArgOperand(0)->getType()->getScalarType()),
5471 CI->getArgOperand(3));
5472 break;
5473 default:
5474 llvm_unreachable("Unexpected intrinsic ID");
5475 }
5476 // Previous metadata is still valid.
5477 NewCall->copyMetadata(*CI);
5478 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5479 break;
5480 }
5481
5482 case Intrinsic::lifetime_start:
5483 case Intrinsic::lifetime_end: {
5484 if (CI->arg_size() != 2) {
5485 DefaultCase();
5486 return;
5487 }
5488
5489 Value *Ptr = CI->getArgOperand(1);
5490 // Try to strip pointer casts, such that the lifetime works on an alloca.
5491 Ptr = Ptr->stripPointerCasts();
5492 if (isa<AllocaInst>(Ptr)) {
5493 // Don't use NewFn, as we might have looked through an addrspacecast.
5494 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5495 NewCall = Builder.CreateLifetimeStart(Ptr);
5496 else
5497 NewCall = Builder.CreateLifetimeEnd(Ptr);
5498 break;
5499 }
5500
5501 // Otherwise remove the lifetime marker.
5502 CI->eraseFromParent();
5503 return;
5504 }
5505
5506 case Intrinsic::x86_avx512_vpdpbusd_128:
5507 case Intrinsic::x86_avx512_vpdpbusd_256:
5508 case Intrinsic::x86_avx512_vpdpbusd_512:
5509 case Intrinsic::x86_avx512_vpdpbusds_128:
5510 case Intrinsic::x86_avx512_vpdpbusds_256:
5511 case Intrinsic::x86_avx512_vpdpbusds_512:
5512 case Intrinsic::x86_avx2_vpdpbssd_128:
5513 case Intrinsic::x86_avx2_vpdpbssd_256:
5514 case Intrinsic::x86_avx10_vpdpbssd_512:
5515 case Intrinsic::x86_avx2_vpdpbssds_128:
5516 case Intrinsic::x86_avx2_vpdpbssds_256:
5517 case Intrinsic::x86_avx10_vpdpbssds_512:
5518 case Intrinsic::x86_avx2_vpdpbsud_128:
5519 case Intrinsic::x86_avx2_vpdpbsud_256:
5520 case Intrinsic::x86_avx10_vpdpbsud_512:
5521 case Intrinsic::x86_avx2_vpdpbsuds_128:
5522 case Intrinsic::x86_avx2_vpdpbsuds_256:
5523 case Intrinsic::x86_avx10_vpdpbsuds_512:
5524 case Intrinsic::x86_avx2_vpdpbuud_128:
5525 case Intrinsic::x86_avx2_vpdpbuud_256:
5526 case Intrinsic::x86_avx10_vpdpbuud_512:
5527 case Intrinsic::x86_avx2_vpdpbuuds_128:
5528 case Intrinsic::x86_avx2_vpdpbuuds_256:
5529 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5530 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5531 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5532 CI->getArgOperand(2)};
5533 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5534 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5535 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5536
5537 NewCall = Builder.CreateCall(NewFn, Args);
5538 break;
5539 }
5540 case Intrinsic::x86_avx512_vpdpwssd_128:
5541 case Intrinsic::x86_avx512_vpdpwssd_256:
5542 case Intrinsic::x86_avx512_vpdpwssd_512:
5543 case Intrinsic::x86_avx512_vpdpwssds_128:
5544 case Intrinsic::x86_avx512_vpdpwssds_256:
5545 case Intrinsic::x86_avx512_vpdpwssds_512:
5546 case Intrinsic::x86_avx2_vpdpwsud_128:
5547 case Intrinsic::x86_avx2_vpdpwsud_256:
5548 case Intrinsic::x86_avx10_vpdpwsud_512:
5549 case Intrinsic::x86_avx2_vpdpwsuds_128:
5550 case Intrinsic::x86_avx2_vpdpwsuds_256:
5551 case Intrinsic::x86_avx10_vpdpwsuds_512:
5552 case Intrinsic::x86_avx2_vpdpwusd_128:
5553 case Intrinsic::x86_avx2_vpdpwusd_256:
5554 case Intrinsic::x86_avx10_vpdpwusd_512:
5555 case Intrinsic::x86_avx2_vpdpwusds_128:
5556 case Intrinsic::x86_avx2_vpdpwusds_256:
5557 case Intrinsic::x86_avx10_vpdpwusds_512:
5558 case Intrinsic::x86_avx2_vpdpwuud_128:
5559 case Intrinsic::x86_avx2_vpdpwuud_256:
5560 case Intrinsic::x86_avx10_vpdpwuud_512:
5561 case Intrinsic::x86_avx2_vpdpwuuds_128:
5562 case Intrinsic::x86_avx2_vpdpwuuds_256:
5563 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5564 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5565 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5566 CI->getArgOperand(2)};
5567 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5568 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5569 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5570
5571 NewCall = Builder.CreateCall(NewFn, Args);
5572 break;
5573 }
5574 assert(NewCall && "Should have either set this variable or returned through "
5575 "the default case");
5576 NewCall->takeName(CI);
5577 CI->replaceAllUsesWith(NewCall);
5578 CI->eraseFromParent();
5579}
5580
5581void llvm::UpgradeCallsToIntrinsic(Function *F) {
5582 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5583
5584 // Check if this function should be upgraded and get the replacement function
5585 // if there is one.
5586 Function *NewFn;
5587 if (UpgradeIntrinsicFunction(F, NewFn)) {
5588 // Replace all users of the old function with the new function or new
5589 // instructions. This is not a range loop because the call is deleted.
5590 for (User *U : make_early_inc_range(F->users()))
5591 if (CallBase *CB = dyn_cast<CallBase>(U))
5592 UpgradeIntrinsicCall(CB, NewFn);
5593
5594 // Remove old function, no longer used, from the module.
5595 if (F != NewFn)
5596 F->eraseFromParent();
5597 }
5598}
5599
5600MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5601 const unsigned NumOperands = MD.getNumOperands();
5602 if (NumOperands == 0)
5603 return &MD; // Invalid, punt to a verifier error.
5604
5605 // Check if the tag uses struct-path aware TBAA format.
5606 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5607 return &MD;
5608
5609 auto &Context = MD.getContext();
5610 if (NumOperands == 3) {
5611 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5612 MDNode *ScalarType = MDNode::get(Context, Elts);
5613 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5614 Metadata *Elts2[] = {ScalarType, ScalarType,
5615 ConstantAsMetadata::get(
5616 Constant::getNullValue(Type::getInt64Ty(Context))),
5617 MD.getOperand(2)};
5618 return MDNode::get(Context, Elts2);
5619 }
5620 // Create a MDNode <MD, MD, offset 0>
5621 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5622 Type::getInt64Ty(Context)))};
5623 return MDNode::get(Context, Elts);
5624}
5625
5626Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5627 Instruction *&Temp) {
5628 if (Opc != Instruction::BitCast)
5629 return nullptr;
5630
5631 Temp = nullptr;
5632 Type *SrcTy = V->getType();
5633 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5634 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5635 LLVMContext &Context = V->getContext();
5636
5637 // We have no information about target data layout, so we assume that
5638 // the maximum pointer size is 64bit.
5639 Type *MidTy = Type::getInt64Ty(Context);
5640 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5641
5642 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5643 }
5644
5645 return nullptr;
5646}
5647
5648Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5649 if (Opc != Instruction::BitCast)
5650 return nullptr;
5651
5652 Type *SrcTy = C->getType();
5653 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5654 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5655 LLVMContext &Context = C->getContext();
5656
5657 // We have no information about target data layout, so we assume that
5658 // the maximum pointer size is 64bit.
5659 Type *MidTy = Type::getInt64Ty(Context);
5660
5661 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5662 DestTy);
5663 }
5664
5665 return nullptr;
5666}
5667
5668/// Check the debug info version number; if it is outdated, drop the debug
5669/// info. Return true if the module is modified.
5670bool llvm::UpgradeDebugInfo(Module &M) {
5671 if (DisableAutoUpgradeDebugInfo)
5672 return false;
5673
5674 llvm::TimeTraceScope timeScope("Upgrade debug info");
5675 // We need to get metadata before the module is verified (i.e., getModuleFlag
5676 // makes assumptions that we haven't verified yet). Carefully extract the flag
5677 // from the metadata.
5678 unsigned Version = 0;
5679 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5680 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5681 if (Flag->getNumOperands() < 3)
5682 return false;
5683 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5684 return K->getString() == "Debug Info Version";
5685 return false;
5686 });
5687 if (OpIt != ModFlags->op_end()) {
5688 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5689 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5690 Version = CI->getZExtValue();
5691 }
5692 }
5693
5694 if (Version == DEBUG_METADATA_VERSION) {
5695 bool BrokenDebugInfo = false;
5696 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5697 report_fatal_error("Broken module found, compilation aborted!");
5698 if (!BrokenDebugInfo)
5699 // Everything is ok.
5700 return false;
5701 else {
5702 // Diagnose malformed debug info.
5703 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5704 M.getContext().diagnose(Diag);
5705 }
5706 }
5707 bool Modified = StripDebugInfo(M);
5708 if (Modified && Version != DEBUG_METADATA_VERSION) {
5709 // Diagnose a version mismatch.
5710 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5711 M.getContext().diagnose(DiagVersion);
5712 }
5713 return Modified;
5714}
5715
5716static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5717 GlobalValue *GV, const Metadata *V) {
5718 Function *F = cast<Function>(GV);
5719
5720 constexpr StringLiteral DefaultValue = "1";
5721 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5722 unsigned Length = 0;
5723
5724 if (F->hasFnAttribute(Attr)) {
5725 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5726 // parse these elements placing them into Vect3
5727 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5728 for (; Length < 3 && !S.empty(); Length++) {
5729 auto [Part, Rest] = S.split(',');
5730 Vect3[Length] = Part.trim();
5731 S = Rest;
5732 }
5733 }
5734
5735 const unsigned Dim = DimC - 'x';
5736 assert(Dim < 3 && "Unexpected dim char");
5737
5738 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5739
5740 // local variable required for StringRef in Vect3 to point to.
5741 const std::string VStr = llvm::utostr(VInt);
5742 Vect3[Dim] = VStr;
5743 Length = std::max(Length, Dim + 1);
5744
5745 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5746 F->addFnAttr(Attr, NewAttr);
5747}
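// For example (a sketch): with an existing "nvvm.maxntid"="128,2" attribute,
// upgrading a "maxntidz" annotation of 4 merges to "nvvm.maxntid"="128,2,4".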
5748
5749static inline bool isXYZ(StringRef S) {
5750 return S == "x" || S == "y" || S == "z";
5751}
5752
5753static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5754 const Metadata *V) {
5755 if (K == "kernel") {
5757 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5758 return true;
5759 }
5760 if (K == "align") {
5761 // V is a bitfield specifying two 16-bit values. The alignment value is
5762 // specified in the low 16 bits, the index in the high 16 bits. For the
5763 // index, 0 indicates the return value while higher values correspond to
5764 // each parameter (idx = param + 1).
5765 const uint64_t AlignIdxValuePair =
5766 mdconst::extract<ConstantInt>(V)->getZExtValue();
5767 const unsigned Idx = (AlignIdxValuePair >> 16);
5768 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5769 cast<Function>(GV)->addAttributeAtIndex(
5770 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5771 return true;
5772 }
5773 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5774 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5775 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5776 return true;
5777 }
5778 if (K == "minctasm") {
5779 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5780 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5781 return true;
5782 }
5783 if (K == "maxnreg") {
5784 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5785 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5786 return true;
5787 }
5788 if (K.consume_front("maxntid") && isXYZ(K)) {
5789 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5790 return true;
5791 }
5792 if (K.consume_front("reqntid") && isXYZ(K)) {
5793 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5794 return true;
5795 }
5796 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5797 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5798 return true;
5799 }
5800 if (K == "grid_constant") {
5801 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5802 for (const auto &Op : cast<MDNode>(V)->operands()) {
5803 // For some reason, the index is 1-based in the metadata. Good thing we're
5804 // able to auto-upgrade it!
5805 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5806 cast<Function>(GV)->addParamAttr(Index, Attr);
5807 }
5808 return true;
5809 }
5810
5811 return false;
5812}
5813
5814void llvm::UpgradeNVVMAnnotations(Module &M) {
5815 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5816 if (!NamedMD)
5817 return;
5818
5819 SmallVector<MDNode *, 8> NewNodes;
5820 SmallPtrSet<const MDNode *, 8> SeenNodes;
5821 for (MDNode *MD : NamedMD->operands()) {
5822 if (!SeenNodes.insert(MD).second)
5823 continue;
5824
5825 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5826 if (!GV)
5827 continue;
5828
5829 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5830
5831 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5832 // Each nvvm.annotations metadata entry will be of the following form:
5833 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5834 // start index = 1, to skip the global variable key
5835 // increment = 2, to skip the value for each property-value pairs
5836 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5837 MDString *K = cast<MDString>(MD->getOperand(j));
5838 const MDOperand &V = MD->getOperand(j + 1);
5839 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5840 if (!Upgraded)
5841 NewOperands.append({K, V});
5842 }
5843
5844 if (NewOperands.size() > 1)
5845 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5846 }
5847
5848 NamedMD->clearOperands();
5849 for (MDNode *N : NewNodes)
5850 NamedMD->addOperand(N);
5851}
5852
5853/// This checks for the objc retain/release marker, which should be upgraded.
5854/// It returns true if the module is modified.
5855static bool upgradeRetainReleaseMarker(Module &M) {
5856 bool Changed = false;
5857 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5858 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5859 if (ModRetainReleaseMarker) {
5860 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5861 if (Op) {
5862 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5863 if (ID) {
5864 SmallVector<StringRef, 4> ValueComp;
5865 ID->getString().split(ValueComp, "#");
5866 if (ValueComp.size() == 2) {
5867 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5868 ID = MDString::get(M.getContext(), NewValue);
5869 }
5870 M.addModuleFlag(Module::Error, MarkerKey, ID);
5871 M.eraseNamedMetadata(ModRetainReleaseMarker);
5872 Changed = true;
5873 }
5874 }
5875 }
5876 return Changed;
5877}
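// E.g. (a sketch): a two-part marker string "asmstring#comment" is rewritten
// as "asmstring;comment" before being stored as the module flag of the same
// name.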
5878
5879void llvm::UpgradeARCRuntime(Module &M) {
5880 // This lambda converts normal function calls to ARC runtime functions to
5881 // intrinsic calls.
5882 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5883 llvm::Intrinsic::ID IntrinsicFunc) {
5884 Function *Fn = M.getFunction(OldFunc);
5885
5886 if (!Fn)
5887 return;
5888
5889 Function *NewFn =
5890 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5891
5892 for (User *U : make_early_inc_range(Fn->users())) {
5893 CallInst *CI = dyn_cast<CallInst>(U);
5894 if (!CI || CI->getCalledFunction() != Fn)
5895 continue;
5896
5897 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5898 FunctionType *NewFuncTy = NewFn->getFunctionType();
5899 SmallVector<Value *, 4> Args;
5900
5901 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5902 // value to the return type of the old function.
5903 if (NewFuncTy->getReturnType() != CI->getType() &&
5904 !CastInst::castIsValid(Instruction::BitCast, CI,
5905 NewFuncTy->getReturnType()))
5906 continue;
5907
5908 bool InvalidCast = false;
5909
5910 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5911 Value *Arg = CI->getArgOperand(I);
5912
5913 // Bitcast argument to the parameter type of the new function if it's
5914 // not a variadic argument.
5915 if (I < NewFuncTy->getNumParams()) {
5916 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5917 // to the parameter type of the new function.
5918 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5919 NewFuncTy->getParamType(I))) {
5920 InvalidCast = true;
5921 break;
5922 }
5923 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5924 }
5925 Args.push_back(Arg);
5926 }
5927
5928 if (InvalidCast)
5929 continue;
5930
5931 // Create a call instruction that calls the new function.
5932 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5933 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5934 NewCall->takeName(CI);
5935
5936 // Bitcast the return value back to the type of the old call.
5937 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5938
5939 if (!CI->use_empty())
5940 CI->replaceAllUsesWith(NewRetVal);
5941 CI->eraseFromParent();
5942 }
5943
5944 if (Fn->use_empty())
5945 Fn->eraseFromParent();
5946 };
5947
5948 // Unconditionally convert a call to "clang.arc.use" to a call to
5949 // "llvm.objc.clang.arc.use".
5950 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5951
5952 // Upgrade the retain-release marker. If there is no need to upgrade the
5953 // marker, the module is either already new enough to contain the new
5954 // intrinsics or it is not ARC; either way the runtime calls need no upgrade.
5955 if (!upgradeRetainReleaseMarker(M))
5956 return;
5957
5958 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5959 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5960 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5961 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5962 {"objc_autoreleaseReturnValue",
5963 llvm::Intrinsic::objc_autoreleaseReturnValue},
5964 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5965 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5966 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5967 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5968 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5969 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5970 {"objc_release", llvm::Intrinsic::objc_release},
5971 {"objc_retain", llvm::Intrinsic::objc_retain},
5972 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5973 {"objc_retainAutoreleaseReturnValue",
5974 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5975 {"objc_retainAutoreleasedReturnValue",
5976 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5977 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5978 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5979 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5980 {"objc_unsafeClaimAutoreleasedReturnValue",
5981 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5982 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5983 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5984 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5985 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5986 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5987 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5988 {"objc_arc_annotation_topdown_bbstart",
5989 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5990 {"objc_arc_annotation_topdown_bbend",
5991 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5992 {"objc_arc_annotation_bottomup_bbstart",
5993 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5994 {"objc_arc_annotation_bottomup_bbend",
5995 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5996
5997 for (auto &I : RuntimeFuncs)
5998 UpgradeToIntrinsic(I.first, I.second);
5999}
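// For illustration: after this runs, a direct runtime call such as
//   %0 = call ptr @objc_retain(ptr %p)
// has been rewritten (modulo bitcasts) into
//   %0 = call ptr @llvm.objc.retain(ptr %p)
// and the now-dead @objc_retain declaration has been erased.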
6000
6001bool llvm::UpgradeModuleFlags(Module &M) {
6002 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6003 if (!ModFlags)
6004 return false;
6005
6006 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6007 bool HasSwiftVersionFlag = false;
6008 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6009 uint32_t SwiftABIVersion;
6010 auto Int8Ty = Type::getInt8Ty(M.getContext());
6011 auto Int32Ty = Type::getInt32Ty(M.getContext());
6012
6013 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6014 MDNode *Op = ModFlags->getOperand(I);
6015 if (Op->getNumOperands() != 3)
6016 continue;
6017 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6018 if (!ID)
6019 continue;
6020 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6021 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6022 Type::getInt32Ty(M.getContext()), B)),
6023 MDString::get(M.getContext(), ID->getString()),
6024 Op->getOperand(2)};
6025 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6026 Changed = true;
6027 };
6028
6029 if (ID->getString() == "Objective-C Image Info Version")
6030 HasObjCFlag = true;
6031 if (ID->getString() == "Objective-C Class Properties")
6032 HasClassProperties = true;
6033 // Upgrade PIC from Error/Max to Min.
6034 if (ID->getString() == "PIC Level") {
6035 if (auto *Behavior =
6036 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6037 uint64_t V = Behavior->getLimitedValue();
6038 if (V == Module::Error || V == Module::Max)
6039 SetBehavior(Module::Min);
6040 }
6041 }
6042 // Upgrade "PIE Level" from Error to Max.
6043 if (ID->getString() == "PIE Level")
6044 if (auto *Behavior =
6045 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6046 if (Behavior->getLimitedValue() == Module::Error)
6047 SetBehavior(Module::Max);
6048
6049 // Upgrade branch protection and return address signing module flags. The
6050 // module flag behavior for these fields was Error; now it is Min.
6051 if (ID->getString() == "branch-target-enforcement" ||
6052 ID->getString().starts_with("sign-return-address")) {
6053 if (auto *Behavior =
6054 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6055 if (Behavior->getLimitedValue() == Module::Error) {
6056 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6057 Metadata *Ops[3] = {
6058 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6059 Op->getOperand(1), Op->getOperand(2)};
6060 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6061 Changed = true;
6062 }
6063 }
6064 }
6065
6066 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
6067 // section name so that llvm-lto will not complain about mismatching
6068 // module flags that are functionally the same.
6069 if (ID->getString() == "Objective-C Image Info Section") {
6070 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6071 SmallVector<StringRef, 4> ValueComp;
6072 Value->getString().split(ValueComp, " ");
6073 if (ValueComp.size() != 1) {
6074 std::string NewValue;
6075 for (auto &S : ValueComp)
6076 NewValue += S.str();
6077 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6078 MDString::get(M.getContext(), NewValue)};
6079 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6080 Changed = true;
6081 }
6082 }
6083 }
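 // For illustration: a section value such as
 //   "__DATA, __objc_imageinfo, regular, no_dead_strip"
 // is rewritten without spaces as
 //   "__DATA,__objc_imageinfo,regular,no_dead_strip"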
6084
6085 // IRUpgrader turns the i32 "Objective-C Garbage Collection" flag into an
6086 // i8 value. If the higher bits are set, it adds module flags for Swift info.
6087 if (ID->getString() == "Objective-C Garbage Collection") {
6088 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6089 if (Md) {
6090 assert(Md->getValue() && "Expected non-empty metadata");
6091 auto Type = Md->getValue()->getType();
6092 if (Type == Int8Ty)
6093 continue;
6094 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6095 if ((Val & 0xff) != Val) {
6096 HasSwiftVersionFlag = true;
6097 SwiftABIVersion = (Val & 0xff00) >> 8;
6098 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6099 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6100 }
6101 Metadata *Ops[3] = {
6102 Op->getOperand(0),
6103 Op->getOperand(1),
6104 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
6105 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6106 Changed = true;
6107 }
6108 }
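 // For illustration, a hypothetical packed value 0x05010200 splits into
 // Swift major 0x05, Swift minor 0x01, Swift ABI 0x02, and the low byte
 // 0x00, which is all the rewritten i8 flag retains.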
6109
6110 if (ID->getString() == "amdgpu_code_object_version") {
6111 Metadata *Ops[3] = {
6112 Op->getOperand(0),
6113 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6114 Op->getOperand(2)};
6115 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6116 Changed = true;
6117 }
6118 }
6119
6120 // "Objective-C Class Properties" is recently added for Objective-C. We
6121 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6122 // flag of value 0, so we can correclty downgrade this flag when trying to
6123 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6124 // this module flag.
6125 if (HasObjCFlag && !HasClassProperties) {
6126 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6127 (uint32_t)0);
6128 Changed = true;
6129 }
6130
6131 if (HasSwiftVersionFlag) {
6132 M.addModuleFlag(Module::Error, "Swift ABI Version",
6133 SwiftABIVersion);
6134 M.addModuleFlag(Module::Error, "Swift Major Version",
6135 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6136 M.addModuleFlag(Module::Error, "Swift Minor Version",
6137 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6138 Changed = true;
6139 }
6140
6141 return Changed;
6142}
6143
6144void llvm::UpgradeSectionAttributes(Module &M) {
6145 auto TrimSpaces = [](StringRef Section) -> std::string {
6146 SmallVector<StringRef, 5> Components;
6147 Section.split(Components, ',');
6148
6149 SmallString<32> Buffer;
6150 raw_svector_ostream OS(Buffer);
6151
6152 for (auto Component : Components)
6153 OS << ',' << Component.trim();
6154
6155 return std::string(OS.str().substr(1));
6156 };
6157
6158 for (auto &GV : M.globals()) {
6159 if (!GV.hasSection())
6160 continue;
6161
6162 StringRef Section = GV.getSection();
6163
6164 if (!Section.starts_with("__DATA, __objc_catlist"))
6165 continue;
6166
6167 // __DATA, __objc_catlist, regular, no_dead_strip
6168 // __DATA,__objc_catlist,regular,no_dead_strip
6169 GV.setSection(TrimSpaces(Section));
6170 }
6171}
6172
6173namespace {
6174// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6175// callsites within a function that did not also have the strictfp attribute.
6176// Since 10.0, if strict FP semantics are needed within a function, the
6177// function must have the strictfp attribute and all calls within the function
6178// must also have the strictfp attribute. This latter restriction is
6179// necessary to prevent unwanted libcall simplification when a function is
6180// being cloned (such as for inlining).
6181//
6182// The "dangling" strictfp attribute usage was only used to prevent constant
6183// folding and other libcall simplification. The nobuiltin attribute on the
6184// callsite has the same effect.
6185struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6186 StrictFPUpgradeVisitor() = default;
6187
6188 void visitCallBase(CallBase &Call) {
6189 if (!Call.isStrictFP())
6190 return;
6191 if (isa<ConstrainedFPIntrinsic>(&Call))
6192 return;
6193 // If we get here, the caller doesn't have the strictfp attribute
6194 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6195 Call.removeFnAttr(Attribute::StrictFP);
6196 Call.addFnAttr(Attribute::NoBuiltin);
6197 }
6198};
6199
6200/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6201struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6202 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6203 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6204
6205 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6206 if (!RMW.isFloatingPointOperation())
6207 return;
6208
6209 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6210 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6211 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6212 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6213 }
6214};
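// For illustration: with "amdgpu-unsafe-fp-atomics"="true" on the caller, an
//   atomicrmw fadd ptr %p, float %v seq_cst
// gains the !amdgpu.no.fine.grained.host.memory,
// !amdgpu.no.remote.memory.access and !amdgpu.ignore.denormal.mode
// annotations (each an empty MDNode).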
6215} // namespace
6216
6217void llvm::UpgradeFunctionAttributes(Function &F) {
6218 // If a function definition doesn't have the strictfp attribute,
6219 // convert any callsite strictfp attributes to nobuiltin.
6220 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6221 StrictFPUpgradeVisitor SFPV;
6222 SFPV.visit(F);
6223 }
6224
6225 // Remove all incompatible attributes from the function.
6226 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6227 F.getReturnType(), F.getAttributes().getRetAttrs()));
6228 for (auto &Arg : F.args())
6229 Arg.removeAttrs(
6230 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6231
6232 // Older versions of LLVM treated an "implicit-section-name" attribute
6233 // similarly to directly setting the section on a Function.
6234 if (Attribute A = F.getFnAttribute("implicit-section-name");
6235 A.isValid() && A.isStringAttribute()) {
6236 F.setSection(A.getValueAsString());
6237 F.removeFnAttr("implicit-section-name");
6238 }
6239
6240 if (!F.empty()) {
6241 // For some reason this is called twice, and the first time is before any
6242 // instructions are loaded into the body.
6243
6244 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6245 A.isValid()) {
6246
6247 if (A.getValueAsBool()) {
6248 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6249 Visitor.visit(F);
6250 }
6251
6252 // We will leave behind dead attribute uses on external declarations, but
6253 // clang never added these to declarations anyway.
6254 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6255 }
6256 }
6257}
6258
6259// Set the function attribute if it is not already present.
6260static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6261 StringRef Value) {
6262 if (!F.hasFnAttribute(FnAttrName))
6263 F.addFnAttr(FnAttrName, Value);
6264}
6265
6266// Set the function attribute if it is not present, or normalize it if it is.
6267// If the attribute's value is "false", remove the attribute.
6268// If its value is "true", reset it to a valueless attribute.
6269static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6270 if (!F.hasFnAttribute(FnAttrName)) {
6271 if (Set)
6272 F.addFnAttr(FnAttrName);
6273 } else {
6274 auto A = F.getFnAttribute(FnAttrName);
6275 if ("false" == A.getValueAsString())
6276 F.removeFnAttr(FnAttrName);
6277 else if ("true" == A.getValueAsString()) {
6278 F.removeFnAttr(FnAttrName);
6279 F.addFnAttr(FnAttrName);
6280 }
6281 }
6282}
6283
6284void llvm::copyModuleAttrToFunctions(Module &M) {
6285 Triple T(M.getTargetTriple());
6286 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6287 return;
6288
6289 uint64_t BTEValue = 0;
6290 uint64_t BPPLRValue = 0;
6291 uint64_t GCSValue = 0;
6292 uint64_t SRAValue = 0;
6293 uint64_t SRAALLValue = 0;
6294 uint64_t SRABKeyValue = 0;
6295
6296 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6297 if (ModFlags) {
6298 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6299 MDNode *Op = ModFlags->getOperand(I);
6300 if (Op->getNumOperands() != 3)
6301 continue;
6302
6303 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6304 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6305 if (!ID || !CI)
6306 continue;
6307
6308 StringRef IDStr = ID->getString();
6309 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6310 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6311 : IDStr == "guarded-control-stack" ? &GCSValue
6312 : IDStr == "sign-return-address" ? &SRAValue
6313 : IDStr == "sign-return-address-all" ? &SRAALLValue
6314 : IDStr == "sign-return-address-with-bkey"
6315 ? &SRABKeyValue
6316 : nullptr;
6317 if (!ValPtr)
6318 continue;
6319
6320 *ValPtr = CI->getZExtValue();
6321 if (*ValPtr == 2)
6322 return;
6323 }
6324 }
6325
6326 bool BTE = BTEValue == 1;
6327 bool BPPLR = BPPLRValue == 1;
6328 bool GCS = GCSValue == 1;
6329 bool SRA = SRAValue == 1;
6330
6331 StringRef SignTypeValue = "non-leaf";
6332 if (SRA && SRAALLValue == 1)
6333 SignTypeValue = "all";
6334
6335 StringRef SignKeyValue = "a_key";
6336 if (SRA && SRABKeyValue == 1)
6337 SignKeyValue = "b_key";
6338
6339 for (Function &F : M.getFunctionList()) {
6340 if (F.isDeclaration())
6341 continue;
6342
6343 if (SRA) {
6344 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6345 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6346 } else {
6347 if (auto A = F.getFnAttribute("sign-return-address");
6348 A.isValid() && "none" == A.getValueAsString()) {
6349 F.removeFnAttr("sign-return-address");
6350 F.removeFnAttr("sign-return-address-key");
6351 }
6352 }
6353 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6354 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6355 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6356 }
6357
6358 if (BTE)
6359 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6360 if (BPPLR)
6361 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6362 if (GCS)
6363 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6364 if (SRA) {
6365 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6366 if (SRAALLValue == 1)
6367 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6368 if (SRABKeyValue == 1)
6369 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6370 }
6371}
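// For illustration: a module carrying the flags
//   !{i32 8, !"sign-return-address", i32 1}
//   !{i32 8, !"sign-return-address-all", i32 1}
// stamps each function definition with "sign-return-address"="all" and
// "sign-return-address-key"="a_key", then re-emits the flags with value 2
// so a second run recognizes the module as already converted.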
6372
6373static bool isOldLoopArgument(Metadata *MD) {
6374 auto *T = dyn_cast_or_null<MDTuple>(MD);
6375 if (!T)
6376 return false;
6377 if (T->getNumOperands() < 1)
6378 return false;
6379 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6380 if (!S)
6381 return false;
6382 return S->getString().starts_with("llvm.vectorizer.");
6383}
6384
6385static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6386 StringRef OldPrefix = "llvm.vectorizer.";
6387 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6388
6389 if (OldTag == "llvm.vectorizer.unroll")
6390 return MDString::get(C, "llvm.loop.interleave.count");
6391
6392 return MDString::get(
6393 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6394 .str());
6395}
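// For illustration, the resulting tag mapping is:
//   "llvm.vectorizer.unroll" -> "llvm.loop.interleave.count"
//   "llvm.vectorizer.width"  -> "llvm.loop.vectorize.width"
//   "llvm.vectorizer.enable" -> "llvm.loop.vectorize.enable"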
6396
6397static Metadata *upgradeLoopArgument(Metadata *MD) {
6398 auto *T = dyn_cast_or_null<MDTuple>(MD);
6399 if (!T)
6400 return MD;
6401 if (T->getNumOperands() < 1)
6402 return MD;
6403 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6404 if (!OldTag)
6405 return MD;
6406 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6407 return MD;
6408
6409 // This has an old tag. Upgrade it.
6410 SmallVector<Metadata *, 8> Ops;
6411 Ops.reserve(T->getNumOperands());
6412 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6413 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6414 Ops.push_back(T->getOperand(I));
6415
6416 return MDTuple::get(T->getContext(), Ops);
6417}
6418
6419MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6420 auto *T = dyn_cast<MDTuple>(&N);
6421 if (!T)
6422 return &N;
6423
6424 if (none_of(T->operands(), isOldLoopArgument))
6425 return &N;
6426
6427 SmallVector<Metadata *, 8> Ops;
6428 Ops.reserve(T->getNumOperands());
6429 for (Metadata *MD : T->operands())
6430 Ops.push_back(upgradeLoopArgument(MD));
6431
6432 return MDTuple::get(T->getContext(), Ops);
6433}
6434
6435std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6436 Triple T(TT);
6437 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6438 // the address space of globals to 1. This does not apply to SPIRV Logical.
6439 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6440 !DL.contains("-G") && !DL.starts_with("G")) {
6441 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6442 }
6443
6444 if (T.isLoongArch64() || T.isRISCV64()) {
6445 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6446 auto I = DL.find("-n64-");
6447 if (I != StringRef::npos)
6448 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6449 return DL.str();
6450 }
6451
6452 // AMDGPU data layout upgrades.
6453 std::string Res = DL.str();
6454 if (T.isAMDGPU()) {
6455 // Define address spaces for constants.
6456 if (!DL.contains("-G") && !DL.starts_with("G"))
6457 Res.append(Res.empty() ? "G1" : "-G1");
6458
6459 // AMDGCN data layout upgrades.
6460 if (T.isAMDGCN()) {
6461
6462 // Add missing non-integral declarations.
6463 // This goes before adding new address spaces to prevent incoherent string
6464 // values.
6465 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6466 Res.append("-ni:7:8:9");
6467 // Update ni:7 to ni:7:8:9.
6468 if (DL.ends_with("ni:7"))
6469 Res.append(":8:9");
6470 if (DL.ends_with("ni:7:8"))
6471 Res.append(":9");
6472
6473 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6474 // resources). An empty data layout has already been upgraded to G1 by now.
6475 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6476 Res.append("-p7:160:256:256:32");
6477 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6478 Res.append("-p8:128:128:128:48");
6479 constexpr StringRef OldP8("-p8:128:128-");
6480 if (DL.contains(OldP8))
6481 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6482 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6483 Res.append("-p9:192:256:256:32");
6484 }
6485
6486 // Upgrade the ELF mangling mode.
6487 if (!DL.contains("m:e"))
6488 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6489
6490 return Res;
6491 }
6492
6493 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6494 // If the datalayout matches the expected format, add pointer size address
6495 // spaces to the datalayout.
6496 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6497 if (!DL.contains(AddrSpaces)) {
6498 SmallVector<StringRef, 4> Groups;
6499 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6500 if (R.match(Res, &Groups))
6501 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6502 }
6503 };
6504
6505 // AArch64 data layout upgrades.
6506 if (T.isAArch64()) {
6507 // Add "-Fn32"
6508 if (!DL.empty() && !DL.contains("-Fn32"))
6509 Res.append("-Fn32");
6510 AddPtr32Ptr64AddrSpaces();
6511 return Res;
6512 }
6513
6514 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6515 T.isWasm()) {
6516 // Mips64 with the o32 ABI did not add "-i128:128".
6517 // Add "-i128:128" if it is missing.
6518 std::string I64 = "-i64:64";
6519 std::string I128 = "-i128:128";
6520 if (!StringRef(Res).contains(I128)) {
6521 size_t Pos = Res.find(I64);
6522 if (Pos != size_t(-1))
6523 Res.insert(Pos + I64.size(), I128);
6524 }
6525 }
6526
6527 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6528 size_t Pos = Res.find("-S128");
6529 if (Pos == StringRef::npos)
6530 Pos = Res.size();
6531 Res.insert(Pos, "-f64:32:64");
6532 }
6533
6534 if (!T.isX86())
6535 return Res;
6536
6537 AddPtr32Ptr64AddrSpaces();
6538
6539 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6540 // for i128 operations prior to this being reflected in the data layout, and
6541 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6542 // boundaries, so although this is a breaking change, the upgrade is expected
6543 // to fix more IR than it breaks.
6544 // Intel MCU is an exception and uses 4-byte-alignment.
6545 if (!T.isOSIAMCU()) {
6546 std::string I128 = "-i128:128";
6547 if (StringRef Ref = Res; !Ref.contains(I128)) {
6548 SmallVector<StringRef, 4> Groups;
6549 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6550 if (R.match(Res, &Groups))
6551 Res = (Groups[1] + I128 + Groups[3]).str();
6552 }
6553 }
6554
6555 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6556 // Raising the alignment is safe because Clang did not produce f80 values in
6557 // the MSVC environment before this upgrade was added.
6558 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6559 StringRef Ref = Res;
6560 auto I = Ref.find("-f80:32-");
6561 if (I != StringRef::npos)
6562 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6563 }
6564
6565 return Res;
6566}
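// For illustration, a hypothetical 32-bit x86 MSVC layout
//   "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
// would, per the rules above, gain "-p270:32:32-p271:32:32-p272:64:64" and
// "-i128:128", and have "-f80:32" raised to "-f80:128".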
6567
6568void llvm::UpgradeAttributes(AttrBuilder &B) {
6569 StringRef FramePointer;
6570 Attribute A = B.getAttribute("no-frame-pointer-elim");
6571 if (A.isValid()) {
6572 // The value can be "true" or "false".
6573 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6574 B.removeAttribute("no-frame-pointer-elim");
6575 }
6576 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6577 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6578 if (FramePointer != "all")
6579 FramePointer = "non-leaf";
6580 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6581 }
6582 if (!FramePointer.empty())
6583 B.addAttribute("frame-pointer", FramePointer);
6584
6585 A = B.getAttribute("null-pointer-is-valid");
6586 if (A.isValid()) {
6587 // The value can be "true" or "false".
6588 bool NullPointerIsValid = A.getValueAsString() == "true";
6589 B.removeAttribute("null-pointer-is-valid");
6590 if (NullPointerIsValid)
6591 B.addAttribute(Attribute::NullPointerIsValid);
6592 }
6593}
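// For illustration, the attribute rewrites are:
//   "no-frame-pointer-elim"="true"           -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"          -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" (alone) -> "frame-pointer"="non-leaf"
//   "null-pointer-is-valid"="true"           -> Attribute::NullPointerIsValid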
6594
6595void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6596 // clang.arc.attachedcall bundles are now required to have an operand.
6597 // If they don't, it's okay to drop them entirely: when there is an operand,
6598 // the "attachedcall" is meaningful and required, but without an operand,
6599 // it's just a marker NOP. Dropping it merely prevents an optimization.
6600 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6601 return OBD.getTag() == "clang.arc.attachedcall" &&
6602 OBD.inputs().empty();
6603 });
6604}