LLVM 20.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/StringRef.h"
20#include "llvm/IR/Constants.h"
21#include "llvm/IR/DebugInfo.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/InstVisitor.h"
27#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/IntrinsicsNVPTX.h"
33#include "llvm/IR/IntrinsicsRISCV.h"
34#include "llvm/IR/IntrinsicsWebAssembly.h"
35#include "llvm/IR/IntrinsicsX86.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Metadata.h"
39#include "llvm/IR/Module.h"
40#include "llvm/IR/Value.h"
41#include "llvm/IR/Verifier.h"
45#include "llvm/Support/Regex.h"
47#include <cstring>
48
49using namespace llvm;
50
51static cl::opt<bool>
52 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
53 cl::desc("Disable autoupgrade of debug info"));
54
55static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
56
57// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
58// changed their type from v4f32 to v2i64.
60 Function *&NewFn) {
61 // Check whether this is an old version of the function, which received
62 // v4f32 arguments.
63 Type *Arg0Type = F->getFunctionType()->getParamType(0);
64 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
65 return false;
66
67 // Yes, it's old, replace it with new version.
68 rename(F);
69 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
70 return true;
71}
72
73// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
74// arguments have changed their type from i32 to i8.
76 Function *&NewFn) {
77 // Check that the last argument is an i32.
78 Type *LastArgType = F->getFunctionType()->getParamType(
79 F->getFunctionType()->getNumParams() - 1);
80 if (!LastArgType->isIntegerTy(32))
81 return false;
82
83 // Move this function aside and map down.
84 rename(F);
85 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
86 return true;
87}
88
89// Upgrade the declaration of fp compare intrinsics that change return type
90// from scalar to vXi1 mask.
92 Function *&NewFn) {
93 // Check if the return type is a vector.
94 if (F->getReturnType()->isVectorTy())
95 return false;
96
97 rename(F);
98 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
99 return true;
100}
101
103 Function *&NewFn) {
104 if (F->getReturnType()->getScalarType()->isBFloatTy())
105 return false;
106
107 rename(F);
108 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
109 return true;
110}
111
113 Function *&NewFn) {
114 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
115 return false;
116
117 rename(F);
118 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
119 return true;
120}
121
123 // All of the intrinsics matches below should be marked with which llvm
124 // version started autoupgrading them. At some point in the future we would
125 // like to use this information to remove upgrade code for some older
126 // intrinsics. It is currently undecided how we will determine that future
127 // point.
128 if (Name.consume_front("avx."))
129 return (Name.starts_with("blend.p") || // Added in 3.7
130 Name == "cvt.ps2.pd.256" || // Added in 3.9
131 Name == "cvtdq2.pd.256" || // Added in 3.9
132 Name == "cvtdq2.ps.256" || // Added in 7.0
133 Name.starts_with("movnt.") || // Added in 3.2
134 Name.starts_with("sqrt.p") || // Added in 7.0
135 Name.starts_with("storeu.") || // Added in 3.9
136 Name.starts_with("vbroadcast.s") || // Added in 3.5
137 Name.starts_with("vbroadcastf128") || // Added in 4.0
138 Name.starts_with("vextractf128.") || // Added in 3.7
139 Name.starts_with("vinsertf128.") || // Added in 3.7
140 Name.starts_with("vperm2f128.") || // Added in 6.0
141 Name.starts_with("vpermil.")); // Added in 3.1
142
143 if (Name.consume_front("avx2."))
144 return (Name == "movntdqa" || // Added in 5.0
145 Name.starts_with("pabs.") || // Added in 6.0
146 Name.starts_with("padds.") || // Added in 8.0
147 Name.starts_with("paddus.") || // Added in 8.0
148 Name.starts_with("pblendd.") || // Added in 3.7
149 Name == "pblendw" || // Added in 3.7
150 Name.starts_with("pbroadcast") || // Added in 3.8
151 Name.starts_with("pcmpeq.") || // Added in 3.1
152 Name.starts_with("pcmpgt.") || // Added in 3.1
153 Name.starts_with("pmax") || // Added in 3.9
154 Name.starts_with("pmin") || // Added in 3.9
155 Name.starts_with("pmovsx") || // Added in 3.9
156 Name.starts_with("pmovzx") || // Added in 3.9
157 Name == "pmul.dq" || // Added in 7.0
158 Name == "pmulu.dq" || // Added in 7.0
159 Name.starts_with("psll.dq") || // Added in 3.7
160 Name.starts_with("psrl.dq") || // Added in 3.7
161 Name.starts_with("psubs.") || // Added in 8.0
162 Name.starts_with("psubus.") || // Added in 8.0
163 Name.starts_with("vbroadcast") || // Added in 3.8
164 Name == "vbroadcasti128" || // Added in 3.7
165 Name == "vextracti128" || // Added in 3.7
166 Name == "vinserti128" || // Added in 3.7
167 Name == "vperm2i128"); // Added in 6.0
168
169 if (Name.consume_front("avx512.")) {
170 if (Name.consume_front("mask."))
171 // 'avx512.mask.*'
172 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
173 Name.starts_with("and.") || // Added in 3.9
174 Name.starts_with("andn.") || // Added in 3.9
175 Name.starts_with("broadcast.s") || // Added in 3.9
176 Name.starts_with("broadcastf32x4.") || // Added in 6.0
177 Name.starts_with("broadcastf32x8.") || // Added in 6.0
178 Name.starts_with("broadcastf64x2.") || // Added in 6.0
179 Name.starts_with("broadcastf64x4.") || // Added in 6.0
180 Name.starts_with("broadcasti32x4.") || // Added in 6.0
181 Name.starts_with("broadcasti32x8.") || // Added in 6.0
182 Name.starts_with("broadcasti64x2.") || // Added in 6.0
183 Name.starts_with("broadcasti64x4.") || // Added in 6.0
184 Name.starts_with("cmp.b") || // Added in 5.0
185 Name.starts_with("cmp.d") || // Added in 5.0
186 Name.starts_with("cmp.q") || // Added in 5.0
187 Name.starts_with("cmp.w") || // Added in 5.0
188 Name.starts_with("compress.b") || // Added in 9.0
189 Name.starts_with("compress.d") || // Added in 9.0
190 Name.starts_with("compress.p") || // Added in 9.0
191 Name.starts_with("compress.q") || // Added in 9.0
192 Name.starts_with("compress.store.") || // Added in 7.0
193 Name.starts_with("compress.w") || // Added in 9.0
194 Name.starts_with("conflict.") || // Added in 9.0
195 Name.starts_with("cvtdq2pd.") || // Added in 4.0
196 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
197 Name == "cvtpd2dq.256" || // Added in 7.0
198 Name == "cvtpd2ps.256" || // Added in 7.0
199 Name == "cvtps2pd.128" || // Added in 7.0
200 Name == "cvtps2pd.256" || // Added in 7.0
201 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
202 Name == "cvtqq2ps.256" || // Added in 9.0
203 Name == "cvtqq2ps.512" || // Added in 9.0
204 Name == "cvttpd2dq.256" || // Added in 7.0
205 Name == "cvttps2dq.128" || // Added in 7.0
206 Name == "cvttps2dq.256" || // Added in 7.0
207 Name.starts_with("cvtudq2pd.") || // Added in 4.0
208 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
209 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
210 Name == "cvtuqq2ps.256" || // Added in 9.0
211 Name == "cvtuqq2ps.512" || // Added in 9.0
212 Name.starts_with("dbpsadbw.") || // Added in 7.0
213 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
214 Name.starts_with("expand.b") || // Added in 9.0
215 Name.starts_with("expand.d") || // Added in 9.0
216 Name.starts_with("expand.load.") || // Added in 7.0
217 Name.starts_with("expand.p") || // Added in 9.0
218 Name.starts_with("expand.q") || // Added in 9.0
219 Name.starts_with("expand.w") || // Added in 9.0
220 Name.starts_with("fpclass.p") || // Added in 7.0
221 Name.starts_with("insert") || // Added in 4.0
222 Name.starts_with("load.") || // Added in 3.9
223 Name.starts_with("loadu.") || // Added in 3.9
224 Name.starts_with("lzcnt.") || // Added in 5.0
225 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
226 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
227 Name.starts_with("movddup") || // Added in 3.9
228 Name.starts_with("move.s") || // Added in 4.0
229 Name.starts_with("movshdup") || // Added in 3.9
230 Name.starts_with("movsldup") || // Added in 3.9
231 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
232 Name.starts_with("or.") || // Added in 3.9
233 Name.starts_with("pabs.") || // Added in 6.0
234 Name.starts_with("packssdw.") || // Added in 5.0
235 Name.starts_with("packsswb.") || // Added in 5.0
236 Name.starts_with("packusdw.") || // Added in 5.0
237 Name.starts_with("packuswb.") || // Added in 5.0
238 Name.starts_with("padd.") || // Added in 4.0
239 Name.starts_with("padds.") || // Added in 8.0
240 Name.starts_with("paddus.") || // Added in 8.0
241 Name.starts_with("palignr.") || // Added in 3.9
242 Name.starts_with("pand.") || // Added in 3.9
243 Name.starts_with("pandn.") || // Added in 3.9
244 Name.starts_with("pavg") || // Added in 6.0
245 Name.starts_with("pbroadcast") || // Added in 6.0
246 Name.starts_with("pcmpeq.") || // Added in 3.9
247 Name.starts_with("pcmpgt.") || // Added in 3.9
248 Name.starts_with("perm.df.") || // Added in 3.9
249 Name.starts_with("perm.di.") || // Added in 3.9
250 Name.starts_with("permvar.") || // Added in 7.0
251 Name.starts_with("pmaddubs.w.") || // Added in 7.0
252 Name.starts_with("pmaddw.d.") || // Added in 7.0
253 Name.starts_with("pmax") || // Added in 4.0
254 Name.starts_with("pmin") || // Added in 4.0
255 Name == "pmov.qd.256" || // Added in 9.0
256 Name == "pmov.qd.512" || // Added in 9.0
257 Name == "pmov.wb.256" || // Added in 9.0
258 Name == "pmov.wb.512" || // Added in 9.0
259 Name.starts_with("pmovsx") || // Added in 4.0
260 Name.starts_with("pmovzx") || // Added in 4.0
261 Name.starts_with("pmul.dq.") || // Added in 4.0
262 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
263 Name.starts_with("pmulh.w.") || // Added in 7.0
264 Name.starts_with("pmulhu.w.") || // Added in 7.0
265 Name.starts_with("pmull.") || // Added in 4.0
266 Name.starts_with("pmultishift.qb.") || // Added in 8.0
267 Name.starts_with("pmulu.dq.") || // Added in 4.0
268 Name.starts_with("por.") || // Added in 3.9
269 Name.starts_with("prol.") || // Added in 8.0
270 Name.starts_with("prolv.") || // Added in 8.0
271 Name.starts_with("pror.") || // Added in 8.0
272 Name.starts_with("prorv.") || // Added in 8.0
273 Name.starts_with("pshuf.b.") || // Added in 4.0
274 Name.starts_with("pshuf.d.") || // Added in 3.9
275 Name.starts_with("pshufh.w.") || // Added in 3.9
276 Name.starts_with("pshufl.w.") || // Added in 3.9
277 Name.starts_with("psll.d") || // Added in 4.0
278 Name.starts_with("psll.q") || // Added in 4.0
279 Name.starts_with("psll.w") || // Added in 4.0
280 Name.starts_with("pslli") || // Added in 4.0
281 Name.starts_with("psllv") || // Added in 4.0
282 Name.starts_with("psra.d") || // Added in 4.0
283 Name.starts_with("psra.q") || // Added in 4.0
284 Name.starts_with("psra.w") || // Added in 4.0
285 Name.starts_with("psrai") || // Added in 4.0
286 Name.starts_with("psrav") || // Added in 4.0
287 Name.starts_with("psrl.d") || // Added in 4.0
288 Name.starts_with("psrl.q") || // Added in 4.0
289 Name.starts_with("psrl.w") || // Added in 4.0
290 Name.starts_with("psrli") || // Added in 4.0
291 Name.starts_with("psrlv") || // Added in 4.0
292 Name.starts_with("psub.") || // Added in 4.0
293 Name.starts_with("psubs.") || // Added in 8.0
294 Name.starts_with("psubus.") || // Added in 8.0
295 Name.starts_with("pternlog.") || // Added in 7.0
296 Name.starts_with("punpckh") || // Added in 3.9
297 Name.starts_with("punpckl") || // Added in 3.9
298 Name.starts_with("pxor.") || // Added in 3.9
299 Name.starts_with("shuf.f") || // Added in 6.0
300 Name.starts_with("shuf.i") || // Added in 6.0
301 Name.starts_with("shuf.p") || // Added in 4.0
302 Name.starts_with("sqrt.p") || // Added in 7.0
303 Name.starts_with("store.b.") || // Added in 3.9
304 Name.starts_with("store.d.") || // Added in 3.9
305 Name.starts_with("store.p") || // Added in 3.9
306 Name.starts_with("store.q.") || // Added in 3.9
307 Name.starts_with("store.w.") || // Added in 3.9
308 Name == "store.ss" || // Added in 7.0
309 Name.starts_with("storeu.") || // Added in 3.9
310 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
311 Name.starts_with("ucmp.") || // Added in 5.0
312 Name.starts_with("unpckh.") || // Added in 3.9
313 Name.starts_with("unpckl.") || // Added in 3.9
314 Name.starts_with("valign.") || // Added in 4.0
315 Name == "vcvtph2ps.128" || // Added in 11.0
316 Name == "vcvtph2ps.256" || // Added in 11.0
317 Name.starts_with("vextract") || // Added in 4.0
318 Name.starts_with("vfmadd.") || // Added in 7.0
319 Name.starts_with("vfmaddsub.") || // Added in 7.0
320 Name.starts_with("vfnmadd.") || // Added in 7.0
321 Name.starts_with("vfnmsub.") || // Added in 7.0
322 Name.starts_with("vpdpbusd.") || // Added in 7.0
323 Name.starts_with("vpdpbusds.") || // Added in 7.0
324 Name.starts_with("vpdpwssd.") || // Added in 7.0
325 Name.starts_with("vpdpwssds.") || // Added in 7.0
326 Name.starts_with("vpermi2var.") || // Added in 7.0
327 Name.starts_with("vpermil.p") || // Added in 3.9
328 Name.starts_with("vpermilvar.") || // Added in 4.0
329 Name.starts_with("vpermt2var.") || // Added in 7.0
330 Name.starts_with("vpmadd52") || // Added in 7.0
331 Name.starts_with("vpshld.") || // Added in 7.0
332 Name.starts_with("vpshldv.") || // Added in 8.0
333 Name.starts_with("vpshrd.") || // Added in 7.0
334 Name.starts_with("vpshrdv.") || // Added in 8.0
335 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
336 Name.starts_with("xor.")); // Added in 3.9
337
338 if (Name.consume_front("mask3."))
339 // 'avx512.mask3.*'
340 return (Name.starts_with("vfmadd.") || // Added in 7.0
341 Name.starts_with("vfmaddsub.") || // Added in 7.0
342 Name.starts_with("vfmsub.") || // Added in 7.0
343 Name.starts_with("vfmsubadd.") || // Added in 7.0
344 Name.starts_with("vfnmsub.")); // Added in 7.0
345
346 if (Name.consume_front("maskz."))
347 // 'avx512.maskz.*'
348 return (Name.starts_with("pternlog.") || // Added in 7.0
349 Name.starts_with("vfmadd.") || // Added in 7.0
350 Name.starts_with("vfmaddsub.") || // Added in 7.0
351 Name.starts_with("vpdpbusd.") || // Added in 7.0
352 Name.starts_with("vpdpbusds.") || // Added in 7.0
353 Name.starts_with("vpdpwssd.") || // Added in 7.0
354 Name.starts_with("vpdpwssds.") || // Added in 7.0
355 Name.starts_with("vpermt2var.") || // Added in 7.0
356 Name.starts_with("vpmadd52") || // Added in 7.0
357 Name.starts_with("vpshldv.") || // Added in 8.0
358 Name.starts_with("vpshrdv.")); // Added in 8.0
359
360 // 'avx512.*'
361 return (Name == "movntdqa" || // Added in 5.0
362 Name == "pmul.dq.512" || // Added in 7.0
363 Name == "pmulu.dq.512" || // Added in 7.0
364 Name.starts_with("broadcastm") || // Added in 6.0
365 Name.starts_with("cmp.p") || // Added in 12.0
366 Name.starts_with("cvtb2mask.") || // Added in 7.0
367 Name.starts_with("cvtd2mask.") || // Added in 7.0
368 Name.starts_with("cvtmask2") || // Added in 5.0
369 Name.starts_with("cvtq2mask.") || // Added in 7.0
370 Name == "cvtusi2sd" || // Added in 7.0
371 Name.starts_with("cvtw2mask.") || // Added in 7.0
372 Name == "kand.w" || // Added in 7.0
373 Name == "kandn.w" || // Added in 7.0
374 Name == "knot.w" || // Added in 7.0
375 Name == "kor.w" || // Added in 7.0
376 Name == "kortestc.w" || // Added in 7.0
377 Name == "kortestz.w" || // Added in 7.0
378 Name.starts_with("kunpck") || // added in 6.0
379 Name == "kxnor.w" || // Added in 7.0
380 Name == "kxor.w" || // Added in 7.0
381 Name.starts_with("padds.") || // Added in 8.0
382 Name.starts_with("pbroadcast") || // Added in 3.9
383 Name.starts_with("prol") || // Added in 8.0
384 Name.starts_with("pror") || // Added in 8.0
385 Name.starts_with("psll.dq") || // Added in 3.9
386 Name.starts_with("psrl.dq") || // Added in 3.9
387 Name.starts_with("psubs.") || // Added in 8.0
388 Name.starts_with("ptestm") || // Added in 6.0
389 Name.starts_with("ptestnm") || // Added in 6.0
390 Name.starts_with("storent.") || // Added in 3.9
391 Name.starts_with("vbroadcast.s") || // Added in 7.0
392 Name.starts_with("vpshld.") || // Added in 8.0
393 Name.starts_with("vpshrd.")); // Added in 8.0
394 }
395
396 if (Name.consume_front("fma."))
397 return (Name.starts_with("vfmadd.") || // Added in 7.0
398 Name.starts_with("vfmsub.") || // Added in 7.0
399 Name.starts_with("vfmsubadd.") || // Added in 7.0
400 Name.starts_with("vfnmadd.") || // Added in 7.0
401 Name.starts_with("vfnmsub.")); // Added in 7.0
402
403 if (Name.consume_front("fma4."))
404 return Name.starts_with("vfmadd.s"); // Added in 7.0
405
406 if (Name.consume_front("sse."))
407 return (Name == "add.ss" || // Added in 4.0
408 Name == "cvtsi2ss" || // Added in 7.0
409 Name == "cvtsi642ss" || // Added in 7.0
410 Name == "div.ss" || // Added in 4.0
411 Name == "mul.ss" || // Added in 4.0
412 Name.starts_with("sqrt.p") || // Added in 7.0
413 Name == "sqrt.ss" || // Added in 7.0
414 Name.starts_with("storeu.") || // Added in 3.9
415 Name == "sub.ss"); // Added in 4.0
416
417 if (Name.consume_front("sse2."))
418 return (Name == "add.sd" || // Added in 4.0
419 Name == "cvtdq2pd" || // Added in 3.9
420 Name == "cvtdq2ps" || // Added in 7.0
421 Name == "cvtps2pd" || // Added in 3.9
422 Name == "cvtsi2sd" || // Added in 7.0
423 Name == "cvtsi642sd" || // Added in 7.0
424 Name == "cvtss2sd" || // Added in 7.0
425 Name == "div.sd" || // Added in 4.0
426 Name == "mul.sd" || // Added in 4.0
427 Name.starts_with("padds.") || // Added in 8.0
428 Name.starts_with("paddus.") || // Added in 8.0
429 Name.starts_with("pcmpeq.") || // Added in 3.1
430 Name.starts_with("pcmpgt.") || // Added in 3.1
431 Name == "pmaxs.w" || // Added in 3.9
432 Name == "pmaxu.b" || // Added in 3.9
433 Name == "pmins.w" || // Added in 3.9
434 Name == "pminu.b" || // Added in 3.9
435 Name == "pmulu.dq" || // Added in 7.0
436 Name.starts_with("pshuf") || // Added in 3.9
437 Name.starts_with("psll.dq") || // Added in 3.7
438 Name.starts_with("psrl.dq") || // Added in 3.7
439 Name.starts_with("psubs.") || // Added in 8.0
440 Name.starts_with("psubus.") || // Added in 8.0
441 Name.starts_with("sqrt.p") || // Added in 7.0
442 Name == "sqrt.sd" || // Added in 7.0
443 Name == "storel.dq" || // Added in 3.9
444 Name.starts_with("storeu.") || // Added in 3.9
445 Name == "sub.sd"); // Added in 4.0
446
447 if (Name.consume_front("sse41."))
448 return (Name.starts_with("blendp") || // Added in 3.7
449 Name == "movntdqa" || // Added in 5.0
450 Name == "pblendw" || // Added in 3.7
451 Name == "pmaxsb" || // Added in 3.9
452 Name == "pmaxsd" || // Added in 3.9
453 Name == "pmaxud" || // Added in 3.9
454 Name == "pmaxuw" || // Added in 3.9
455 Name == "pminsb" || // Added in 3.9
456 Name == "pminsd" || // Added in 3.9
457 Name == "pminud" || // Added in 3.9
458 Name == "pminuw" || // Added in 3.9
459 Name.starts_with("pmovsx") || // Added in 3.8
460 Name.starts_with("pmovzx") || // Added in 3.9
461 Name == "pmuldq"); // Added in 7.0
462
463 if (Name.consume_front("sse42."))
464 return Name == "crc32.64.8"; // Added in 3.4
465
466 if (Name.consume_front("sse4a."))
467 return Name.starts_with("movnt."); // Added in 3.9
468
469 if (Name.consume_front("ssse3."))
470 return (Name == "pabs.b.128" || // Added in 6.0
471 Name == "pabs.d.128" || // Added in 6.0
472 Name == "pabs.w.128"); // Added in 6.0
473
474 if (Name.consume_front("xop."))
475 return (Name == "vpcmov" || // Added in 3.8
476 Name == "vpcmov.256" || // Added in 5.0
477 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
478 Name.starts_with("vprot")); // Added in 8.0
479
480 return (Name == "addcarry.u32" || // Added in 8.0
481 Name == "addcarry.u64" || // Added in 8.0
482 Name == "addcarryx.u32" || // Added in 8.0
483 Name == "addcarryx.u64" || // Added in 8.0
484 Name == "subborrow.u32" || // Added in 8.0
485 Name == "subborrow.u64" || // Added in 8.0
486 Name.starts_with("vcvtph2ps.")); // Added in 11.0
487}
488
490 Function *&NewFn) {
491 // Only handle intrinsics that start with "x86.".
492 if (!Name.consume_front("x86."))
493 return false;
494
496 NewFn = nullptr;
497 return true;
498 }
499
500 if (Name == "rdtscp") { // Added in 8.0
501 // If this intrinsic has 0 operands, it's the new version.
502 if (F->getFunctionType()->getNumParams() == 0)
503 return false;
504
505 rename(F);
506 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
507 Intrinsic::x86_rdtscp);
508 return true;
509 }
510
512
513 // SSE4.1 ptest functions may have an old signature.
514 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
516 .Case("c", Intrinsic::x86_sse41_ptestc)
517 .Case("z", Intrinsic::x86_sse41_ptestz)
518 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
521 return upgradePTESTIntrinsic(F, ID, NewFn);
522
523 return false;
524 }
525
526 // Several blend and other instructions with masks used the wrong number of
527 // bits.
528
529 // Added in 3.6
531 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
532 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
533 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
534 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
535 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
536 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
539 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
540
541 if (Name.consume_front("avx512.mask.cmp.")) {
542 // Added in 7.0
544 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
545 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
546 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
547 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
548 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
549 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
552 return upgradeX86MaskedFPCompare(F, ID, NewFn);
553 return false; // No other 'x86.avx512.mask.cmp.*'.
554 }
555
556 if (Name.consume_front("avx512bf16.")) {
557 // Added in 9.0
559 .Case("cvtne2ps2bf16.128",
560 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
561 .Case("cvtne2ps2bf16.256",
562 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
563 .Case("cvtne2ps2bf16.512",
564 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
565 .Case("mask.cvtneps2bf16.128",
566 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
567 .Case("cvtneps2bf16.256",
568 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
569 .Case("cvtneps2bf16.512",
570 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
573 return upgradeX86BF16Intrinsic(F, ID, NewFn);
574
575 // Added in 9.0
577 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
578 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
579 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
582 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
583 return false; // No other 'x86.avx512bf16.*'.
584 }
585
586 if (Name.consume_front("xop.")) {
588 if (Name.starts_with("vpermil2")) { // Added in 3.9
589 // Upgrade any XOP PERMIL2 index operand still using a float/double
590 // vector.
591 auto Idx = F->getFunctionType()->getParamType(2);
592 if (Idx->isFPOrFPVectorTy()) {
593 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
594 unsigned EltSize = Idx->getScalarSizeInBits();
595 if (EltSize == 64 && IdxSize == 128)
596 ID = Intrinsic::x86_xop_vpermil2pd;
597 else if (EltSize == 32 && IdxSize == 128)
598 ID = Intrinsic::x86_xop_vpermil2ps;
599 else if (EltSize == 64 && IdxSize == 256)
600 ID = Intrinsic::x86_xop_vpermil2pd_256;
601 else
602 ID = Intrinsic::x86_xop_vpermil2ps_256;
603 }
604 } else if (F->arg_size() == 2)
605 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
607 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
608 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
610
612 rename(F);
613 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
614 return true;
615 }
616 return false; // No other 'x86.xop.*'
617 }
618
619 if (Name == "seh.recoverfp") {
620 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
621 Intrinsic::eh_recoverfp);
622 return true;
623 }
624
625 return false;
626}
627
628// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
629// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
632 Function *&NewFn) {
633 if (Name.starts_with("rbit")) {
634 // '(arm|aarch64).rbit'.
636 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
637 return true;
638 }
639
640 if (Name == "thread.pointer") {
641 // '(arm|aarch64).thread.pointer'.
642 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
643 Intrinsic::thread_pointer);
644 return true;
645 }
646
647 bool Neon = Name.consume_front("neon.");
648 if (Neon) {
649 // '(arm|aarch64).neon.*'.
650 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
651 // v16i8 respectively.
652 if (Name.consume_front("bfdot.")) {
653 // '(arm|aarch64).neon.bfdot.*'.
656 .Cases("v2f32.v8i8", "v4f32.v16i8",
657 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
658 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
661 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
662 assert((OperandWidth == 64 || OperandWidth == 128) &&
663 "Unexpected operand width");
664 LLVMContext &Ctx = F->getParent()->getContext();
665 std::array<Type *, 2> Tys{
666 {F->getReturnType(),
667 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
668 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
669 return true;
670 }
671 return false; // No other '(arm|aarch64).neon.bfdot.*'.
672 }
673
674 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
675 // anymore and accept v8bf16 instead of v16i8.
676 if (Name.consume_front("bfm")) {
677 // '(arm|aarch64).neon.bfm*'.
678 if (Name.consume_back(".v4f32.v16i8")) {
679 // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
682 .Case("mla",
683 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
684 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
685 .Case("lalb",
686 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
687 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
688 .Case("lalt",
689 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
690 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
693 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
694 return true;
695 }
696 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
697 }
698 return false; // No other '(arm|aarch64).neon.bfm*.
699 }
700 // Continue on to Aarch64 Neon or Arm Neon.
701 }
702 // Continue on to Arm or Aarch64.
703
704 if (IsArm) {
705 // 'arm.*'.
706 if (Neon) {
707 // 'arm.neon.*'.
709 .StartsWith("vclz.", Intrinsic::ctlz)
710 .StartsWith("vcnt.", Intrinsic::ctpop)
711 .StartsWith("vqadds.", Intrinsic::sadd_sat)
712 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
713 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
714 .StartsWith("vqsubu.", Intrinsic::usub_sat)
717 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
718 F->arg_begin()->getType());
719 return true;
720 }
721
722 if (Name.consume_front("vst")) {
723 // 'arm.neon.vst*'.
724 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
726 if (vstRegex.match(Name, &Groups)) {
727 static const Intrinsic::ID StoreInts[] = {
728 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
729 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
730
731 static const Intrinsic::ID StoreLaneInts[] = {
732 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
733 Intrinsic::arm_neon_vst4lane};
734
735 auto fArgs = F->getFunctionType()->params();
736 Type *Tys[] = {fArgs[0], fArgs[1]};
737 if (Groups[1].size() == 1)
739 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
740 else
742 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
743 return true;
744 }
745 return false; // No other 'arm.neon.vst*'.
746 }
747
748 return false; // No other 'arm.neon.*'.
749 }
750
751 if (Name.consume_front("mve.")) {
752 // 'arm.mve.*'.
753 if (Name == "vctp64") {
754 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
755 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
756 // the function and deal with it below in UpgradeIntrinsicCall.
757 rename(F);
758 return true;
759 }
760 return false; // Not 'arm.mve.vctp64'.
761 }
762
763 // These too are changed to accept a v2i1 instead of the old v4i1.
764 if (Name.consume_back(".v4i1")) {
765 // 'arm.mve.*.v4i1'.
766 if (Name.consume_back(".predicated.v2i64.v4i32"))
767 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
768 return Name == "mull.int" || Name == "vqdmull";
769
770 if (Name.consume_back(".v2i64")) {
771 // 'arm.mve.*.v2i64.v4i1'
772 bool IsGather = Name.consume_front("vldr.gather.");
773 if (IsGather || Name.consume_front("vstr.scatter.")) {
774 if (Name.consume_front("base.")) {
775 // Optional 'wb.' prefix.
776 Name.consume_front("wb.");
777 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
778 // predicated.v2i64.v2i64.v4i1'.
779 return Name == "predicated.v2i64";
780 }
781
782 if (Name.consume_front("offset.predicated."))
783 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
784 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
785
786 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
787 return false;
788 }
789
790 return false; // No other 'arm.mve.*.v2i64.v4i1'.
791 }
792 return false; // No other 'arm.mve.*.v4i1'.
793 }
794 return false; // No other 'arm.mve.*'.
795 }
796
797 if (Name.consume_front("cde.vcx")) {
798 // 'arm.cde.vcx*'.
799 if (Name.consume_back(".predicated.v2i64.v4i1"))
800 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
801 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
802 Name == "3q" || Name == "3qa";
803
804 return false; // No other 'arm.cde.vcx*'.
805 }
806 } else {
807 // 'aarch64.*'.
808 if (Neon) {
809 // 'aarch64.neon.*'.
811 .StartsWith("frintn", Intrinsic::roundeven)
812 .StartsWith("rbit", Intrinsic::bitreverse)
815 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
816 F->arg_begin()->getType());
817 return true;
818 }
819
820 if (Name.starts_with("addp")) {
821 // 'aarch64.neon.addp*'.
822 if (F->arg_size() != 2)
823 return false; // Invalid IR.
824 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
825 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
827 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
828 return true;
829 }
830 }
831 return false; // No other 'aarch64.neon.*'.
832 }
833 if (Name.consume_front("sve.")) {
834 // 'aarch64.sve.*'.
835 if (Name.consume_front("bf")) {
836 if (Name.consume_back(".lane")) {
837 // 'aarch64.sve.bf*.lane'.
840 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
841 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
842 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
846 return true;
847 }
848 return false; // No other 'aarch64.sve.bf*.lane'.
849 }
850 return false; // No other 'aarch64.sve.bf*'.
851 }
852
853 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
854 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
855 NewFn = nullptr;
856 return true;
857 }
858
859 if (Name.consume_front("addqv")) {
860 // 'aarch64.sve.addqv'.
861 if (!F->getReturnType()->isFPOrFPVectorTy())
862 return false;
863
864 auto Args = F->getFunctionType()->params();
865 Type *Tys[] = {F->getReturnType(), Args[1]};
867 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
868 return true;
869 }
870
871 if (Name.consume_front("ld")) {
872 // 'aarch64.sve.ld*'.
873 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
874 if (LdRegex.match(Name)) {
875 Type *ScalarTy =
876 cast<VectorType>(F->getReturnType())->getElementType();
877 ElementCount EC =
878 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
879 Type *Ty = VectorType::get(ScalarTy, EC);
880 static const Intrinsic::ID LoadIDs[] = {
881 Intrinsic::aarch64_sve_ld2_sret,
882 Intrinsic::aarch64_sve_ld3_sret,
883 Intrinsic::aarch64_sve_ld4_sret,
884 };
885 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
886 LoadIDs[Name[0] - '2'], Ty);
887 return true;
888 }
889 return false; // No other 'aarch64.sve.ld*'.
890 }
891
892 if (Name.consume_front("tuple.")) {
893 // 'aarch64.sve.tuple.*'.
894 if (Name.starts_with("get")) {
895 // 'aarch64.sve.tuple.get*'.
896 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
898 F->getParent(), Intrinsic::vector_extract, Tys);
899 return true;
900 }
901
902 if (Name.starts_with("set")) {
903 // 'aarch64.sve.tuple.set*'.
904 auto Args = F->getFunctionType()->params();
905 Type *Tys[] = {Args[0], Args[2], Args[1]};
907 F->getParent(), Intrinsic::vector_insert, Tys);
908 return true;
909 }
910
911 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
912 if (CreateTupleRegex.match(Name)) {
913 // 'aarch64.sve.tuple.create*'.
914 auto Args = F->getFunctionType()->params();
915 Type *Tys[] = {F->getReturnType(), Args[1]};
917 F->getParent(), Intrinsic::vector_insert, Tys);
918 return true;
919 }
920 return false; // No other 'aarch64.sve.tuple.*'.
921 }
922 return false; // No other 'aarch64.sve.*'.
923 }
924 }
925 return false; // No other 'arm.*', 'aarch64.*'.
926}
927
929 if (Name.consume_front("abs."))
931 .Case("bf16", Intrinsic::nvvm_abs_bf16)
932 .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
934
935 if (Name.consume_front("fma.rn."))
937 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
938 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
939 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
940 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
941 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
942 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
943 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
944 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
945 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
946 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
947 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
948 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
950
951 if (Name.consume_front("fmax."))
953 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
954 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
955 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
956 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
957 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
958 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
959 .Case("ftz.nan.xorsign.abs.bf16",
960 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
961 .Case("ftz.nan.xorsign.abs.bf16x2",
962 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
963 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
964 .Case("ftz.xorsign.abs.bf16x2",
965 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
966 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
967 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
968 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
969 .Case("nan.xorsign.abs.bf16x2",
970 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
971 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
972 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
974
975 if (Name.consume_front("fmin."))
977 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
978 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
979 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
980 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
981 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
982 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
983 .Case("ftz.nan.xorsign.abs.bf16",
984 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
985 .Case("ftz.nan.xorsign.abs.bf16x2",
986 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
987 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
988 .Case("ftz.xorsign.abs.bf16x2",
989 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
990 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
991 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
992 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
993 .Case("nan.xorsign.abs.bf16x2",
994 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
995 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
996 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
998
999 if (Name.consume_front("neg."))
1001 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1002 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1004
1006}
1007
1009 bool CanUpgradeDebugIntrinsicsToRecords) {
1010 assert(F && "Illegal to upgrade a non-existent Function.");
1011
1012 StringRef Name = F->getName();
1013
1014 // Quickly eliminate it, if it's not a candidate.
1015 if (!Name.consume_front("llvm.") || Name.empty())
1016 return false;
1017
1018 switch (Name[0]) {
1019 default: break;
1020 case 'a': {
1021 bool IsArm = Name.consume_front("arm.");
1022 if (IsArm || Name.consume_front("aarch64.")) {
1023 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1024 return true;
1025 break;
1026 }
1027
1028 if (Name.consume_front("amdgcn.")) {
1029 if (Name == "alignbit") {
1030 // Target specific intrinsic became redundant
1032 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1033 return true;
1034 }
1035
1036 if (Name.consume_front("atomic.")) {
1037 if (Name.starts_with("inc") || Name.starts_with("dec")) {
1038 // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1039 // there's no new declaration.
1040 NewFn = nullptr;
1041 return true;
1042 }
1043 break; // No other 'amdgcn.atomic.*'
1044 }
1045
1046 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1047 Name.consume_front("flat.atomic.")) {
1048 if (Name.starts_with("fadd") ||
1049 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1050 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1051 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1052 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1053 // declaration.
1054 NewFn = nullptr;
1055 return true;
1056 }
1057 }
1058
1059 if (Name.starts_with("ldexp.")) {
1060 // Target specific intrinsic became redundant
1062 F->getParent(), Intrinsic::ldexp,
1063 {F->getReturnType(), F->getArg(1)->getType()});
1064 return true;
1065 }
1066 break; // No other 'amdgcn.*'
1067 }
1068
1069 break;
1070 }
1071 case 'c': {
1072 if (F->arg_size() == 1) {
1074 .StartsWith("ctlz.", Intrinsic::ctlz)
1075 .StartsWith("cttz.", Intrinsic::cttz)
1078 rename(F);
1079 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1080 F->arg_begin()->getType());
1081 return true;
1082 }
1083 }
1084
1085 if (F->arg_size() == 2 && Name == "coro.end") {
1086 rename(F);
1087 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1088 Intrinsic::coro_end);
1089 return true;
1090 }
1091
1092 break;
1093 }
1094 case 'd':
1095 if (Name.consume_front("dbg.")) {
1096 // Mark debug intrinsics for upgrade to new debug format.
1097 if (CanUpgradeDebugIntrinsicsToRecords &&
1098 F->getParent()->IsNewDbgInfoFormat) {
1099 if (Name == "addr" || Name == "value" || Name == "assign" ||
1100 Name == "declare" || Name == "label") {
1101 // There's no function to replace these with.
1102 NewFn = nullptr;
1103 // But we do want these to get upgraded.
1104 return true;
1105 }
1106 }
1107 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1108 // converted to DbgVariableRecords later.
1109 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1110 rename(F);
1111 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1112 Intrinsic::dbg_value);
1113 return true;
1114 }
1115 break; // No other 'dbg.*'.
1116 }
1117 break;
1118 case 'e':
1119 if (Name.consume_front("experimental.vector.")) {
1122 // Skip over extract.last.active, otherwise it will be 'upgraded'
1123 // to a regular vector extract which is a different operation.
1124 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1125 .StartsWith("extract.", Intrinsic::vector_extract)
1126 .StartsWith("insert.", Intrinsic::vector_insert)
1127 .StartsWith("splice.", Intrinsic::vector_splice)
1128 .StartsWith("reverse.", Intrinsic::vector_reverse)
1129 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1130 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1133 const auto *FT = F->getFunctionType();
1135 if (ID == Intrinsic::vector_extract ||
1136 ID == Intrinsic::vector_interleave2)
1137 // Extracting overloads the return type.
1138 Tys.push_back(FT->getReturnType());
1139 if (ID != Intrinsic::vector_interleave2)
1140 Tys.push_back(FT->getParamType(0));
1141 if (ID == Intrinsic::vector_insert)
1142 // Inserting overloads the inserted type.
1143 Tys.push_back(FT->getParamType(1));
1144 rename(F);
1145 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1146 return true;
1147 }
1148
1149 if (Name.consume_front("reduce.")) {
1151 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1152 if (R.match(Name, &Groups))
1154 .Case("add", Intrinsic::vector_reduce_add)
1155 .Case("mul", Intrinsic::vector_reduce_mul)
1156 .Case("and", Intrinsic::vector_reduce_and)
1157 .Case("or", Intrinsic::vector_reduce_or)
1158 .Case("xor", Intrinsic::vector_reduce_xor)
1159 .Case("smax", Intrinsic::vector_reduce_smax)
1160 .Case("smin", Intrinsic::vector_reduce_smin)
1161 .Case("umax", Intrinsic::vector_reduce_umax)
1162 .Case("umin", Intrinsic::vector_reduce_umin)
1163 .Case("fmax", Intrinsic::vector_reduce_fmax)
1164 .Case("fmin", Intrinsic::vector_reduce_fmin)
1166
1167 bool V2 = false;
1169 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1170 Groups.clear();
1171 V2 = true;
1172 if (R2.match(Name, &Groups))
1174 .Case("fadd", Intrinsic::vector_reduce_fadd)
1175 .Case("fmul", Intrinsic::vector_reduce_fmul)
1177 }
1179 rename(F);
1180 auto Args = F->getFunctionType()->params();
1181 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1182 {Args[V2 ? 1 : 0]});
1183 return true;
1184 }
1185 break; // No other 'experimental.vector.reduce.*'.
1186 }
1187 break; // No other 'experimental.vector.*'.
1188 }
1189 if (Name.consume_front("experimental.stepvector.")) {
1190 Intrinsic::ID ID = Intrinsic::stepvector;
1191 rename(F);
1193 F->getParent(), ID, F->getFunctionType()->getReturnType());
1194 return true;
1195 }
1196 break; // No other 'e*'.
1197 case 'f':
1198 if (Name.starts_with("flt.rounds")) {
1199 rename(F);
1200 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1201 Intrinsic::get_rounding);
1202 return true;
1203 }
1204 break;
1205 case 'i':
1206 if (Name.starts_with("invariant.group.barrier")) {
1207 // Rename invariant.group.barrier to launder.invariant.group
1208 auto Args = F->getFunctionType()->params();
1209 Type* ObjectPtr[1] = {Args[0]};
1210 rename(F);
1212 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1213 return true;
1214 }
1215 break;
1216 case 'm': {
1217 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1218 // alignment parameter to embedding the alignment as an attribute of
1219 // the pointer args.
1220 if (unsigned ID = StringSwitch<unsigned>(Name)
1221 .StartsWith("memcpy.", Intrinsic::memcpy)
1222 .StartsWith("memmove.", Intrinsic::memmove)
1223 .Default(0)) {
1224 if (F->arg_size() == 5) {
1225 rename(F);
1226 // Get the types of dest, src, and len
1227 ArrayRef<Type *> ParamTypes =
1228 F->getFunctionType()->params().slice(0, 3);
1229 NewFn =
1230 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1231 return true;
1232 }
1233 }
1234 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1235 rename(F);
1236 // Get the types of dest, and len
1237 const auto *FT = F->getFunctionType();
1238 Type *ParamTypes[2] = {
1239 FT->getParamType(0), // Dest
1240 FT->getParamType(2) // len
1241 };
1242 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1243 Intrinsic::memset, ParamTypes);
1244 return true;
1245 }
1246 break;
1247 }
1248 case 'n': {
1249 if (Name.consume_front("nvvm.")) {
1250 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1251 if (F->arg_size() == 1) {
1252 Intrinsic::ID IID =
1254 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1255 .Case("clz.i", Intrinsic::ctlz)
1256 .Case("popc.i", Intrinsic::ctpop)
1258 if (IID != Intrinsic::not_intrinsic) {
1259 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1260 {F->getReturnType()});
1261 return true;
1262 }
1263 }
1264
1265 // Check for nvvm intrinsics that need a return type adjustment.
1266 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1268 if (IID != Intrinsic::not_intrinsic) {
1269 NewFn = nullptr;
1270 return true;
1271 }
1272 }
1273
1274 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1275 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1276 //
1277 // TODO: We could add lohi.i2d.
1278 bool Expand = false;
1279 if (Name.consume_front("abs."))
1280 // nvvm.abs.{i,ll}
1281 Expand = Name == "i" || Name == "ll";
1282 else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1283 Expand = true;
1284 else if (Name.consume_front("max.") || Name.consume_front("min."))
1285 // nvvm.{min,max}.{s,i,ll,us,ui,ull}
1286 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1287 Name == "ui" || Name == "ull";
1288 else if (Name.consume_front("atomic.load.add."))
1289 // nvvm.atomic.load.add.{f32.p,f64.p}
1290 Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1291 else if (Name.consume_front("bitcast."))
1292 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1293 Expand =
1294 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1295 else if (Name.consume_front("rotate."))
1296 // nvvm.rotate.{b32,b64,right.b64}
1297 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1298 else if (Name.consume_front("ptr.gen.to."))
1299 // nvvm.ptr.gen.to.{local,shared,global,constant}
1300 Expand = Name.starts_with("local") || Name.starts_with("shared") ||
1301 Name.starts_with("global") || Name.starts_with("constant");
1302 else if (Name.consume_front("ptr."))
1303 // nvvm.ptr.{local,shared,global,constant}.to.gen
1304 Expand =
1305 (Name.consume_front("local") || Name.consume_front("shared") ||
1306 Name.consume_front("global") || Name.consume_front("constant")) &&
1307 Name.starts_with(".to.gen");
1308 else if (Name.consume_front("ldg.global."))
1309 // nvvm.ldg.global.{i,p,f}
1310 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1311 Name.starts_with("p."));
1312 else
1313 Expand = false;
1314
1315 if (Expand) {
1316 NewFn = nullptr;
1317 return true;
1318 }
1319 break; // No other 'nvvm.*'.
1320 }
1321 break;
1322 }
1323 case 'o':
1324 // We only need to change the name to match the mangling including the
1325 // address space.
1326 if (Name.starts_with("objectsize.")) {
1327 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1328 if (F->arg_size() == 2 || F->arg_size() == 3 ||
1329 F->getName() !=
1330 Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1331 rename(F);
1332 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1333 Intrinsic::objectsize, Tys);
1334 return true;
1335 }
1336 }
1337 break;
1338
1339 case 'p':
1340 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1341 rename(F);
1343 F->getParent(), Intrinsic::ptr_annotation,
1344 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1345 return true;
1346 }
1347 break;
1348
1349 case 'r': {
1350 if (Name.consume_front("riscv.")) {
1353 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1354 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1355 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1356 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1359 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1360 rename(F);
1361 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1362 return true;
1363 }
1364 break; // No other applicable upgrades.
1365 }
1366
1368 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1369 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1372 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1373 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1374 rename(F);
1375 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1376 return true;
1377 }
1378 break; // No other applicable upgrades.
1379 }
1380
1382 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1383 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1384 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1385 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1386 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1387 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1390 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1391 rename(F);
1392 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1393 return true;
1394 }
1395 break; // No other applicable upgrades.
1396 }
1397 break; // No other 'riscv.*' intrinsics
1398 }
1399 } break;
1400
1401 case 's':
1402 if (Name == "stackprotectorcheck") {
1403 NewFn = nullptr;
1404 return true;
1405 }
1406 break;
1407
1408 case 'v': {
1409 if (Name == "var.annotation" && F->arg_size() == 4) {
1410 rename(F);
1412 F->getParent(), Intrinsic::var_annotation,
1413 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1414 return true;
1415 }
1416 break;
1417 }
1418
1419 case 'w':
1420 if (Name.consume_front("wasm.")) {
1423 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1424 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1425 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1428 rename(F);
1429 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1430 F->getReturnType());
1431 return true;
1432 }
1433
1434 if (Name.consume_front("dot.i8x16.i7x16.")) {
1436 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1437 .Case("add.signed",
1438 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1441 rename(F);
1442 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1443 return true;
1444 }
1445 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1446 }
1447 break; // No other 'wasm.*'.
1448 }
1449 break;
1450
1451 case 'x':
1452 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1453 return true;
1454 }
1455
1456 auto *ST = dyn_cast<StructType>(F->getReturnType());
1457 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1458 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1459 // Replace return type with literal non-packed struct. Only do this for
1460 // intrinsics declared to return a struct, not for intrinsics with
1461 // overloaded return type, in which case the exact struct type will be
1462 // mangled into the name.
1465 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1466 auto *FT = F->getFunctionType();
1467 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1468 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1469 std::string Name = F->getName().str();
1470 rename(F);
1471 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1472 Name, F->getParent());
1473
1474 // The new function may also need remangling.
1475 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1476 NewFn = *Result;
1477 return true;
1478 }
1479 }
1480
1481 // Remangle our intrinsic since we upgrade the mangling
1483 if (Result != std::nullopt) {
1484 NewFn = *Result;
1485 return true;
1486 }
1487
1488 // This may not belong here. This function is effectively being overloaded
1489 // to both detect an intrinsic which needs upgrading, and to provide the
1490 // upgraded form of the intrinsic. We should perhaps have two separate
1491 // functions for this.
1492 return false;
1493}
1494
1496 bool CanUpgradeDebugIntrinsicsToRecords) {
1497 NewFn = nullptr;
1498 bool Upgraded =
1499 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1500 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1501
1502 // Upgrade intrinsic attributes. This does not change the function.
1503 if (NewFn)
1504 F = NewFn;
1505 if (Intrinsic::ID id = F->getIntrinsicID())
1506 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1507 return Upgraded;
1508}
1509
1511 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1512 GV->getName() == "llvm.global_dtors")) ||
1513 !GV->hasInitializer())
1514 return nullptr;
1515 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1516 if (!ATy)
1517 return nullptr;
1518 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1519 if (!STy || STy->getNumElements() != 2)
1520 return nullptr;
1521
1522 LLVMContext &C = GV->getContext();
1523 IRBuilder<> IRB(C);
1524 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1525 IRB.getPtrTy());
1526 Constant *Init = GV->getInitializer();
1527 unsigned N = Init->getNumOperands();
1528 std::vector<Constant *> NewCtors(N);
1529 for (unsigned i = 0; i != N; ++i) {
1530 auto Ctor = cast<Constant>(Init->getOperand(i));
1531 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1532 Ctor->getAggregateElement(1),
1534 }
1535 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1536
1537 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1538 NewInit, GV->getName());
1539}
1540
1541// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1542// to byte shuffles.
1544 unsigned Shift) {
1545 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1546 unsigned NumElts = ResultTy->getNumElements() * 8;
1547
1548 // Bitcast from a 64-bit element type to a byte element type.
1549 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1550 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1551
1552 // We'll be shuffling in zeroes.
1553 Value *Res = Constant::getNullValue(VecTy);
1554
1555 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1556 // we'll just return the zero vector.
1557 if (Shift < 16) {
1558 int Idxs[64];
1559 // 256/512-bit version is split into 2/4 16-byte lanes.
1560 for (unsigned l = 0; l != NumElts; l += 16)
1561 for (unsigned i = 0; i != 16; ++i) {
1562 unsigned Idx = NumElts + i - Shift;
1563 if (Idx < NumElts)
1564 Idx -= NumElts - 16; // end of lane, switch operand.
1565 Idxs[l + i] = Idx + l;
1566 }
1567
1568 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1569 }
1570
1571 // Bitcast back to a 64-bit element type.
1572 return Builder.CreateBitCast(Res, ResultTy, "cast");
1573}
1574
1575// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1576// to byte shuffles.
1578 unsigned Shift) {
1579 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1580 unsigned NumElts = ResultTy->getNumElements() * 8;
1581
1582 // Bitcast from a 64-bit element type to a byte element type.
1583 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1584 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1585
1586 // We'll be shuffling in zeroes.
1587 Value *Res = Constant::getNullValue(VecTy);
1588
1589 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1590 // we'll just return the zero vector.
1591 if (Shift < 16) {
1592 int Idxs[64];
1593 // 256/512-bit version is split into 2/4 16-byte lanes.
1594 for (unsigned l = 0; l != NumElts; l += 16)
1595 for (unsigned i = 0; i != 16; ++i) {
1596 unsigned Idx = i + Shift;
1597 if (Idx >= 16)
1598 Idx += NumElts - 16; // end of lane, switch operand.
1599 Idxs[l + i] = Idx + l;
1600 }
1601
1602 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1603 }
1604
1605 // Bitcast back to a 64-bit element type.
1606 return Builder.CreateBitCast(Res, ResultTy, "cast");
1607}
1608
1609static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1610 unsigned NumElts) {
1611 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1613 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1614 Mask = Builder.CreateBitCast(Mask, MaskTy);
1615
1616 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1617 // i8 and we need to extract down to the right number of elements.
1618 if (NumElts <= 4) {
1619 int Indices[4];
1620 for (unsigned i = 0; i != NumElts; ++i)
1621 Indices[i] = i;
1622 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1623 "extract");
1624 }
1625
1626 return Mask;
1627}
1628
1629static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1630 Value *Op1) {
1631 // If the mask is all ones just emit the first operation.
1632 if (const auto *C = dyn_cast<Constant>(Mask))
1633 if (C->isAllOnesValue())
1634 return Op0;
1635
1636 Mask = getX86MaskVec(Builder, Mask,
1637 cast<FixedVectorType>(Op0->getType())->getNumElements());
1638 return Builder.CreateSelect(Mask, Op0, Op1);
1639}
1640
1641static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1642 Value *Op1) {
1643 // If the mask is all ones just emit the first operation.
1644 if (const auto *C = dyn_cast<Constant>(Mask))
1645 if (C->isAllOnesValue())
1646 return Op0;
1647
1648 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1649 Mask->getType()->getIntegerBitWidth());
1650 Mask = Builder.CreateBitCast(Mask, MaskTy);
1651 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1652 return Builder.CreateSelect(Mask, Op0, Op1);
1653}
1654
1655// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1656// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1657// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1659 Value *Op1, Value *Shift,
1660 Value *Passthru, Value *Mask,
1661 bool IsVALIGN) {
1662 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1663
1664 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1665 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1666 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1667 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1668
1669 // Mask the immediate for VALIGN.
1670 if (IsVALIGN)
1671 ShiftVal &= (NumElts - 1);
1672
1673 // If palignr is shifting the pair of vectors more than the size of two
1674 // lanes, emit zero.
1675 if (ShiftVal >= 32)
1677
1678 // If palignr is shifting the pair of input vectors more than one lane,
1679 // but less than two lanes, convert to shifting in zeroes.
1680 if (ShiftVal > 16) {
1681 ShiftVal -= 16;
1682 Op1 = Op0;
1684 }
1685
1686 int Indices[64];
1687 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1688 for (unsigned l = 0; l < NumElts; l += 16) {
1689 for (unsigned i = 0; i != 16; ++i) {
1690 unsigned Idx = ShiftVal + i;
1691 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1692 Idx += NumElts - 16; // End of lane, switch operand.
1693 Indices[l + i] = Idx + l;
1694 }
1695 }
1696
1697 Value *Align = Builder.CreateShuffleVector(
1698 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1699
1700 return emitX86Select(Builder, Mask, Align, Passthru);
1701}
1702
1704 bool ZeroMask, bool IndexForm) {
1705 Type *Ty = CI.getType();
1706 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1707 unsigned EltWidth = Ty->getScalarSizeInBits();
1708 bool IsFloat = Ty->isFPOrFPVectorTy();
1709 Intrinsic::ID IID;
1710 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1711 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1712 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1713 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1714 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1715 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1716 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1717 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1718 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1719 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1720 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1721 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1722 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1723 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1724 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1725 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1726 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1727 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1728 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1729 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1730 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1731 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1732 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1733 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1734 else if (VecWidth == 128 && EltWidth == 16)
1735 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1736 else if (VecWidth == 256 && EltWidth == 16)
1737 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1738 else if (VecWidth == 512 && EltWidth == 16)
1739 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1740 else if (VecWidth == 128 && EltWidth == 8)
1741 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1742 else if (VecWidth == 256 && EltWidth == 8)
1743 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1744 else if (VecWidth == 512 && EltWidth == 8)
1745 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1746 else
1747 llvm_unreachable("Unexpected intrinsic");
1748
1749 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1750 CI.getArgOperand(2) };
1751
1752 // If this isn't index form we need to swap operand 0 and 1.
1753 if (!IndexForm)
1754 std::swap(Args[0], Args[1]);
1755
1756 Value *V = Builder.CreateIntrinsic(IID, {}, Args);
1757 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1758 : Builder.CreateBitCast(CI.getArgOperand(1),
1759 Ty);
1760 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1761}
1762
1764 Intrinsic::ID IID) {
1765 Type *Ty = CI.getType();
1766 Value *Op0 = CI.getOperand(0);
1767 Value *Op1 = CI.getOperand(1);
1768 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1769
1770 if (CI.arg_size() == 4) { // For masked intrinsics.
1771 Value *VecSrc = CI.getOperand(2);
1772 Value *Mask = CI.getOperand(3);
1773 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1774 }
1775 return Res;
1776}
1777
1779 bool IsRotateRight) {
// NOTE(review): the opening line of this signature (return type, function
// name, leading parameters) is absent from this listing; `Builder` and `CI`
// used below are presumably declared there.
//
// Expresses a rotate as a funnel shift whose two data operands are the same
// value (operand 0 of `CI`): fshr when IsRotateRight is set, fshl otherwise.
// When `CI` has exactly four arguments, the result is additionally fed
// through emitX86Select with operand 3 as the mask and operand 2 as the other
// value.
1780 Type *Ty = CI.getType();
1781 Value *Src = CI.getArgOperand(0);
1782 Value *Amt = CI.getArgOperand(1);
1783
1784 // The amount may be a scalar immediate, in which case create a splat vector.
1785 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1786 // we only care about the lowest log2 bits anyway.
1787 if (Amt->getType() != Ty) {
1788 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned cast (isSigned == false) to the element type before splatting.
1789 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1790 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1791 }
1792
1793 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1794 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1795
1796 if (CI.arg_size() == 4) { // For masked intrinsics.
1797 Value *VecSrc = CI.getOperand(2);
1798 Value *Mask = CI.getOperand(3);
1799 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1800 }
1801 return Res;
1802}
1803
1804static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1805 bool IsSigned) {
1806 Type *Ty = CI.getType();
1807 Value *LHS = CI.getArgOperand(0);
1808 Value *RHS = CI.getArgOperand(1);
1809
1810 CmpInst::Predicate Pred;
1811 switch (Imm) {
1812 case 0x0:
1813 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1814 break;
1815 case 0x1:
1816 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1817 break;
1818 case 0x2:
1819 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1820 break;
1821 case 0x3:
1822 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1823 break;
1824 case 0x4:
1825 Pred = ICmpInst::ICMP_EQ;
1826 break;
1827 case 0x5:
1828 Pred = ICmpInst::ICMP_NE;
1829 break;
1830 case 0x6:
1831 return Constant::getNullValue(Ty); // FALSE
1832 case 0x7:
1833 return Constant::getAllOnesValue(Ty); // TRUE
1834 default:
1835 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1836 }
1837
1838 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1839 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1840 return Ext;
1841}
1842
1844 bool IsShiftRight, bool ZeroMask) {
// NOTE(review): the opening line of this signature (return type, function
// name, leading parameters) is absent from this listing; `Builder` and `CI`
// used below are presumably declared there.
//
// Emits a funnel shift (fshr when IsShiftRight, else fshl) of operands 0 and
// 1 of `CI` by operand 2. For calls with four or more arguments the result is
// fed through emitX86Select, with the last argument as the mask.
1845 Type *Ty = CI.getType();
1846 Value *Op0 = CI.getArgOperand(0);
1847 Value *Op1 = CI.getArgOperand(1);
1848 Value *Amt = CI.getArgOperand(2);
1849
// The right-shift form passes the two data operands in the opposite order.
1850 if (IsShiftRight)
1851 std::swap(Op0, Op1);
1852
1853 // The amount may be a scalar immediate, in which case create a splat vector.
1854 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1855 // we only care about the lowest log2 bits anyway.
1856 if (Amt->getType() != Ty) {
1857 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1858 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1859 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1860 }
1861
1862 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1863 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
1864
1865 unsigned NumArgs = CI.arg_size();
1866 if (NumArgs >= 4) { // For masked intrinsics.
// Other select value: operand 3 for the five-argument form; otherwise zero
// when ZeroMask is set, else the original (unswapped) operand 0.
1867 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1868 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1869 CI.getArgOperand(0);
1870 Value *Mask = CI.getOperand(NumArgs - 1);
1871 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1872 }
1873 return Res;
1874}
1875
1877 Value *Mask, bool Aligned) {
// NOTE(review): the opening line of this signature (return type, function
// name, leading parameters) is absent from this listing; `Builder`, `Ptr` and
// `Data` used below are presumably declared there.
//
// Stores `Data` through `Ptr`. A constant all-ones `Mask` yields a plain
// aligned store; any other mask is converted with getX86MaskVec and used for
// a masked store.
1878 // Cast the pointer to the right type.
1879 Ptr = Builder.CreateBitCast(Ptr,
1880 llvm::PointerType::getUnqual(Data->getType()));
// Aligned: alignment is the size of the stored type in bytes; otherwise 1.
1881 const Align Alignment =
1882 Aligned
1883 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1884 : Align(1);
1885
1886 // If the mask is all ones just emit a regular store.
1887 if (const auto *C = dyn_cast<Constant>(Mask))
1888 if (C->isAllOnesValue())
1889 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1890
1891 // Convert the mask from an integer type to a vector of i1.
1892 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1893 Mask = getX86MaskVec(Builder, Mask, NumElts);
1894 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1895}
1896
1898 Value *Passthru, Value *Mask, bool Aligned) {
// NOTE(review): the opening line of this signature (return type, function
// name, leading parameters) is absent from this listing; `Builder` and `Ptr`
// used below are presumably declared there.
//
// Loads a value of `Passthru`'s type through `Ptr`. A constant all-ones
// `Mask` yields a plain aligned load; any other mask is converted with
// getX86MaskVec and used for a masked load with `Passthru` as the
// pass-through operand.
1899 Type *ValTy = Passthru->getType();
1900 // Cast the pointer to the right type.
// NOTE(review): listing line 1901 (presumably the cast itself) is missing.
1902 const Align Alignment =
1903 Aligned
1904 ? Align(
// NOTE(review): listing line 1905 (the numerator of the byte-size division)
// is missing here.
1906 8)
1907 : Align(1);
1908
1909 // If the mask is all ones just emit a regular load.
1910 if (const auto *C = dyn_cast<Constant>(Mask))
1911 if (C->isAllOnesValue())
1912 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1913
1914 // Convert the mask from an integer type to a vector of i1.
1915 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1916 Mask = getX86MaskVec(Builder, Mask, NumElts);
1917 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1918}
1919
1920static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1921 Type *Ty = CI.getType();
1922 Value *Op0 = CI.getArgOperand(0);
1923 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
1924 {Op0, Builder.getInt1(false)});
1925 if (CI.arg_size() == 3)
1926 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1927 return Res;
1928}
1929
1930static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1931 Type *Ty = CI.getType();
1932
1933 // Arguments have a vXi32 type so cast to vXi64.
1934 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1935 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1936
1937 if (IsSigned) {
1938 // Shift left then arithmetic shift right.
1939 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1940 LHS = Builder.CreateShl(LHS, ShiftAmt);
1941 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1942 RHS = Builder.CreateShl(RHS, ShiftAmt);
1943 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1944 } else {
1945 // Clear the upper bits.
1946 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1947 LHS = Builder.CreateAnd(LHS, Mask);
1948 RHS = Builder.CreateAnd(RHS, Mask);
1949 }
1950
1951 Value *Res = Builder.CreateMul(LHS, RHS);
1952
1953 if (CI.arg_size() == 4)
1954 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1955
1956 return Res;
1957}
1958
1959 // Apply the mask to a vector of i1s and make sure the result is at least 8 bits wide.
// NOTE(review): listing line 1960 (return type, function name, leading
// parameters) is missing; `Builder` and `Vec` used below are presumably
// declared there.
1961 Value *Mask) {
1962 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
// A null Mask, or a constant all-ones Mask, leaves Vec unchanged; any other
// mask is converted with getX86MaskVec and ANDed in.
1963 if (Mask) {
1964 const auto *C = dyn_cast<Constant>(Mask);
1965 if (!C || !C->isAllOnesValue())
1966 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1967 }
1968
// Vectors with fewer than 8 elements are widened to 8 with a shuffle: the
// first NumElts indices pick Vec's own elements, the rest are >= NumElts and
// therefore refer to the shuffle's second operand.
1969 if (NumElts < 8) {
1970 int Indices[8];
1971 for (unsigned i = 0; i != NumElts; ++i)
1972 Indices[i] = i;
1973 for (unsigned i = NumElts; i != 8; ++i)
1974 Indices[i] = NumElts + i % NumElts;
1975 Vec = Builder.CreateShuffleVector(Vec,
// NOTE(review): listing line 1976 (the shuffle's second operand) is missing.
1977 Indices);
1978 }
// Reinterpret the i1 vector as one integer of max(NumElts, 8) bits.
1979 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1980}
1981
1983 unsigned CC, bool Signed) {
// NOTE(review): the opening line of this signature (return type, function
// name, leading parameters) is absent from this listing; `Builder` and `CI`
// used below are presumably declared there.
//
// Builds the compare selected by condition code `CC` on operands 0 and 1 of
// `CI`, then applies the call's last argument as a mask through
// applyX86MaskOn1BitsVec.
1984 Value *Op0 = CI.getArgOperand(0);
1985 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1986
1987 Value *Cmp;
// CC 3 and CC 7 do not emit a compare; each produces a value of type
// <NumElts x i1>.
// NOTE(review): listing lines 1989 and 1992, which name the value built for
// each case, are missing.
1988 if (CC == 3) {
1990 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1991 } else if (CC == 7) {
1993 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1994 } else {
// NOTE(review): listing line 1995 (presumably the declaration of `Pred`) is
// missing.
1996 switch (CC) {
1997 default: llvm_unreachable("Unknown condition code");
1998 case 0: Pred = ICmpInst::ICMP_EQ; break;
1999 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2000 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2001 case 4: Pred = ICmpInst::ICMP_NE; break;
2002 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2003 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2004 }
2005 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2006 }
2007
// The mask is always the call's final argument.
2008 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2009
2010 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2011}
2012
2013 // Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): listing line 2014 (return type, function name, leading
// parameters) is missing; `Builder` and `CI` used below are presumably
// declared there.
2015 Intrinsic::ID IID) {
// Call the unmasked intrinsic `IID` on operands 0 and 1 only.
2016 Value *Rep = Builder.CreateIntrinsic(
2017 IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
// Operand 3 is passed as the mask and operand 2 as the other select value.
2018 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2019}
2020
// NOTE(review): the signature of this function (listing line 2021) is missing
// from this listing; `Builder` and `CI` used below are presumably its
// parameters.
//
// Builds a scalar masked move: element 0 of the result is element 0 of
// operand 1 when bit 0 of the mask (operand 3) is set, otherwise element 0 of
// operand 2; all remaining elements come from operand 0.
2022 Value* A = CI.getArgOperand(0);
2023 Value* B = CI.getArgOperand(1);
2024 Value* Src = CI.getArgOperand(2);
2025 Value* Mask = CI.getArgOperand(3);
2026
// Isolate bit 0 of the mask (ANDed with an 8-bit constant 1) and test it for
// non-zero.
2027 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2028 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2029 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2030 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2031 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2032 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2033}
2034
// NOTE(review): the signature of this function (listing line 2035) is missing
// from this listing; `Builder` and `CI` used below are presumably its
// parameters.
//
// Converts operand 0 to a mask vector with one element per element of the
// call's result type (via getX86MaskVec) and sign-extends that mask to the
// result type.
2036 Value* Op = CI.getArgOperand(0);
2037 Type* ReturnOp = CI.getType();
2038 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2039 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2040 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2041}
2042
2043 // Replace intrinsic with unmasked version and a select.
// NOTE(review): listing line 2044 (return type, function name, leading
// parameters) is missing; `Name` and `Builder` used below are presumably
// declared there.
//
// Picks the unmasked intrinsic from the name prefix plus the result type's
// total and per-element bit widths. Returns false without touching `Rep` when
// no prefix matches. Otherwise calls the unmasked intrinsic with every
// argument except the last two, stores the emitX86Select of that result in
// `Rep`, and returns true.
2045 CallBase &CI, Value *&Rep) {
2046 Name = Name.substr(12); // Remove avx512.mask.
2047
2048 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2049 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2050 Intrinsic::ID IID;
2051 if (Name.starts_with("max.p")) {
2052 if (VecWidth == 128 && EltWidth == 32)
2053 IID = Intrinsic::x86_sse_max_ps;
2054 else if (VecWidth == 128 && EltWidth == 64)
2055 IID = Intrinsic::x86_sse2_max_pd;
2056 else if (VecWidth == 256 && EltWidth == 32)
2057 IID = Intrinsic::x86_avx_max_ps_256;
2058 else if (VecWidth == 256 && EltWidth == 64)
2059 IID = Intrinsic::x86_avx_max_pd_256;
2060 else
2061 llvm_unreachable("Unexpected intrinsic");
2062 } else if (Name.starts_with("min.p")) {
2063 if (VecWidth == 128 && EltWidth == 32)
2064 IID = Intrinsic::x86_sse_min_ps;
2065 else if (VecWidth == 128 && EltWidth == 64)
2066 IID = Intrinsic::x86_sse2_min_pd;
2067 else if (VecWidth == 256 && EltWidth == 32)
2068 IID = Intrinsic::x86_avx_min_ps_256;
2069 else if (VecWidth == 256 && EltWidth == 64)
2070 IID = Intrinsic::x86_avx_min_pd_256;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("pshuf.b.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pshuf_b;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pshuf_b_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("pmul.hr.sw.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("pmulh.w.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_pmulh_w;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_pmulh_w;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_pmulh_w_512;
2098 else
2099 llvm_unreachable("Unexpected intrinsic");
2100 } else if (Name.starts_with("pmulhu.w.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_pmulhu_w;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_pmulhu_w;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2107 else
2108 llvm_unreachable("Unexpected intrinsic");
2109 } else if (Name.starts_with("pmaddw.d.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse2_pmadd_wd;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_pmadd_wd;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2116 else
2117 llvm_unreachable("Unexpected intrinsic");
2118 } else if (Name.starts_with("pmaddubs.w.")) {
2119 if (VecWidth == 128)
2120 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2121 else if (VecWidth == 256)
2122 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2123 else if (VecWidth == 512)
2124 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2125 else
2126 llvm_unreachable("Unexpected intrinsic");
2127 } else if (Name.starts_with("packsswb.")) {
2128 if (VecWidth == 128)
2129 IID = Intrinsic::x86_sse2_packsswb_128;
2130 else if (VecWidth == 256)
2131 IID = Intrinsic::x86_avx2_packsswb;
2132 else if (VecWidth == 512)
2133 IID = Intrinsic::x86_avx512_packsswb_512;
2134 else
2135 llvm_unreachable("Unexpected intrinsic");
2136 } else if (Name.starts_with("packssdw.")) {
2137 if (VecWidth == 128)
2138 IID = Intrinsic::x86_sse2_packssdw_128;
2139 else if (VecWidth == 256)
2140 IID = Intrinsic::x86_avx2_packssdw;
2141 else if (VecWidth == 512)
2142 IID = Intrinsic::x86_avx512_packssdw_512;
2143 else
2144 llvm_unreachable("Unexpected intrinsic");
2145 } else if (Name.starts_with("packuswb.")) {
2146 if (VecWidth == 128)
2147 IID = Intrinsic::x86_sse2_packuswb_128;
2148 else if (VecWidth == 256)
2149 IID = Intrinsic::x86_avx2_packuswb;
2150 else if (VecWidth == 512)
2151 IID = Intrinsic::x86_avx512_packuswb_512;
2152 else
2153 llvm_unreachable("Unexpected intrinsic");
2154 } else if (Name.starts_with("packusdw.")) {
2155 if (VecWidth == 128)
2156 IID = Intrinsic::x86_sse41_packusdw;
2157 else if (VecWidth == 256)
2158 IID = Intrinsic::x86_avx2_packusdw;
2159 else if (VecWidth == 512)
2160 IID = Intrinsic::x86_avx512_packusdw_512;
2161 else
2162 llvm_unreachable("Unexpected intrinsic");
2163 } else if (Name.starts_with("vpermilvar.")) {
2164 if (VecWidth == 128 && EltWidth == 32)
2165 IID = Intrinsic::x86_avx_vpermilvar_ps;
2166 else if (VecWidth == 128 && EltWidth == 64)
2167 IID = Intrinsic::x86_avx_vpermilvar_pd;
2168 else if (VecWidth == 256 && EltWidth == 32)
2169 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2170 else if (VecWidth == 256 && EltWidth == 64)
2171 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2172 else if (VecWidth == 512 && EltWidth == 32)
2173 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2174 else if (VecWidth == 512 && EltWidth == 64)
2175 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2176 else
2177 llvm_unreachable("Unexpected intrinsic");
// The conversions below are matched by exact name rather than by prefix.
2178 } else if (Name == "cvtpd2dq.256") {
2179 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2180 } else if (Name == "cvtpd2ps.256") {
2181 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2182 } else if (Name == "cvttpd2dq.256") {
2183 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2184 } else if (Name == "cvttps2dq.128") {
2185 IID = Intrinsic::x86_sse2_cvttps2dq;
2186 } else if (Name == "cvttps2dq.256") {
2187 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2188 } else if (Name.starts_with("permvar.")) {
// For 32- and 64-bit elements the float and integer variants map to
// different intrinsics, so the result type's FP-ness is consulted as well.
2189 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2190 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2191 IID = Intrinsic::x86_avx2_permps;
2192 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2193 IID = Intrinsic::x86_avx2_permd;
2194 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2195 IID = Intrinsic::x86_avx512_permvar_df_256;
2196 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2197 IID = Intrinsic::x86_avx512_permvar_di_256;
2198 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2199 IID = Intrinsic::x86_avx512_permvar_sf_512;
2200 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2201 IID = Intrinsic::x86_avx512_permvar_si_512;
2202 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2203 IID = Intrinsic::x86_avx512_permvar_df_512;
2204 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2205 IID = Intrinsic::x86_avx512_permvar_di_512;
2206 else if (VecWidth == 128 && EltWidth == 16)
2207 IID = Intrinsic::x86_avx512_permvar_hi_128;
2208 else if (VecWidth == 256 && EltWidth == 16)
2209 IID = Intrinsic::x86_avx512_permvar_hi_256;
2210 else if (VecWidth == 512 && EltWidth == 16)
2211 IID = Intrinsic::x86_avx512_permvar_hi_512;
2212 else if (VecWidth == 128 && EltWidth == 8)
2213 IID = Intrinsic::x86_avx512_permvar_qi_128;
2214 else if (VecWidth == 256 && EltWidth == 8)
2215 IID = Intrinsic::x86_avx512_permvar_qi_256;
2216 else if (VecWidth == 512 && EltWidth == 8)
2217 IID = Intrinsic::x86_avx512_permvar_qi_512;
2218 else
2219 llvm_unreachable("Unexpected intrinsic");
2220 } else if (Name.starts_with("dbpsadbw.")) {
2221 if (VecWidth == 128)
2222 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2223 else if (VecWidth == 256)
2224 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2225 else if (VecWidth == 512)
2226 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2227 else
2228 llvm_unreachable("Unexpected intrinsic");
2229 } else if (Name.starts_with("pmultishift.qb.")) {
2230 if (VecWidth == 128)
2231 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2232 else if (VecWidth == 256)
2233 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2234 else if (VecWidth == 512)
2235 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2236 else
2237 llvm_unreachable("Unexpected intrinsic");
2238 } else if (Name.starts_with("conflict.")) {
// Name[9] is the character right after "conflict.": 'd' or 'q'.
2239 if (Name[9] == 'd' && VecWidth == 128)
2240 IID = Intrinsic::x86_avx512_conflict_d_128;
2241 else if (Name[9] == 'd' && VecWidth == 256)
2242 IID = Intrinsic::x86_avx512_conflict_d_256;
2243 else if (Name[9] == 'd' && VecWidth == 512)
2244 IID = Intrinsic::x86_avx512_conflict_d_512;
2245 else if (Name[9] == 'q' && VecWidth == 128)
2246 IID = Intrinsic::x86_avx512_conflict_q_128;
2247 else if (Name[9] == 'q' && VecWidth == 256)
2248 IID = Intrinsic::x86_avx512_conflict_q_256;
2249 else if (Name[9] == 'q' && VecWidth == 512)
2250 IID = Intrinsic::x86_avx512_conflict_q_512;
2251 else
2252 llvm_unreachable("Unexpected intrinsic");
2253 } else if (Name.starts_with("pavg.")) {
// Name[5] is the character right after "pavg.": 'b' or 'w'.
2254 if (Name[5] == 'b' && VecWidth == 128)
2255 IID = Intrinsic::x86_sse2_pavg_b;
2256 else if (Name[5] == 'b' && VecWidth == 256)
2257 IID = Intrinsic::x86_avx2_pavg_b;
2258 else if (Name[5] == 'b' && VecWidth == 512)
2259 IID = Intrinsic::x86_avx512_pavg_b_512;
2260 else if (Name[5] == 'w' && VecWidth == 128)
2261 IID = Intrinsic::x86_sse2_pavg_w;
2262 else if (Name[5] == 'w' && VecWidth == 256)
2263 IID = Intrinsic::x86_avx2_pavg_w;
2264 else if (Name[5] == 'w' && VecWidth == 512)
2265 IID = Intrinsic::x86_avx512_pavg_w_512;
2266 else
2267 llvm_unreachable("Unexpected intrinsic");
2268 } else
2269 return false;
2270
// Drop the last two arguments of the masked call; they are handed to
// emitX86Select below instead of to the unmasked intrinsic.
2271 SmallVector<Value *, 4> Args(CI.args());
2272 Args.pop_back();
2273 Args.pop_back();
2274 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2275 unsigned NumArgs = CI.arg_size();
// The last argument is passed as the mask, the second-to-last as the other
// select value.
2276 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2277 CI.getArgOperand(NumArgs - 2));
2278 return true;
2279}
2280
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// The string is rewritten only when it begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker"; in that case
/// the '#' that opens the first "# marker" is replaced by ';'. Any other
/// string is left untouched.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  std::string &Asm = *AsmStr;

  // Must start with the frame-pointer move.
  if (Asm.rfind("mov\tfp", 0) != 0)
    return;

  // Must reference the retain/autorelease runtime entry point.
  if (Asm.find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t MarkerPos = Asm.find("# marker");
  if (MarkerPos == std::string::npos)
    return;

  // Swap the comment introducer in place.
  Asm.replace(MarkerPos, 1, ";");
}
2291
2293 Function *F, IRBuilder<> &Builder) {
// NOTE(review): the opening line of this signature (return type, function
// name, leading parameters) is absent from this listing; `Name` and `CI` used
// below are presumably declared there.
//
// Dispatches on `Name` and builds replacement IR for the call `CI`. Returns
// the replacement value, or nullptr when no branch produced one (`Rep` keeps
// its initial value). The "ldg.global" branch returns its load directly.
//
// NOTE(review): several conditions call Name.consume_front(...) before a
// second test. If consume_front strips the prefix on a match (as StringRef's
// does; `Name`'s type is not visible here), a branch whose second test fails
// leaves `Name` shortened for all later branches. Confirm this is intended.
2294 Value *Rep = nullptr;
2295
2296 if (Name == "abs.i" || Name == "abs.ll") {
// abs(x) as select(x >= 0, x, -x).
2297 Value *Arg = CI->getArgOperand(0);
2298 Value *Neg = Builder.CreateNeg(Arg, "neg");
2299 Value *Cmp = Builder.CreateICmpSGE(
2300 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2301 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2302 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2303 Name.starts_with("atomic.load.add.f64.p")) {
// Floating-point atomic add becomes a sequentially consistent atomicrmw fadd
// with no explicit alignment.
2304 Value *Ptr = CI->getArgOperand(0);
2305 Value *Val = CI->getArgOperand(1);
2306 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2307 AtomicOrdering::SequentiallyConsistent);
2308 } else if (Name.consume_front("max.") &&
2309 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2310 Name == "ui" || Name == "ull")) {
// Suffixes starting with 'u' compare unsigned, the others signed.
2311 Value *Arg0 = CI->getArgOperand(0);
2312 Value *Arg1 = CI->getArgOperand(1);
2313 Value *Cmp = Name.starts_with("u")
2314 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2315 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2316 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2317 } else if (Name.consume_front("min.") &&
2318 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2319 Name == "ui" || Name == "ull")) {
// Suffixes starting with 'u' compare unsigned, the others signed.
2320 Value *Arg0 = CI->getArgOperand(0);
2321 Value *Arg1 = CI->getArgOperand(1);
2322 Value *Cmp = Name.starts_with("u")
2323 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2324 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2325 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2326 } else if (Name == "clz.ll") {
2327 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2328 Value *Arg = CI->getArgOperand(0);
2329 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2330 {Arg, Builder.getFalse()},
2331 /*FMFSource=*/nullptr, "ctlz");
2332 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2333 } else if (Name == "popc.ll") {
2334 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2335 // i64.
2336 Value *Arg = CI->getArgOperand(0);
2337 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2338 Arg, /*FMFSource=*/nullptr, "ctpop");
2339 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2340 } else if (Name == "h2f") {
2341 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2342 {Builder.getFloatTy()}, CI->getArgOperand(0),
2343 /*FMFSource=*/nullptr, "h2f");
2344 } else if (Name.consume_front("bitcast.") &&
2345 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2346 Name == "d2ll")) {
2347 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2348 } else if (Name == "rotate.b32") {
// A rotate is a funnel shift with the same value as both data operands.
2349 Value *Arg = CI->getOperand(0);
2350 Value *ShiftAmt = CI->getOperand(1);
2351 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2352 {Arg, Arg, ShiftAmt});
2353 } else if (Name == "rotate.b64") {
// The shift amount is zero-extended to i64 before the 64-bit funnel shift.
2354 Type *Int64Ty = Builder.getInt64Ty();
2355 Value *Arg = CI->getOperand(0);
2356 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2357 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2358 {Arg, Arg, ZExtShiftAmt});
2359 } else if (Name == "rotate.right.b64") {
2360 Type *Int64Ty = Builder.getInt64Ty();
2361 Value *Arg = CI->getOperand(0);
2362 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2363 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2364 {Arg, Arg, ZExtShiftAmt});
2365 } else if ((Name.consume_front("ptr.gen.to.") &&
2366 (Name.starts_with("local") || Name.starts_with("shared") ||
2367 Name.starts_with("global") || Name.starts_with("constant"))) ||
2368 (Name.consume_front("ptr.") &&
2369 (Name.consume_front("local") || Name.consume_front("shared") ||
2370 Name.consume_front("global") ||
2371 Name.consume_front("constant")) &&
2372 Name.starts_with(".to.gen"))) {
// Both conversion directions become an addrspacecast to the call's result
// type.
2373 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2374 } else if (Name.consume_front("ldg.global")) {
// Load through an addrspace(1) pointer using the alignment given by constant
// operand 1, and tag the load with empty !invariant.load metadata.
2375 Value *Ptr = CI->getArgOperand(0);
2376 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2377 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2378 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2379 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2380 MDNode *MD = MDNode::get(Builder.getContext(), {});
2381 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2382 return LD;
2383 } else {
// NOTE(review): listing line 2384 (presumably the declaration of `IID`) is
// missing.
// The rewrite is skipped, leaving Rep null, when IID is not_intrinsic or the
// old function's scalar return type is already bfloat.
2385 if (IID != Intrinsic::not_intrinsic &&
2386 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2387 rename(F);
2388 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
// NOTE(review): listing line 2389 (presumably the declaration of `Args`) is
// missing.
// Integer arguments whose new parameter type is bfloat-based are bitcast to
// it; every other argument is passed through unchanged.
2390 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2391 Value *Arg = CI->getArgOperand(I);
2392 Type *OldType = Arg->getType();
2393 Type *NewType = NewFn->getArg(I)->getType();
2394 Args.push_back(
2395 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2396 ? Builder.CreateBitCast(Arg, NewType)
2397 : Arg);
2398 }
2399 Rep = Builder.CreateCall(NewFn, Args);
// If the old function returned an integer, bitcast the new call's result
// back to that type.
2400 if (F->getReturnType()->isIntegerTy())
2401 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2402 }
2403 }
2404
2405 return Rep;
2406}
2407
2409 IRBuilder<> &Builder) {
2410 LLVMContext &C = F->getContext();
2411 Value *Rep = nullptr;
2412
2413 if (Name.starts_with("sse4a.movnt.")) {
2415 Elts.push_back(
2416 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2417 MDNode *Node = MDNode::get(C, Elts);
2418
2419 Value *Arg0 = CI->getArgOperand(0);
2420 Value *Arg1 = CI->getArgOperand(1);
2421
2422 // Nontemporal (unaligned) store of the 0'th element of the float/double
2423 // vector.
2424 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2425 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2426 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2427 Value *Extract =
2428 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2429
2430 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2431 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2432 } else if (Name.starts_with("avx.movnt.") ||
2433 Name.starts_with("avx512.storent.")) {
2435 Elts.push_back(
2436 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2437 MDNode *Node = MDNode::get(C, Elts);
2438
2439 Value *Arg0 = CI->getArgOperand(0);
2440 Value *Arg1 = CI->getArgOperand(1);
2441
2442 // Convert the type of the pointer to a pointer to the stored type.
2443 Value *BC = Builder.CreateBitCast(
2444 Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2445 StoreInst *SI = Builder.CreateAlignedStore(
2446 Arg1, BC,
2448 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2449 } else if (Name == "sse2.storel.dq") {
2450 Value *Arg0 = CI->getArgOperand(0);
2451 Value *Arg1 = CI->getArgOperand(1);
2452
2453 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2454 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2455 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2456 Value *BC = Builder.CreateBitCast(
2457 Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2458 Builder.CreateAlignedStore(Elt, BC, Align(1));
2459 } else if (Name.starts_with("sse.storeu.") ||
2460 Name.starts_with("sse2.storeu.") ||
2461 Name.starts_with("avx.storeu.")) {
2462 Value *Arg0 = CI->getArgOperand(0);
2463 Value *Arg1 = CI->getArgOperand(1);
2464
2465 Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2466 "cast");
2467 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2468 } else if (Name == "avx512.mask.store.ss") {
2469 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2470 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2471 Mask, false);
2472 } else if (Name.starts_with("avx512.mask.store")) {
2473 // "avx512.mask.storeu." or "avx512.mask.store."
2474 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2475 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2476 CI->getArgOperand(2), Aligned);
2477 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2478 // Upgrade packed integer vector compare intrinsics to compare instructions.
2479 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2480 bool CmpEq = Name[9] == 'e';
2481 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2482 CI->getArgOperand(0), CI->getArgOperand(1));
2483 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2484 } else if (Name.starts_with("avx512.broadcastm")) {
2485 Type *ExtTy = Type::getInt32Ty(C);
2486 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2487 ExtTy = Type::getInt64Ty(C);
2488 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2489 ExtTy->getPrimitiveSizeInBits();
2490 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2491 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2492 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2493 Value *Vec = CI->getArgOperand(0);
2494 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2495 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2496 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2497 } else if (Name.starts_with("avx.sqrt.p") ||
2498 Name.starts_with("sse2.sqrt.p") ||
2499 Name.starts_with("sse.sqrt.p")) {
2500 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2501 {CI->getArgOperand(0)});
2502 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2503 if (CI->arg_size() == 4 &&
2504 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2505 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2506 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2507 : Intrinsic::x86_avx512_sqrt_pd_512;
2508
2509 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2510 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2511 } else {
2512 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2513 {CI->getArgOperand(0)});
2514 }
2515 Rep =
2516 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2517 } else if (Name.starts_with("avx512.ptestm") ||
2518 Name.starts_with("avx512.ptestnm")) {
2519 Value *Op0 = CI->getArgOperand(0);
2520 Value *Op1 = CI->getArgOperand(1);
2521 Value *Mask = CI->getArgOperand(2);
2522 Rep = Builder.CreateAnd(Op0, Op1);
2523 llvm::Type *Ty = Op0->getType();
2525 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2526 ? ICmpInst::ICMP_NE
2527 : ICmpInst::ICMP_EQ;
2528 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2529 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2530 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2531 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2532 ->getNumElements();
2533 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2534 Rep =
2535 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2536 } else if (Name.starts_with("avx512.kunpck")) {
2537 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2538 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2539 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2540 int Indices[64];
2541 for (unsigned i = 0; i != NumElts; ++i)
2542 Indices[i] = i;
2543
2544 // First extract half of each vector. This gives better codegen than
2545 // doing it in a single shuffle.
2546 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2547 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2548 // Concat the vectors.
2549 // NOTE: Operands have to be swapped to match intrinsic definition.
2550 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2551 Rep = Builder.CreateBitCast(Rep, CI->getType());
2552 } else if (Name == "avx512.kand.w") {
2553 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2554 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2555 Rep = Builder.CreateAnd(LHS, RHS);
2556 Rep = Builder.CreateBitCast(Rep, CI->getType());
2557 } else if (Name == "avx512.kandn.w") {
2558 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2559 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2560 LHS = Builder.CreateNot(LHS);
2561 Rep = Builder.CreateAnd(LHS, RHS);
2562 Rep = Builder.CreateBitCast(Rep, CI->getType());
2563 } else if (Name == "avx512.kor.w") {
2564 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2565 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2566 Rep = Builder.CreateOr(LHS, RHS);
2567 Rep = Builder.CreateBitCast(Rep, CI->getType());
2568 } else if (Name == "avx512.kxor.w") {
2569 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2570 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2571 Rep = Builder.CreateXor(LHS, RHS);
2572 Rep = Builder.CreateBitCast(Rep, CI->getType());
2573 } else if (Name == "avx512.kxnor.w") {
2574 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2575 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2576 LHS = Builder.CreateNot(LHS);
2577 Rep = Builder.CreateXor(LHS, RHS);
2578 Rep = Builder.CreateBitCast(Rep, CI->getType());
2579 } else if (Name == "avx512.knot.w") {
2580 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2581 Rep = Builder.CreateNot(Rep);
2582 Rep = Builder.CreateBitCast(Rep, CI->getType());
2583 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2584 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2585 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2586 Rep = Builder.CreateOr(LHS, RHS);
2587 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2588 Value *C;
2589 if (Name[14] == 'c')
2590 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2591 else
2592 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2593 Rep = Builder.CreateICmpEQ(Rep, C);
2594 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2595 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2596 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2597 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2598 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2599 Type *I32Ty = Type::getInt32Ty(C);
2600 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2601 ConstantInt::get(I32Ty, 0));
2602 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2603 ConstantInt::get(I32Ty, 0));
2604 Value *EltOp;
2605 if (Name.contains(".add."))
2606 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2607 else if (Name.contains(".sub."))
2608 EltOp = Builder.CreateFSub(Elt0, Elt1);
2609 else if (Name.contains(".mul."))
2610 EltOp = Builder.CreateFMul(Elt0, Elt1);
2611 else
2612 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2613 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2614 ConstantInt::get(I32Ty, 0));
2615 } else if (Name.starts_with("avx512.mask.pcmp")) {
2616 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2617 bool CmpEq = Name[16] == 'e';
2618 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2619 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2620 Type *OpTy = CI->getArgOperand(0)->getType();
2621 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2622 Intrinsic::ID IID;
2623 switch (VecWidth) {
2624 default:
2625 llvm_unreachable("Unexpected intrinsic");
2626 case 128:
2627 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2628 break;
2629 case 256:
2630 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2631 break;
2632 case 512:
2633 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2634 break;
2635 }
2636
2637 Rep = Builder.CreateIntrinsic(IID, {},
2638 {CI->getOperand(0), CI->getArgOperand(1)});
2639 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2640 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2641 Type *OpTy = CI->getArgOperand(0)->getType();
2642 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2643 unsigned EltWidth = OpTy->getScalarSizeInBits();
2644 Intrinsic::ID IID;
2645 if (VecWidth == 128 && EltWidth == 32)
2646 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2647 else if (VecWidth == 256 && EltWidth == 32)
2648 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2649 else if (VecWidth == 512 && EltWidth == 32)
2650 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2651 else if (VecWidth == 128 && EltWidth == 64)
2652 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2653 else if (VecWidth == 256 && EltWidth == 64)
2654 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2655 else if (VecWidth == 512 && EltWidth == 64)
2656 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2657 else
2658 llvm_unreachable("Unexpected intrinsic");
2659
2660 Rep = Builder.CreateIntrinsic(IID, {},
2661 {CI->getOperand(0), CI->getArgOperand(1)});
2662 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2663 } else if (Name.starts_with("avx512.cmp.p")) {
2664 SmallVector<Value *, 4> Args(CI->args());
2665 Type *OpTy = Args[0]->getType();
2666 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2667 unsigned EltWidth = OpTy->getScalarSizeInBits();
2668 Intrinsic::ID IID;
2669 if (VecWidth == 128 && EltWidth == 32)
2670 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2671 else if (VecWidth == 256 && EltWidth == 32)
2672 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2673 else if (VecWidth == 512 && EltWidth == 32)
2674 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2675 else if (VecWidth == 128 && EltWidth == 64)
2676 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2677 else if (VecWidth == 256 && EltWidth == 64)
2678 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2679 else if (VecWidth == 512 && EltWidth == 64)
2680 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2681 else
2682 llvm_unreachable("Unexpected intrinsic");
2683
2685 if (VecWidth == 512)
2686 std::swap(Mask, Args.back());
2687 Args.push_back(Mask);
2688
2689 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2690 } else if (Name.starts_with("avx512.mask.cmp.")) {
2691 // Integer compare intrinsics.
2692 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2693 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2694 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2695 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2696 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2697 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2698 Name.starts_with("avx512.cvtw2mask.") ||
2699 Name.starts_with("avx512.cvtd2mask.") ||
2700 Name.starts_with("avx512.cvtq2mask.")) {
2701 Value *Op = CI->getArgOperand(0);
2702 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2703 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2704 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2705 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2706 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2707 Name.starts_with("avx512.mask.pabs")) {
2708 Rep = upgradeAbs(Builder, *CI);
2709 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2710 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2711 Name.starts_with("avx512.mask.pmaxs")) {
2712 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2713 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2714 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2715 Name.starts_with("avx512.mask.pmaxu")) {
2716 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2717 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2718 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2719 Name.starts_with("avx512.mask.pmins")) {
2720 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2721 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2722 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2723 Name.starts_with("avx512.mask.pminu")) {
2724 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2725 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2726 Name == "avx512.pmulu.dq.512" ||
2727 Name.starts_with("avx512.mask.pmulu.dq.")) {
2728 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2729 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2730 Name == "avx512.pmul.dq.512" ||
2731 Name.starts_with("avx512.mask.pmul.dq.")) {
2732 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2733 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2734 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2735 Rep =
2736 Builder.CreateSIToFP(CI->getArgOperand(1),
2737 cast<VectorType>(CI->getType())->getElementType());
2738 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2739 } else if (Name == "avx512.cvtusi2sd") {
2740 Rep =
2741 Builder.CreateUIToFP(CI->getArgOperand(1),
2742 cast<VectorType>(CI->getType())->getElementType());
2743 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2744 } else if (Name == "sse2.cvtss2sd") {
2745 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2746 Rep = Builder.CreateFPExt(
2747 Rep, cast<VectorType>(CI->getType())->getElementType());
2748 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2749 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2750 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2751 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2752 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2753 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2754 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2755 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2756 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2757 Name == "avx512.mask.cvtqq2ps.256" ||
2758 Name == "avx512.mask.cvtqq2ps.512" ||
2759 Name == "avx512.mask.cvtuqq2ps.256" ||
2760 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2761 Name == "avx.cvt.ps2.pd.256" ||
2762 Name == "avx512.mask.cvtps2pd.128" ||
2763 Name == "avx512.mask.cvtps2pd.256") {
2764 auto *DstTy = cast<FixedVectorType>(CI->getType());
2765 Rep = CI->getArgOperand(0);
2766 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2767
2768 unsigned NumDstElts = DstTy->getNumElements();
2769 if (NumDstElts < SrcTy->getNumElements()) {
2770 assert(NumDstElts == 2 && "Unexpected vector size");
2771 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2772 }
2773
2774 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2775 bool IsUnsigned = Name.contains("cvtu");
2776 if (IsPS2PD)
2777 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2778 else if (CI->arg_size() == 4 &&
2779 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2780 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2781 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2782 : Intrinsic::x86_avx512_sitofp_round;
2783 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2784 {Rep, CI->getArgOperand(3)});
2785 } else {
2786 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2787 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2788 }
2789
2790 if (CI->arg_size() >= 3)
2791 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2792 CI->getArgOperand(1));
2793 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2794 Name.starts_with("vcvtph2ps.")) {
2795 auto *DstTy = cast<FixedVectorType>(CI->getType());
2796 Rep = CI->getArgOperand(0);
2797 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2798 unsigned NumDstElts = DstTy->getNumElements();
2799 if (NumDstElts != SrcTy->getNumElements()) {
2800 assert(NumDstElts == 4 && "Unexpected vector size");
2801 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2802 }
2803 Rep = Builder.CreateBitCast(
2804 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2805 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2806 if (CI->arg_size() >= 3)
2807 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2808 CI->getArgOperand(1));
2809 } else if (Name.starts_with("avx512.mask.load")) {
2810 // "avx512.mask.loadu." or "avx512.mask.load."
2811 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2812 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2813 CI->getArgOperand(2), Aligned);
2814 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2815 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2816 Type *PtrTy = ResultTy->getElementType();
2817
2818 // Cast the pointer to element type.
2819 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2821
2822 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2823 ResultTy->getNumElements());
2824
2825 Rep = Builder.CreateIntrinsic(Intrinsic::masked_expandload, ResultTy,
2826 {Ptr, MaskVec, CI->getOperand(1)});
2827 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2828 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2829 Type *PtrTy = ResultTy->getElementType();
2830
2831 // Cast the pointer to element type.
2832 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2834
2835 Value *MaskVec =
2836 getX86MaskVec(Builder, CI->getArgOperand(2),
2837 cast<FixedVectorType>(ResultTy)->getNumElements());
2838
2839 Rep = Builder.CreateIntrinsic(Intrinsic::masked_compressstore, ResultTy,
2840 {CI->getArgOperand(1), Ptr, MaskVec});
2841 } else if (Name.starts_with("avx512.mask.compress.") ||
2842 Name.starts_with("avx512.mask.expand.")) {
2843 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2844
2845 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2846 ResultTy->getNumElements());
2847
2848 bool IsCompress = Name[12] == 'c';
2849 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2850 : Intrinsic::x86_avx512_mask_expand;
2851 Rep = Builder.CreateIntrinsic(
2852 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2853 } else if (Name.starts_with("xop.vpcom")) {
2854 bool IsSigned;
2855 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2856 Name.ends_with("uq"))
2857 IsSigned = false;
2858 else if (Name.ends_with("b") || Name.ends_with("w") ||
2859 Name.ends_with("d") || Name.ends_with("q"))
2860 IsSigned = true;
2861 else
2862 llvm_unreachable("Unknown suffix");
2863
2864 unsigned Imm;
2865 if (CI->arg_size() == 3) {
2866 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2867 } else {
2868 Name = Name.substr(9); // strip off "xop.vpcom"
2869 if (Name.starts_with("lt"))
2870 Imm = 0;
2871 else if (Name.starts_with("le"))
2872 Imm = 1;
2873 else if (Name.starts_with("gt"))
2874 Imm = 2;
2875 else if (Name.starts_with("ge"))
2876 Imm = 3;
2877 else if (Name.starts_with("eq"))
2878 Imm = 4;
2879 else if (Name.starts_with("ne"))
2880 Imm = 5;
2881 else if (Name.starts_with("false"))
2882 Imm = 6;
2883 else if (Name.starts_with("true"))
2884 Imm = 7;
2885 else
2886 llvm_unreachable("Unknown condition");
2887 }
2888
2889 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2890 } else if (Name.starts_with("xop.vpcmov")) {
2891 Value *Sel = CI->getArgOperand(2);
2892 Value *NotSel = Builder.CreateNot(Sel);
2893 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2894 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2895 Rep = Builder.CreateOr(Sel0, Sel1);
2896 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2897 Name.starts_with("avx512.mask.prol")) {
2898 Rep = upgradeX86Rotate(Builder, *CI, false);
2899 } else if (Name.starts_with("avx512.pror") ||
2900 Name.starts_with("avx512.mask.pror")) {
2901 Rep = upgradeX86Rotate(Builder, *CI, true);
2902 } else if (Name.starts_with("avx512.vpshld.") ||
2903 Name.starts_with("avx512.mask.vpshld") ||
2904 Name.starts_with("avx512.maskz.vpshld")) {
2905 bool ZeroMask = Name[11] == 'z';
2906 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2907 } else if (Name.starts_with("avx512.vpshrd.") ||
2908 Name.starts_with("avx512.mask.vpshrd") ||
2909 Name.starts_with("avx512.maskz.vpshrd")) {
2910 bool ZeroMask = Name[11] == 'z';
2911 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2912 } else if (Name == "sse42.crc32.64.8") {
2913 Value *Trunc0 =
2914 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2915 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {},
2916 {Trunc0, CI->getArgOperand(1)});
2917 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2918 } else if (Name.starts_with("avx.vbroadcast.s") ||
2919 Name.starts_with("avx512.vbroadcast.s")) {
2920 // Replace broadcasts with a series of insertelements.
2921 auto *VecTy = cast<FixedVectorType>(CI->getType());
2922 Type *EltTy = VecTy->getElementType();
2923 unsigned EltNum = VecTy->getNumElements();
2924 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2925 Type *I32Ty = Type::getInt32Ty(C);
2926 Rep = PoisonValue::get(VecTy);
2927 for (unsigned I = 0; I < EltNum; ++I)
2928 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2929 } else if (Name.starts_with("sse41.pmovsx") ||
2930 Name.starts_with("sse41.pmovzx") ||
2931 Name.starts_with("avx2.pmovsx") ||
2932 Name.starts_with("avx2.pmovzx") ||
2933 Name.starts_with("avx512.mask.pmovsx") ||
2934 Name.starts_with("avx512.mask.pmovzx")) {
2935 auto *DstTy = cast<FixedVectorType>(CI->getType());
2936 unsigned NumDstElts = DstTy->getNumElements();
2937
2938 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2939 SmallVector<int, 8> ShuffleMask(NumDstElts);
2940 for (unsigned i = 0; i != NumDstElts; ++i)
2941 ShuffleMask[i] = i;
2942
2943 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2944
2945 bool DoSext = Name.contains("pmovsx");
2946 Rep =
2947 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2948 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2949 if (CI->arg_size() == 3)
2950 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2951 CI->getArgOperand(1));
2952 } else if (Name == "avx512.mask.pmov.qd.256" ||
2953 Name == "avx512.mask.pmov.qd.512" ||
2954 Name == "avx512.mask.pmov.wb.256" ||
2955 Name == "avx512.mask.pmov.wb.512") {
2956 Type *Ty = CI->getArgOperand(1)->getType();
2957 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2958 Rep =
2959 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2960 } else if (Name.starts_with("avx.vbroadcastf128") ||
2961 Name == "avx2.vbroadcasti128") {
2962 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2963 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2964 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2965 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2966 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2967 PointerType::getUnqual(VT));
2968 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2969 if (NumSrcElts == 2)
2970 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2971 else
2972 Rep = Builder.CreateShuffleVector(Load,
2973 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2974 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2975 Name.starts_with("avx512.mask.shuf.f")) {
2976 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2977 Type *VT = CI->getType();
2978 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2979 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2980 unsigned ControlBitsMask = NumLanes - 1;
2981 unsigned NumControlBits = NumLanes / 2;
2982 SmallVector<int, 8> ShuffleMask(0);
2983
2984 for (unsigned l = 0; l != NumLanes; ++l) {
2985 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2986 // We actually need the other source.
2987 if (l >= NumLanes / 2)
2988 LaneMask += NumLanes;
2989 for (unsigned i = 0; i != NumElementsInLane; ++i)
2990 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2991 }
2992 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2993 CI->getArgOperand(1), ShuffleMask);
2994 Rep =
2995 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2996 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2997 Name.starts_with("avx512.mask.broadcasti")) {
2998 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2999 ->getNumElements();
3000 unsigned NumDstElts =
3001 cast<FixedVectorType>(CI->getType())->getNumElements();
3002
3003 SmallVector<int, 8> ShuffleMask(NumDstElts);
3004 for (unsigned i = 0; i != NumDstElts; ++i)
3005 ShuffleMask[i] = i % NumSrcElts;
3006
3007 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3008 CI->getArgOperand(0), ShuffleMask);
3009 Rep =
3010 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3011 } else if (Name.starts_with("avx2.pbroadcast") ||
3012 Name.starts_with("avx2.vbroadcast") ||
3013 Name.starts_with("avx512.pbroadcast") ||
3014 Name.starts_with("avx512.mask.broadcast.s")) {
3015 // Replace vp?broadcasts with a vector shuffle.
3016 Value *Op = CI->getArgOperand(0);
3017 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3018 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3021 Rep = Builder.CreateShuffleVector(Op, M);
3022
3023 if (CI->arg_size() == 3)
3024 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3025 CI->getArgOperand(1));
3026 } else if (Name.starts_with("sse2.padds.") ||
3027 Name.starts_with("avx2.padds.") ||
3028 Name.starts_with("avx512.padds.") ||
3029 Name.starts_with("avx512.mask.padds.")) {
3030 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3031 } else if (Name.starts_with("sse2.psubs.") ||
3032 Name.starts_with("avx2.psubs.") ||
3033 Name.starts_with("avx512.psubs.") ||
3034 Name.starts_with("avx512.mask.psubs.")) {
3035 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3036 } else if (Name.starts_with("sse2.paddus.") ||
3037 Name.starts_with("avx2.paddus.") ||
3038 Name.starts_with("avx512.mask.paddus.")) {
3039 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3040 } else if (Name.starts_with("sse2.psubus.") ||
3041 Name.starts_with("avx2.psubus.") ||
3042 Name.starts_with("avx512.mask.psubus.")) {
3043 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3044 } else if (Name.starts_with("avx512.mask.palignr.")) {
3045 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3046 CI->getArgOperand(1), CI->getArgOperand(2),
3047 CI->getArgOperand(3), CI->getArgOperand(4),
3048 false);
3049 } else if (Name.starts_with("avx512.mask.valign.")) {
3051 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3052 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3053 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3054 // 128/256-bit shift left specified in bits.
3055 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3056 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3057 Shift / 8); // Shift is in bits.
3058 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3059 // 128/256-bit shift right specified in bits.
3060 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3061 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3062 Shift / 8); // Shift is in bits.
3063 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3064 Name == "avx512.psll.dq.512") {
3065 // 128/256/512-bit shift left specified in bytes.
3066 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3068 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3069 Name == "avx512.psrl.dq.512") {
3070 // 128/256/512-bit shift right specified in bytes.
3071 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3072 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3073 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3074 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3075 Name.starts_with("avx2.pblendd.")) {
3076 Value *Op0 = CI->getArgOperand(0);
3077 Value *Op1 = CI->getArgOperand(1);
3078 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3079 auto *VecTy = cast<FixedVectorType>(CI->getType());
3080 unsigned NumElts = VecTy->getNumElements();
3081
3082 SmallVector<int, 16> Idxs(NumElts);
3083 for (unsigned i = 0; i != NumElts; ++i)
3084 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3085
3086 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3087 } else if (Name.starts_with("avx.vinsertf128.") ||
3088 Name == "avx2.vinserti128" ||
3089 Name.starts_with("avx512.mask.insert")) {
3090 Value *Op0 = CI->getArgOperand(0);
3091 Value *Op1 = CI->getArgOperand(1);
3092 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3093 unsigned DstNumElts =
3094 cast<FixedVectorType>(CI->getType())->getNumElements();
3095 unsigned SrcNumElts =
3096 cast<FixedVectorType>(Op1->getType())->getNumElements();
3097 unsigned Scale = DstNumElts / SrcNumElts;
3098
3099 // Mask off the high bits of the immediate value; hardware ignores those.
3100 Imm = Imm % Scale;
3101
3102 // Extend the second operand into a vector the size of the destination.
3103 SmallVector<int, 8> Idxs(DstNumElts);
3104 for (unsigned i = 0; i != SrcNumElts; ++i)
3105 Idxs[i] = i;
3106 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3107 Idxs[i] = SrcNumElts;
3108 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3109
3110 // Insert the second operand into the first operand.
3111
3112 // Note that there is no guarantee that instruction lowering will actually
3113 // produce a vinsertf128 instruction for the created shuffles. In
3114 // particular, the 0 immediate case involves no lane changes, so it can
3115 // be handled as a blend.
3116
3117 // Example of shuffle mask for 32-bit elements:
3118 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3119 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3120
3121 // First fill with identity mask.
3122 for (unsigned i = 0; i != DstNumElts; ++i)
3123 Idxs[i] = i;
3124 // Then replace the elements where we need to insert.
3125 for (unsigned i = 0; i != SrcNumElts; ++i)
3126 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3127 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3128
3129 // If the intrinsic has a mask operand, handle that.
3130 if (CI->arg_size() == 5)
3131 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3132 CI->getArgOperand(3));
3133 } else if (Name.starts_with("avx.vextractf128.") ||
3134 Name == "avx2.vextracti128" ||
3135 Name.starts_with("avx512.mask.vextract")) {
3136 Value *Op0 = CI->getArgOperand(0);
3137 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3138 unsigned DstNumElts =
3139 cast<FixedVectorType>(CI->getType())->getNumElements();
3140 unsigned SrcNumElts =
3141 cast<FixedVectorType>(Op0->getType())->getNumElements();
3142 unsigned Scale = SrcNumElts / DstNumElts;
3143
3144 // Mask off the high bits of the immediate value; hardware ignores those.
3145 Imm = Imm % Scale;
3146
3147 // Get indexes for the subvector of the input vector.
3148 SmallVector<int, 8> Idxs(DstNumElts);
3149 for (unsigned i = 0; i != DstNumElts; ++i) {
3150 Idxs[i] = i + (Imm * DstNumElts);
3151 }
3152 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3153
3154 // If the intrinsic has a mask operand, handle that.
3155 if (CI->arg_size() == 4)
3156 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3157 CI->getArgOperand(2));
3158 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3159 Name.starts_with("avx512.mask.perm.di.")) {
3160 Value *Op0 = CI->getArgOperand(0);
3161 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3162 auto *VecTy = cast<FixedVectorType>(CI->getType());
3163 unsigned NumElts = VecTy->getNumElements();
3164
3165 SmallVector<int, 8> Idxs(NumElts);
3166 for (unsigned i = 0; i != NumElts; ++i)
3167 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3168
3169 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3170
3171 if (CI->arg_size() == 4)
3172 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3173 CI->getArgOperand(2));
3174 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3175 // The immediate permute control byte looks like this:
3176 // [1:0] - select 128 bits from sources for low half of destination
3177 // [2] - ignore
3178 // [3] - zero low half of destination
3179 // [5:4] - select 128 bits from sources for high half of destination
3180 // [6] - ignore
3181 // [7] - zero high half of destination
3182
3183 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3184
3185 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3186 unsigned HalfSize = NumElts / 2;
3187 SmallVector<int, 8> ShuffleMask(NumElts);
3188
3189 // Determine which operand(s) are actually in use for this instruction.
3190 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3191 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3192
3193 // If needed, replace operands based on zero mask.
3194 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3195 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3196
3197 // Permute low half of result.
3198 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3199 for (unsigned i = 0; i < HalfSize; ++i)
3200 ShuffleMask[i] = StartIndex + i;
3201
3202 // Permute high half of result.
3203 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3204 for (unsigned i = 0; i < HalfSize; ++i)
3205 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3206
3207 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3208
3209 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3210 Name.starts_with("avx512.mask.vpermil.p") ||
3211 Name.starts_with("avx512.mask.pshuf.d.")) {
3212 Value *Op0 = CI->getArgOperand(0);
3213 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3214 auto *VecTy = cast<FixedVectorType>(CI->getType());
3215 unsigned NumElts = VecTy->getNumElements();
3216 // Calculate the size of each index in the immediate.
3217 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3218 unsigned IdxMask = ((1 << IdxSize) - 1);
3219
3220 SmallVector<int, 8> Idxs(NumElts);
3221 // Lookup the bits for this element, wrapping around the immediate every
3222 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3223 // to offset by the first index of each group.
3224 for (unsigned i = 0; i != NumElts; ++i)
3225 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3226
3227 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3228
3229 if (CI->arg_size() == 4)
3230 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3231 CI->getArgOperand(2));
3232 } else if (Name == "sse2.pshufl.w" ||
3233 Name.starts_with("avx512.mask.pshufl.w.")) {
3234 Value *Op0 = CI->getArgOperand(0);
3235 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3236 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3237
3238 SmallVector<int, 16> Idxs(NumElts);
3239 for (unsigned l = 0; l != NumElts; l += 8) {
3240 for (unsigned i = 0; i != 4; ++i)
3241 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3242 for (unsigned i = 4; i != 8; ++i)
3243 Idxs[i + l] = i + l;
3244 }
3245
3246 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3247
3248 if (CI->arg_size() == 4)
3249 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3250 CI->getArgOperand(2));
3251 } else if (Name == "sse2.pshufh.w" ||
3252 Name.starts_with("avx512.mask.pshufh.w.")) {
3253 Value *Op0 = CI->getArgOperand(0);
3254 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3255 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3256
3257 SmallVector<int, 16> Idxs(NumElts);
3258 for (unsigned l = 0; l != NumElts; l += 8) {
3259 for (unsigned i = 0; i != 4; ++i)
3260 Idxs[i + l] = i + l;
3261 for (unsigned i = 0; i != 4; ++i)
3262 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3263 }
3264
3265 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3266
3267 if (CI->arg_size() == 4)
3268 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3269 CI->getArgOperand(2));
3270 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3271 Value *Op0 = CI->getArgOperand(0);
3272 Value *Op1 = CI->getArgOperand(1);
3273 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3274 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3275
3276 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3277 unsigned HalfLaneElts = NumLaneElts / 2;
3278
3279 SmallVector<int, 16> Idxs(NumElts);
3280 for (unsigned i = 0; i != NumElts; ++i) {
3281 // Base index is the starting element of the lane.
3282 Idxs[i] = i - (i % NumLaneElts);
3283 // If we are half way through the lane switch to the other source.
3284 if ((i % NumLaneElts) >= HalfLaneElts)
3285 Idxs[i] += NumElts;
3286 // Now select the specific element. By adding HalfLaneElts bits from
3287 // the immediate. Wrapping around the immediate every 8-bits.
3288 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3289 }
3290
3291 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3292
3293 Rep =
3294 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3295 } else if (Name.starts_with("avx512.mask.movddup") ||
3296 Name.starts_with("avx512.mask.movshdup") ||
3297 Name.starts_with("avx512.mask.movsldup")) {
3298 Value *Op0 = CI->getArgOperand(0);
3299 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3300 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3301
3302 unsigned Offset = 0;
3303 if (Name.starts_with("avx512.mask.movshdup."))
3304 Offset = 1;
3305
3306 SmallVector<int, 16> Idxs(NumElts);
3307 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3308 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3309 Idxs[i + l + 0] = i + l + Offset;
3310 Idxs[i + l + 1] = i + l + Offset;
3311 }
3312
3313 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3314
3315 Rep =
3316 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3317 } else if (Name.starts_with("avx512.mask.punpckl") ||
3318 Name.starts_with("avx512.mask.unpckl.")) {
3319 Value *Op0 = CI->getArgOperand(0);
3320 Value *Op1 = CI->getArgOperand(1);
3321 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3322 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3323
3324 SmallVector<int, 64> Idxs(NumElts);
3325 for (int l = 0; l != NumElts; l += NumLaneElts)
3326 for (int i = 0; i != NumLaneElts; ++i)
3327 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3328
3329 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3330
3331 Rep =
3332 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3333 } else if (Name.starts_with("avx512.mask.punpckh") ||
3334 Name.starts_with("avx512.mask.unpckh.")) {
3335 Value *Op0 = CI->getArgOperand(0);
3336 Value *Op1 = CI->getArgOperand(1);
3337 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3338 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3339
3340 SmallVector<int, 64> Idxs(NumElts);
3341 for (int l = 0; l != NumElts; l += NumLaneElts)
3342 for (int i = 0; i != NumLaneElts; ++i)
3343 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3344
3345 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3346
3347 Rep =
3348 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3349 } else if (Name.starts_with("avx512.mask.and.") ||
3350 Name.starts_with("avx512.mask.pand.")) {
3351 VectorType *FTy = cast<VectorType>(CI->getType());
3352 VectorType *ITy = VectorType::getInteger(FTy);
3353 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3354 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3355 Rep = Builder.CreateBitCast(Rep, FTy);
3356 Rep =
3357 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3358 } else if (Name.starts_with("avx512.mask.andn.") ||
3359 Name.starts_with("avx512.mask.pandn.")) {
3360 VectorType *FTy = cast<VectorType>(CI->getType());
3361 VectorType *ITy = VectorType::getInteger(FTy);
3362 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3363 Rep = Builder.CreateAnd(Rep,
3364 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3365 Rep = Builder.CreateBitCast(Rep, FTy);
3366 Rep =
3367 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3368 } else if (Name.starts_with("avx512.mask.or.") ||
3369 Name.starts_with("avx512.mask.por.")) {
3370 VectorType *FTy = cast<VectorType>(CI->getType());
3371 VectorType *ITy = VectorType::getInteger(FTy);
3372 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3373 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3374 Rep = Builder.CreateBitCast(Rep, FTy);
3375 Rep =
3376 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3377 } else if (Name.starts_with("avx512.mask.xor.") ||
3378 Name.starts_with("avx512.mask.pxor.")) {
3379 VectorType *FTy = cast<VectorType>(CI->getType());
3380 VectorType *ITy = VectorType::getInteger(FTy);
3381 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3382 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3383 Rep = Builder.CreateBitCast(Rep, FTy);
3384 Rep =
3385 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3386 } else if (Name.starts_with("avx512.mask.padd.")) {
3387 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3388 Rep =
3389 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3390 } else if (Name.starts_with("avx512.mask.psub.")) {
3391 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3392 Rep =
3393 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3394 } else if (Name.starts_with("avx512.mask.pmull.")) {
3395 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3396 Rep =
3397 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3398 } else if (Name.starts_with("avx512.mask.add.p")) {
3399 if (Name.ends_with(".512")) {
3400 Intrinsic::ID IID;
3401 if (Name[17] == 's')
3402 IID = Intrinsic::x86_avx512_add_ps_512;
3403 else
3404 IID = Intrinsic::x86_avx512_add_pd_512;
3405
3406 Rep = Builder.CreateIntrinsic(
3407 IID, {},
3408 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3409 } else {
3410 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3411 }
3412 Rep =
3413 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3414 } else if (Name.starts_with("avx512.mask.div.p")) {
3415 if (Name.ends_with(".512")) {
3416 Intrinsic::ID IID;
3417 if (Name[17] == 's')
3418 IID = Intrinsic::x86_avx512_div_ps_512;
3419 else
3420 IID = Intrinsic::x86_avx512_div_pd_512;
3421
3422 Rep = Builder.CreateIntrinsic(
3423 IID, {},
3424 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3425 } else {
3426 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3427 }
3428 Rep =
3429 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3430 } else if (Name.starts_with("avx512.mask.mul.p")) {
3431 if (Name.ends_with(".512")) {
3432 Intrinsic::ID IID;
3433 if (Name[17] == 's')
3434 IID = Intrinsic::x86_avx512_mul_ps_512;
3435 else
3436 IID = Intrinsic::x86_avx512_mul_pd_512;
3437
3438 Rep = Builder.CreateIntrinsic(
3439 IID, {},
3440 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3441 } else {
3442 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3443 }
3444 Rep =
3445 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3446 } else if (Name.starts_with("avx512.mask.sub.p")) {
3447 if (Name.ends_with(".512")) {
3448 Intrinsic::ID IID;
3449 if (Name[17] == 's')
3450 IID = Intrinsic::x86_avx512_sub_ps_512;
3451 else
3452 IID = Intrinsic::x86_avx512_sub_pd_512;
3453
3454 Rep = Builder.CreateIntrinsic(
3455 IID, {},
3456 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3457 } else {
3458 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3459 }
3460 Rep =
3461 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3462 } else if ((Name.starts_with("avx512.mask.max.p") ||
3463 Name.starts_with("avx512.mask.min.p")) &&
3464 Name.drop_front(18) == ".512") {
3465 bool IsDouble = Name[17] == 'd';
3466 bool IsMin = Name[13] == 'i';
3467 static const Intrinsic::ID MinMaxTbl[2][2] = {
3468 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3469 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3470 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3471
3472 Rep = Builder.CreateIntrinsic(
3473 IID, {},
3474 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3475 Rep =
3476 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3477 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3478 Rep =
3479 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3480 {CI->getArgOperand(0), Builder.getInt1(false)});
3481 Rep =
3482 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3483 } else if (Name.starts_with("avx512.mask.psll")) {
3484 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3485 bool IsVariable = Name[16] == 'v';
3486 char Size = Name[16] == '.' ? Name[17]
3487 : Name[17] == '.' ? Name[18]
3488 : Name[18] == '.' ? Name[19]
3489 : Name[20];
3490
3491 Intrinsic::ID IID;
3492 if (IsVariable && Name[17] != '.') {
3493 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3494 IID = Intrinsic::x86_avx2_psllv_q;
3495 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3496 IID = Intrinsic::x86_avx2_psllv_q_256;
3497 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3498 IID = Intrinsic::x86_avx2_psllv_d;
3499 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3500 IID = Intrinsic::x86_avx2_psllv_d_256;
3501 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3502 IID = Intrinsic::x86_avx512_psllv_w_128;
3503 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3504 IID = Intrinsic::x86_avx512_psllv_w_256;
3505 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3506 IID = Intrinsic::x86_avx512_psllv_w_512;
3507 else
3508 llvm_unreachable("Unexpected size");
3509 } else if (Name.ends_with(".128")) {
3510 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3511 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3512 : Intrinsic::x86_sse2_psll_d;
3513 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3514 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3515 : Intrinsic::x86_sse2_psll_q;
3516 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3517 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3518 : Intrinsic::x86_sse2_psll_w;
3519 else
3520 llvm_unreachable("Unexpected size");
3521 } else if (Name.ends_with(".256")) {
3522 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3523 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3524 : Intrinsic::x86_avx2_psll_d;
3525 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3526 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3527 : Intrinsic::x86_avx2_psll_q;
3528 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3529 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3530 : Intrinsic::x86_avx2_psll_w;
3531 else
3532 llvm_unreachable("Unexpected size");
3533 } else {
3534 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3535 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3536 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3537 : Intrinsic::x86_avx512_psll_d_512;
3538 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3539 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3540 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3541 : Intrinsic::x86_avx512_psll_q_512;
3542 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3543 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3544 : Intrinsic::x86_avx512_psll_w_512;
3545 else
3546 llvm_unreachable("Unexpected size");
3547 }
3548
3549 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3550 } else if (Name.starts_with("avx512.mask.psrl")) {
3551 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3552 bool IsVariable = Name[16] == 'v';
3553 char Size = Name[16] == '.' ? Name[17]
3554 : Name[17] == '.' ? Name[18]
3555 : Name[18] == '.' ? Name[19]
3556 : Name[20];
3557
3558 Intrinsic::ID IID;
3559 if (IsVariable && Name[17] != '.') {
3560 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3561 IID = Intrinsic::x86_avx2_psrlv_q;
3562 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3563 IID = Intrinsic::x86_avx2_psrlv_q_256;
3564 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3565 IID = Intrinsic::x86_avx2_psrlv_d;
3566 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3567 IID = Intrinsic::x86_avx2_psrlv_d_256;
3568 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3569 IID = Intrinsic::x86_avx512_psrlv_w_128;
3570 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3571 IID = Intrinsic::x86_avx512_psrlv_w_256;
3572 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3573 IID = Intrinsic::x86_avx512_psrlv_w_512;
3574 else
3575 llvm_unreachable("Unexpected size");
3576 } else if (Name.ends_with(".128")) {
3577 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3578 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3579 : Intrinsic::x86_sse2_psrl_d;
3580 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3581 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3582 : Intrinsic::x86_sse2_psrl_q;
3583 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3584 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3585 : Intrinsic::x86_sse2_psrl_w;
3586 else
3587 llvm_unreachable("Unexpected size");
3588 } else if (Name.ends_with(".256")) {
3589 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3590 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3591 : Intrinsic::x86_avx2_psrl_d;
3592 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3593 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3594 : Intrinsic::x86_avx2_psrl_q;
3595 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3596 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3597 : Intrinsic::x86_avx2_psrl_w;
3598 else
3599 llvm_unreachable("Unexpected size");
3600 } else {
3601 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3602 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3603 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3604 : Intrinsic::x86_avx512_psrl_d_512;
3605 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3606 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3607 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3608 : Intrinsic::x86_avx512_psrl_q_512;
3609 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3610 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3611 : Intrinsic::x86_avx512_psrl_w_512;
3612 else
3613 llvm_unreachable("Unexpected size");
3614 }
3615
3616 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3617 } else if (Name.starts_with("avx512.mask.psra")) {
3618 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3619 bool IsVariable = Name[16] == 'v';
3620 char Size = Name[16] == '.' ? Name[17]
3621 : Name[17] == '.' ? Name[18]
3622 : Name[18] == '.' ? Name[19]
3623 : Name[20];
3624
3625 Intrinsic::ID IID;
3626 if (IsVariable && Name[17] != '.') {
3627 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3628 IID = Intrinsic::x86_avx2_psrav_d;
3629 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3630 IID = Intrinsic::x86_avx2_psrav_d_256;
3631 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3632 IID = Intrinsic::x86_avx512_psrav_w_128;
3633 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3634 IID = Intrinsic::x86_avx512_psrav_w_256;
3635 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3636 IID = Intrinsic::x86_avx512_psrav_w_512;
3637 else
3638 llvm_unreachable("Unexpected size");
3639 } else if (Name.ends_with(".128")) {
3640 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3641 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3642 : Intrinsic::x86_sse2_psra_d;
3643 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3644 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3645 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3646 : Intrinsic::x86_avx512_psra_q_128;
3647 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3648 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3649 : Intrinsic::x86_sse2_psra_w;
3650 else
3651 llvm_unreachable("Unexpected size");
3652 } else if (Name.ends_with(".256")) {
3653 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3654 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3655 : Intrinsic::x86_avx2_psra_d;
3656 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3657 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3658 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3659 : Intrinsic::x86_avx512_psra_q_256;
3660 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3661 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3662 : Intrinsic::x86_avx2_psra_w;
3663 else
3664 llvm_unreachable("Unexpected size");
3665 } else {
3666 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3667 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3668 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3669 : Intrinsic::x86_avx512_psra_d_512;
3670 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3671 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3672 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3673 : Intrinsic::x86_avx512_psra_q_512;
3674 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3675 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3676 : Intrinsic::x86_avx512_psra_w_512;
3677 else
3678 llvm_unreachable("Unexpected size");
3679 }
3680
3681 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3682 } else if (Name.starts_with("avx512.mask.move.s")) {
3683 Rep = upgradeMaskedMove(Builder, *CI);
3684 } else if (Name.starts_with("avx512.cvtmask2")) {
3685 Rep = upgradeMaskToInt(Builder, *CI);
3686 } else if (Name.ends_with(".movntdqa")) {
3688 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3689
3690 Value *Ptr = CI->getArgOperand(0);
3691
3692 // Convert the type of the pointer to a pointer to the stored type.
3693 Value *BC = Builder.CreateBitCast(
3694 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3695 LoadInst *LI = Builder.CreateAlignedLoad(
3696 CI->getType(), BC,
3698 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3699 Rep = LI;
3700 } else if (Name.starts_with("fma.vfmadd.") ||
3701 Name.starts_with("fma.vfmsub.") ||
3702 Name.starts_with("fma.vfnmadd.") ||
3703 Name.starts_with("fma.vfnmsub.")) {
3704 bool NegMul = Name[6] == 'n';
3705 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3706 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3707
3708 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3709 CI->getArgOperand(2)};
3710
3711 if (IsScalar) {
3712 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3713 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3714 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3715 }
3716
3717 if (NegMul && !IsScalar)
3718 Ops[0] = Builder.CreateFNeg(Ops[0]);
3719 if (NegMul && IsScalar)
3720 Ops[1] = Builder.CreateFNeg(Ops[1]);
3721 if (NegAcc)
3722 Ops[2] = Builder.CreateFNeg(Ops[2]);
3723
3724 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3725
3726 if (IsScalar)
3727 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3728 } else if (Name.starts_with("fma4.vfmadd.s")) {
3729 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3730 CI->getArgOperand(2)};
3731
3732 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3733 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3734 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3735
3736 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3737
3739 Rep, (uint64_t)0);
3740 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3741 Name.starts_with("avx512.maskz.vfmadd.s") ||
3742 Name.starts_with("avx512.mask3.vfmadd.s") ||
3743 Name.starts_with("avx512.mask3.vfmsub.s") ||
3744 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3745 bool IsMask3 = Name[11] == '3';
3746 bool IsMaskZ = Name[11] == 'z';
3747 // Drop the "avx512.mask." to make it easier.
3748 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3749 bool NegMul = Name[2] == 'n';
3750 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3751
3752 Value *A = CI->getArgOperand(0);
3753 Value *B = CI->getArgOperand(1);
3754 Value *C = CI->getArgOperand(2);
3755
3756 if (NegMul && (IsMask3 || IsMaskZ))
3757 A = Builder.CreateFNeg(A);
3758 if (NegMul && !(IsMask3 || IsMaskZ))
3759 B = Builder.CreateFNeg(B);
3760 if (NegAcc)
3761 C = Builder.CreateFNeg(C);
3762
3763 A = Builder.CreateExtractElement(A, (uint64_t)0);
3764 B = Builder.CreateExtractElement(B, (uint64_t)0);
3765 C = Builder.CreateExtractElement(C, (uint64_t)0);
3766
3767 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3768 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3769 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3770
3771 Intrinsic::ID IID;
3772 if (Name.back() == 'd')
3773 IID = Intrinsic::x86_avx512_vfmadd_f64;
3774 else
3775 IID = Intrinsic::x86_avx512_vfmadd_f32;
3776 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3777 } else {
3778 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3779 }
3780
3781 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3782 : IsMask3 ? C
3783 : A;
3784
3785 // For Mask3 with NegAcc, we need to create a new extractelement that
3786 // avoids the negation above.
3787 if (NegAcc && IsMask3)
3788 PassThru =
3789 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3790
3791 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3792 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3793 (uint64_t)0);
3794 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3795 Name.starts_with("avx512.mask.vfnmadd.p") ||
3796 Name.starts_with("avx512.mask.vfnmsub.p") ||
3797 Name.starts_with("avx512.mask3.vfmadd.p") ||
3798 Name.starts_with("avx512.mask3.vfmsub.p") ||
3799 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3800 Name.starts_with("avx512.maskz.vfmadd.p")) {
3801 bool IsMask3 = Name[11] == '3';
3802 bool IsMaskZ = Name[11] == 'z';
3803 // Drop the "avx512.mask." to make it easier.
3804 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3805 bool NegMul = Name[2] == 'n';
3806 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3807
3808 Value *A = CI->getArgOperand(0);
3809 Value *B = CI->getArgOperand(1);
3810 Value *C = CI->getArgOperand(2);
3811
3812 if (NegMul && (IsMask3 || IsMaskZ))
3813 A = Builder.CreateFNeg(A);
3814 if (NegMul && !(IsMask3 || IsMaskZ))
3815 B = Builder.CreateFNeg(B);
3816 if (NegAcc)
3817 C = Builder.CreateFNeg(C);
3818
3819 if (CI->arg_size() == 5 &&
3820 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3821 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3822 Intrinsic::ID IID;
3823 // Check the character before ".512" in string.
3824 if (Name[Name.size() - 5] == 's')
3825 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3826 else
3827 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3828
3829 Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
3830 } else {
3831 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3832 }
3833
3834 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3835 : IsMask3 ? CI->getArgOperand(2)
3836 : CI->getArgOperand(0);
3837
3838 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3839 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3840 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3841 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3842 Intrinsic::ID IID;
3843 if (VecWidth == 128 && EltWidth == 32)
3844 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3845 else if (VecWidth == 256 && EltWidth == 32)
3846 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3847 else if (VecWidth == 128 && EltWidth == 64)
3848 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3849 else if (VecWidth == 256 && EltWidth == 64)
3850 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3851 else
3852 llvm_unreachable("Unexpected intrinsic");
3853
3854 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3855 CI->getArgOperand(2)};
3856 Ops[2] = Builder.CreateFNeg(Ops[2]);
3857 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3858 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3859 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3860 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3861 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3862 bool IsMask3 = Name[11] == '3';
3863 bool IsMaskZ = Name[11] == 'z';
3864 // Drop the "avx512.mask." to make it easier.
3865 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3866 bool IsSubAdd = Name[3] == 's';
3867 if (CI->arg_size() == 5) {
3868 Intrinsic::ID IID;
3869 // Check the character before ".512" in string.
3870 if (Name[Name.size() - 5] == 's')
3871 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3872 else
3873 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3874
3875 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3876 CI->getArgOperand(2), CI->getArgOperand(4)};
3877 if (IsSubAdd)
3878 Ops[2] = Builder.CreateFNeg(Ops[2]);
3879
3880 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3881 } else {
3882 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3883
3884 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3885 CI->getArgOperand(2)};
3886
3888 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
3889 Value *Odd = Builder.CreateCall(FMA, Ops);
3890 Ops[2] = Builder.CreateFNeg(Ops[2]);
3891 Value *Even = Builder.CreateCall(FMA, Ops);
3892
3893 if (IsSubAdd)
3894 std::swap(Even, Odd);
3895
3896 SmallVector<int, 32> Idxs(NumElts);
3897 for (int i = 0; i != NumElts; ++i)
3898 Idxs[i] = i + (i % 2) * NumElts;
3899
3900 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3901 }
3902
3903 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3904 : IsMask3 ? CI->getArgOperand(2)
3905 : CI->getArgOperand(0);
3906
3907 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3908 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3909 Name.starts_with("avx512.maskz.pternlog.")) {
3910 bool ZeroMask = Name[11] == 'z';
3911 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3912 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3913 Intrinsic::ID IID;
3914 if (VecWidth == 128 && EltWidth == 32)
3915 IID = Intrinsic::x86_avx512_pternlog_d_128;
3916 else if (VecWidth == 256 && EltWidth == 32)
3917 IID = Intrinsic::x86_avx512_pternlog_d_256;
3918 else if (VecWidth == 512 && EltWidth == 32)
3919 IID = Intrinsic::x86_avx512_pternlog_d_512;
3920 else if (VecWidth == 128 && EltWidth == 64)
3921 IID = Intrinsic::x86_avx512_pternlog_q_128;
3922 else if (VecWidth == 256 && EltWidth == 64)
3923 IID = Intrinsic::x86_avx512_pternlog_q_256;
3924 else if (VecWidth == 512 && EltWidth == 64)
3925 IID = Intrinsic::x86_avx512_pternlog_q_512;
3926 else
3927 llvm_unreachable("Unexpected intrinsic");
3928
3929 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3930 CI->getArgOperand(2), CI->getArgOperand(3)};
3931 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3932 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3933 : CI->getArgOperand(0);
3934 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3935 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3936 Name.starts_with("avx512.maskz.vpmadd52")) {
3937 bool ZeroMask = Name[11] == 'z';
3938 bool High = Name[20] == 'h' || Name[21] == 'h';
3939 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3940 Intrinsic::ID IID;
3941 if (VecWidth == 128 && !High)
3942 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3943 else if (VecWidth == 256 && !High)
3944 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3945 else if (VecWidth == 512 && !High)
3946 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3947 else if (VecWidth == 128 && High)
3948 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3949 else if (VecWidth == 256 && High)
3950 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3951 else if (VecWidth == 512 && High)
3952 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3953 else
3954 llvm_unreachable("Unexpected intrinsic");
3955
3956 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3957 CI->getArgOperand(2)};
3958 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3959 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3960 : CI->getArgOperand(0);
3961 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3962 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3963 Name.starts_with("avx512.mask.vpermt2var.") ||
3964 Name.starts_with("avx512.maskz.vpermt2var.")) {
3965 bool ZeroMask = Name[11] == 'z';
3966 bool IndexForm = Name[17] == 'i';
3967 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3968 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3969 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3970 Name.starts_with("avx512.mask.vpdpbusds.") ||
3971 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3972 bool ZeroMask = Name[11] == 'z';
3973 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3974 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3975 Intrinsic::ID IID;
3976 if (VecWidth == 128 && !IsSaturating)
3977 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3978 else if (VecWidth == 256 && !IsSaturating)
3979 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3980 else if (VecWidth == 512 && !IsSaturating)
3981 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3982 else if (VecWidth == 128 && IsSaturating)
3983 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3984 else if (VecWidth == 256 && IsSaturating)
3985 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3986 else if (VecWidth == 512 && IsSaturating)
3987 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3988 else
3989 llvm_unreachable("Unexpected intrinsic");
3990
3991 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3992 CI->getArgOperand(2)};
3993 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3994 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3995 : CI->getArgOperand(0);
3996 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3997 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3998 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3999 Name.starts_with("avx512.mask.vpdpwssds.") ||
4000 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4001 bool ZeroMask = Name[11] == 'z';
4002 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4003 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4004 Intrinsic::ID IID;
4005 if (VecWidth == 128 && !IsSaturating)
4006 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4007 else if (VecWidth == 256 && !IsSaturating)
4008 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4009 else if (VecWidth == 512 && !IsSaturating)
4010 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4011 else if (VecWidth == 128 && IsSaturating)
4012 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4013 else if (VecWidth == 256 && IsSaturating)
4014 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4015 else if (VecWidth == 512 && IsSaturating)
4016 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4017 else
4018 llvm_unreachable("Unexpected intrinsic");
4019
4020 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4021 CI->getArgOperand(2)};
4022 Rep = Builder.CreateIntrinsic(IID, {}, Args);
4023 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4024 : CI->getArgOperand(0);
4025 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4026 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4027 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4028 Name == "subborrow.u32" || Name == "subborrow.u64") {
4029 Intrinsic::ID IID;
4030 if (Name[0] == 'a' && Name.back() == '2')
4031 IID = Intrinsic::x86_addcarry_32;
4032 else if (Name[0] == 'a' && Name.back() == '4')
4033 IID = Intrinsic::x86_addcarry_64;
4034 else if (Name[0] == 's' && Name.back() == '2')
4035 IID = Intrinsic::x86_subborrow_32;
4036 else if (Name[0] == 's' && Name.back() == '4')
4037 IID = Intrinsic::x86_subborrow_64;
4038 else
4039 llvm_unreachable("Unexpected intrinsic");
4040
4041 // Make a call with 3 operands.
4042 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4043 CI->getArgOperand(2)};
4044 Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);
4045
4046 // Extract the second result and store it.
4047 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4048 // Cast the pointer to the right type.
4049 Value *Ptr = Builder.CreateBitCast(
4050 CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
4051 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4052 // Replace the original call result with the first result of the new call.
4053 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4054
4055 CI->replaceAllUsesWith(CF);
4056 Rep = nullptr;
4057 } else if (Name.starts_with("avx512.mask.") &&
4058 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4059 // Rep will be updated by the call in the condition.
4060 }
4061
4062 return Rep;
4063}
4064
4066 Function *F, IRBuilder<> &Builder) {
     // Upgrades a call to one of the old SVE bf16 conversion intrinsics
     // ("sve.fcvt.bf16f32" / "sve.fcvtnt.bf16f32") to the matching *_v2
     // intrinsic, fixing up the predicate operand on the way. Returns the newly
     // created call.
     // NOTE(review): the first line of the signature and parts of the NewID
     // initializer are not present in this listing.
4067 Intrinsic::ID NewID =
4069 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4070 .Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4072 if (NewID == Intrinsic::not_intrinsic)
4073 llvm_unreachable("Unhandled Intrinsic!");
4074
     // Work on a copy of the call arguments so operand 1 (the predicate) can be
     // replaced below.
4075 SmallVector<Value *, 3> Args(CI->args());
4076
4077 // The original intrinsics incorrectly used a predicate based on the smallest
4078 // element type rather than the largest.
4079 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4080 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4081
4082 if (Args[1]->getType() != BadPredTy)
4083 llvm_unreachable("Unexpected predicate type!");
4084
     // Reinterpret the predicate from <vscale x 8 x i1> to <vscale x 4 x i1> by
     // converting it to svbool and back.
4085 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4086 BadPredTy, Args[1]);
4087 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4088 GoodPredTy, Args[1]);
4089
     // The replacement call reuses the name of the original call.
4090 return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4091 CI->getName());
4092}
4093
4095 IRBuilder<> &Builder) {
     // Upgrades calls to old ARM MVE/CDE intrinsics that used a v4i1 predicate
     // where a v2i1 predicate is now used. Returns the replacement value; any
     // name not handled below hits llvm_unreachable.
     // NOTE(review): the first line of the signature is not present in this
     // listing.
4096 if (Name == "mve.vctp64.old") {
4097 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4098 // correct type.
4099 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4100 CI->getArgOperand(0),
4101 /*FMFSource=*/nullptr, CI->getName());
     // Convert the v2i1 result with pred_v2i, then back to a v4i1 vector with
     // pred_i2v so existing users of the old call still see a v4i1 value.
4102 Value *C1 = Builder.CreateIntrinsic(
4103 Intrinsic::arm_mve_pred_v2i,
4104 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4105 return Builder.CreateIntrinsic(
4106 Intrinsic::arm_mve_pred_i2v,
4107 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4108 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4109 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4110 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4111 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4112 Name ==
4113 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4114 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4115 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4116 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4117 Name ==
4118 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4119 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4120 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4121 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4122 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4123 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4124 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4125 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
     // All of these are re-emitted with the same intrinsic ID but with V2I1Ty
     // as the predicate overload type instead of v4i1.
4126 std::vector<Type *> Tys;
4127 unsigned ID = CI->getIntrinsicID();
4128 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
     // Build the overload type list; which call types it is made of depends on
     // the intrinsic, and the predicate type always comes last.
4129 switch (ID) {
4130 case Intrinsic::arm_mve_mull_int_predicated:
4131 case Intrinsic::arm_mve_vqdmull_predicated:
4132 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4133 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4134 break;
4135 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4136 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4137 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4138 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4139 V2I1Ty};
4140 break;
4141 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4142 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4143 CI->getOperand(1)->getType(), V2I1Ty};
4144 break;
4145 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4146 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4147 CI->getOperand(2)->getType(), V2I1Ty};
4148 break;
4149 case Intrinsic::arm_cde_vcx1q_predicated:
4150 case Intrinsic::arm_cde_vcx1qa_predicated:
4151 case Intrinsic::arm_cde_vcx2q_predicated:
4152 case Intrinsic::arm_cde_vcx2qa_predicated:
4153 case Intrinsic::arm_cde_vcx3q_predicated:
4154 case Intrinsic::arm_cde_vcx3qa_predicated:
4155 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4156 break;
4157 default:
4158 llvm_unreachable("Unhandled Intrinsic!");
4159 }
4160
     // Copy the arguments, round-tripping every operand whose scalar type is i1
     // through arm_mve_pred_v2i / arm_mve_pred_i2v to obtain a v2i1 value.
4161 std::vector<Value *> Ops;
4162 for (Value *Op : CI->args()) {
4163 Type *Ty = Op->getType();
4164 if (Ty->getScalarSizeInBits() == 1) {
4165 Value *C1 = Builder.CreateIntrinsic(
4166 Intrinsic::arm_mve_pred_v2i,
4167 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4168 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4169 }
4170 Ops.push_back(Op);
4171 }
4172
     // The replacement call reuses the name of the original call.
4173 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4174 CI->getName());
4175 }
4176 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4177}
4178
4179// These are expected to have the arguments:
4180// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4181//
4182// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4183//
4185 Function *F, IRBuilder<> &Builder) {
     // Replaces a call to a legacy AMDGPU atomic intrinsic with an atomicrmw
     // instruction. Returns the value that stands in for the call result, or
     // nullptr when the call does not have the expected shape.
     // NOTE(review): the first line of the signature, the head of the RMWOp
     // StringSwitch and the RangeNotPrivate initializer are not present in this
     // listing.
4186 AtomicRMWInst::BinOp RMWOp =
4188 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4189 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4190 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4191 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4192 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4193 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4194 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4195 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4196 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4197 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4198 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4199
     // The operand count includes the callee, so "< 3" means fewer than two
     // call arguments.
4200 unsigned NumOperands = CI->getNumOperands();
4201 if (NumOperands < 3) // Malformed bitcode.
4202 return nullptr;
4203
4204 Value *Ptr = CI->getArgOperand(0);
4205 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4206 if (!PtrTy) // Malformed.
4207 return nullptr;
4208
     // The value operand must have the same type as the call result.
4209 Value *Val = CI->getArgOperand(1);
4210 if (Val->getType() != CI->getType()) // Malformed.
4211 return nullptr;
4212
4213 ConstantInt *OrderArg = nullptr;
4214 bool IsVolatile = false;
4215
4216 // These should have 5 arguments (plus the callee). A separate version of the
4217 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4218 if (NumOperands > 3)
4219 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4220
4221 // Ignore scope argument at 3
4222
     // A volatile argument that is not a constant integer is treated as
     // volatile.
4223 if (NumOperands > 5) {
4224 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4225 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4226 }
4227
     // Default to seq_cst; use the ordering operand only when it is a valid
     // constant, and replace NotAtomic/Unordered with seq_cst as well.
4228 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4229 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4230 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4231 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4232 Order = AtomicOrdering::SequentiallyConsistent;
4233
4234 LLVMContext &Ctx = F->getContext();
4235
4236 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4237 Type *RetTy = CI->getType();
4238 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4239 if (VT->getElementType()->isIntegerTy(16)) {
4240 VectorType *AsBF16 =
4241 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4242 Val = Builder.CreateBitCast(Val, AsBF16);
4243 }
4244 }
4245
4246 // The scope argument never really worked correctly. Use agent as the most
4247 // conservative option which should still always produce the instruction.
4248 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4249 AtomicRMWInst *RMW =
4250 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4251
     // Outside the local address space, tag the atomicrmw with the AMDGPU
     // metadata below; the denormal-mode tag is added only for a float fadd.
4252 unsigned AddrSpace = PtrTy->getAddressSpace();
4253 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4254 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4255 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4256 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4257 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4258 }
4259
     // Flat-address accesses additionally get noalias.addrspace metadata.
4260 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4261 MDBuilder MDB(F->getContext());
4262 MDNode *RangeNotPrivate =
4265 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4266 }
4267
4268 if (IsVolatile)
4269 RMW->setVolatile(true);
4270
     // Cast back to the call's original result type; this matters when Val was
     // bitcast to a bfloat vector above.
4271 return Builder.CreateBitCast(RMW, RetTy);
4272}
4273
4274/// Helper to unwrap intrinsic call MetadataAsValue operands.
4275template <typename MDType>
4276static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4277 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4278 return dyn_cast<MDType>(MAV->getMetadata());
4279 return nullptr;
4280}
4281
4282/// Convert debug intrinsic calls to non-instruction debug records.
4283/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4284/// \p CI - The debug intrinsic call.
4286 DbgRecord *DR = nullptr;
     // NOTE(review): the signature line of this function is not present in this
     // listing. Each branch below builds the debug record that corresponds to
     // one debug intrinsic kind, reading the operands via unwrapMAVOp (which
     // yields nullptr for operands of an unexpected kind).
4287 if (Name == "label") {
4288 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4289 } else if (Name == "assign") {
4290 DR = new DbgVariableRecord(
4291 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4292 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4293 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4294 CI->getDebugLoc());
4295 } else if (Name == "declare") {
4296 DR = new DbgVariableRecord(
4297 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4298 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4299 DbgVariableRecord::LocationType::Declare);
4300 } else if (Name == "addr") {
4301 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
     // NOTE(review): Expr is passed to DIExpression::append without a null
     // check although unwrapMAVOp can return nullptr -- confirm the operand is
     // always a DIExpression here.
4302 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4303 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4304 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4305 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4306 CI->getDebugLoc());
4307 } else if (Name == "value") {
4308 // An old version of dbg.value had an extra offset argument.
4309 unsigned VarOp = 1;
4310 unsigned ExprOp = 2;
4311 if (CI->arg_size() == 4) {
4312 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4313 // Nonzero offset dbg.values get dropped without a replacement.
4314 if (!Offset || !Offset->isZeroValue())
4315 return;
     // With the offset argument present, the variable and expression operands
     // are shifted by one.
4316 VarOp = 2;
4317 ExprOp = 3;
4318 }
4319 DR = new DbgVariableRecord(
4320 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4321 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4322 }
     // Insert the new record immediately before the intrinsic call; the call
     // itself is not removed here.
4323 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4324 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4325}
4326
4327/// Upgrade a call to an old intrinsic. All argument and return casting must be
4328/// provided to seamlessly integrate with existing context.
4330 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4331 // checks the callee's function type matches. It's likely we need to handle
4332 // type changes here.
4333 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4334 if (!F)
4335 return;
4336
     // NOTE(review): the signature line of this function and a few interior
     // lines are not present in this listing.
     // All replacement instructions are inserted immediately before the call
     // being upgraded.
4337 LLVMContext &C = CI->getContext();
4338 IRBuilder<> Builder(C);
4339 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4340
     // Path 1: no replacement declaration was supplied. Dispatch on the target
     // prefix of the intrinsic name to a helper that builds the replacement
     // value directly.
4341 if (!NewFn) {
4342 bool FallthroughToDefaultUpgrade = false;
4343 // Get the Function's name.
4344 StringRef Name = F->getName();
4345
4346 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4347 Name = Name.substr(5);
4348
     // consume_front strips the prefix from Name when it matches, so the
     // helpers below see the name without the target prefix.
4349 bool IsX86 = Name.consume_front("x86.");
4350 bool IsNVVM = Name.consume_front("nvvm.");
4351 bool IsAArch64 = Name.consume_front("aarch64.");
4352 bool IsARM = Name.consume_front("arm.");
4353 bool IsAMDGCN = Name.consume_front("amdgcn.");
4354 bool IsDbg = Name.consume_front("dbg.");
4355 Value *Rep = nullptr;
4356
4357 if (!IsX86 && Name == "stackprotectorcheck") {
4358 Rep = nullptr;
4359 } else if (IsNVVM) {
4360 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4361 } else if (IsX86) {
4362 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4363 } else if (IsAArch64) {
4364 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4365 } else if (IsARM) {
4366 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4367 } else if (IsAMDGCN) {
4368 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4369 } else if (IsDbg) {
4370 // We might have decided we don't want the new format after all between
4371 // first requesting the upgrade and now; skip the conversion if that is
4372 // the case, and check here to see if the intrinsic needs to be upgraded
4373 // normally.
4374 if (!CI->getModule()->IsNewDbgInfoFormat) {
4375 bool NeedsUpgrade =
4376 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4377 if (!NeedsUpgrade)
4378 return;
4379 FallthroughToDefaultUpgrade = true;
4380 } else {
4382 }
4383 } else {
4384 llvm_unreachable("Unknown function for CallBase upgrade.");
4385 }
4386
     // Unless the debug-intrinsic branch asked for the normal upgrade, the old
     // call is erased here; its uses are redirected to Rep when one was built.
4387 if (!FallthroughToDefaultUpgrade) {
4388 if (Rep)
4389 CI->replaceAllUsesWith(Rep);
4390 CI->eraseFromParent();
4391 return;
4392 }
4393 }
4394
     // Path 2: a replacement declaration (NewFn) exists. DefaultCase handles the
     // upgrades that need no argument rewriting; the switch below handles the
     // intrinsics whose arguments or results must be adapted.
4395 const auto &DefaultCase = [&]() -> void {
4396 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4397 // Handle generic mangling change.
4398 assert(
4399 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4400 "Unknown function for CallBase upgrade and isn't just a name change");
4401 CI->setCalledFunction(NewFn);
4402 return;
4403 }
4404
4405 // This must be an upgrade from a named to a literal struct.
4406 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4407 assert(OldST != NewFn->getReturnType() &&
4408 "Return type must have changed");
4409 assert(OldST->getNumElements() ==
4410 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4411 "Must have same number of elements");
4412
     // Call the new function, then rebuild a value of the old struct type
     // element by element so existing users keep their type.
4413 SmallVector<Value *> Args(CI->args());
4414 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4415 NewCI->setAttributes(CI->getAttributes());
4416 Value *Res = PoisonValue::get(OldST);
4417 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4418 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4419 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4420 }
4421 CI->replaceAllUsesWith(Res);
4422 CI->eraseFromParent();
4423 return;
4424 }
4425
4426 // We're probably about to produce something invalid. Let the verifier catch
4427 // it instead of dying here.
4428 CI->setCalledOperand(
4430 return;
4431 };
     // Cases that "break" leave the new call in NewCall and share the common
     // takeName / replaceAllUsesWith / erase sequence at the end of the
     // function. Cases that "return" have already finished (or decided that no
     // upgrade is needed).
4432 CallInst *NewCall = nullptr;
4433 switch (NewFn->getIntrinsicID()) {
4434 default: {
4435 DefaultCase();
4436 return;
4437 }
4438 case Intrinsic::arm_neon_vst1:
4439 case Intrinsic::arm_neon_vst2:
4440 case Intrinsic::arm_neon_vst3:
4441 case Intrinsic::arm_neon_vst4:
4442 case Intrinsic::arm_neon_vst2lane:
4443 case Intrinsic::arm_neon_vst3lane:
4444 case Intrinsic::arm_neon_vst4lane: {
4445 SmallVector<Value *, 4> Args(CI->args());
4446 NewCall = Builder.CreateCall(NewFn, Args);
4447 break;
4448 }
4449 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4450 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4451 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
     // Re-create the constant operand 3 as an i32 constant with the same
     // zero-extended value.
4452 LLVMContext &Ctx = F->getParent()->getContext();
4453 SmallVector<Value *, 4> Args(CI->args());
4454 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4455 cast<ConstantInt>(Args[3])->getZExtValue());
4456 NewCall = Builder.CreateCall(NewFn, Args);
4457 break;
4458 }
4459 case Intrinsic::aarch64_sve_ld3_sret:
4460 case Intrinsic::aarch64_sve_ld4_sret:
4461 case Intrinsic::aarch64_sve_ld2_sret: {
4462 StringRef Name = F->getName();
4463 Name = Name.substr(5);
     // NOTE(review): N falls back to 0 when no prefix matches, which would make
     // the division below divide by zero; the tuple.create path further down
     // asserts N > 1 -- consider doing the same here.
4464 unsigned N = StringSwitch<unsigned>(Name)
4465 .StartsWith("aarch64.sve.ld2", 2)
4466 .StartsWith("aarch64.sve.ld3", 3)
4467 .StartsWith("aarch64.sve.ld4", 4)
4468 .Default(0);
4469 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4470 unsigned MinElts = RetTy->getMinNumElements() / N;
4471 SmallVector<Value *, 2> Args(CI->args());
4472 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
     // Reassemble the old wide result by inserting each element of the new
     // call's struct result at element offset I * MinElts. (The declaration of
     // Ret is on a line missing from this listing.)
4474 for (unsigned I = 0; I < N; I++) {
4475 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4476 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4477 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4478 }
4479 NewCall = dyn_cast<CallInst>(Ret);
4480 break;
4481 }
4482
4483 case Intrinsic::coro_end: {
     // Append a 'none' token as the extra trailing argument.
4484 SmallVector<Value *, 3> Args(CI->args());
4485 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4486 NewCall = Builder.CreateCall(NewFn, Args);
4487 break;
4488 }
4489
4490 case Intrinsic::vector_extract: {
4491 StringRef Name = F->getName();
4492 Name = Name.substr(5); // Strip llvm.
4493 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4494 DefaultCase();
4495 return;
4496 }
     // The old tuple index is scaled by the minimum element count of the
     // result type to get the element index passed to the new call.
4497 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4498 unsigned MinElts = RetTy->getMinNumElements();
4499 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4500 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4501 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4502 break;
4503 }
4504
4505 case Intrinsic::vector_insert: {
4506 StringRef Name = F->getName();
4507 Name = Name.substr(5);
4508 if (!Name.starts_with("aarch64.sve.tuple")) {
4509 DefaultCase();
4510 return;
4511 }
4512 if (Name.starts_with("aarch64.sve.tuple.set")) {
     // Same index scaling as tuple.get, using the inserted vector's type; note
     // the operand order changes to (tuple, subvector, index).
4513 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4514 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4515 Value *NewIdx =
4516 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4517 NewCall = Builder.CreateCall(
4518 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4519 break;
4520 }
4521 if (Name.starts_with("aarch64.sve.tuple.create")) {
4522 unsigned N = StringSwitch<unsigned>(Name)
4523 .StartsWith("aarch64.sve.tuple.create2", 2)
4524 .StartsWith("aarch64.sve.tuple.create3", 3)
4525 .StartsWith("aarch64.sve.tuple.create4", 4)
4526 .Default(0);
4527 assert(N > 1 && "Create is expected to be between 2-4");
4528 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
     // Build the tuple by inserting each argument at element offset
     // I * MinElts. (The declaration of Ret is on a line missing from this
     // listing.)
4530 unsigned MinElts = RetTy->getMinNumElements() / N;
4531 for (unsigned I = 0; I < N; I++) {
4532 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4533 Value *V = CI->getArgOperand(I);
4534 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4535 }
4536 NewCall = dyn_cast<CallInst>(Ret);
4537 }
4538 break;
4539 }
4540
4541 case Intrinsic::arm_neon_bfdot:
4542 case Intrinsic::arm_neon_bfmmla:
4543 case Intrinsic::arm_neon_bfmlalb:
4544 case Intrinsic::arm_neon_bfmlalt:
4545 case Intrinsic::aarch64_neon_bfdot:
4546 case Intrinsic::aarch64_neon_bfmmla:
4547 case Intrinsic::aarch64_neon_bfmlalb:
4548 case Intrinsic::aarch64_neon_bfmlalt: {
     // The first argument is passed through unchanged; the second and third are
     // bitcast to a bfloat vector with OperandWidth / 16 elements. (The
     // declarations of Args and of the OperandWidth initializer are on lines
     // missing from this listing.)
4550 assert(CI->arg_size() == 3 &&
4551 "Mismatch between function args and call args");
4552 size_t OperandWidth =
4554 assert((OperandWidth == 64 || OperandWidth == 128) &&
4555 "Unexpected operand width");
4556 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4557 auto Iter = CI->args().begin();
4558 Args.push_back(*Iter++);
4559 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4560 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4561 NewCall = Builder.CreateCall(NewFn, Args);
4562 break;
4563 }
4564
4565 case Intrinsic::bitreverse:
4566 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4567 break;
4568
4569 case Intrinsic::ctlz:
4570 case Intrinsic::cttz:
     // The one-argument form gains a second argument, passed as 'false'.
4571 assert(CI->arg_size() == 1 &&
4572 "Mismatch between function args and call args");
4573 NewCall =
4574 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4575 break;
4576
4577 case Intrinsic::objectsize: {
     // Arguments missing from the old call (index 2 and/or 3) default to
     // 'false'.
4578 Value *NullIsUnknownSize =
4579 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4580 Value *Dynamic =
4581 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4582 NewCall = Builder.CreateCall(
4583 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4584 break;
4585 }
4586
4587 case Intrinsic::ctpop:
4588 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4589 break;
4590
4591 case Intrinsic::convert_from_fp16:
4592 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4593 break;
4594
4595 case Intrinsic::dbg_value: {
4596 StringRef Name = F->getName();
4597 Name = Name.substr(5); // Strip llvm.
4598 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4599 if (Name.starts_with("dbg.addr")) {
4600 DIExpression *Expr = cast<DIExpression>(
4601 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4602 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4603 NewCall =
4604 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4605 MetadataAsValue::get(C, Expr)});
4606 break;
4607 }
4608
4609 // Upgrade from the old version that had an extra offset argument.
4610 assert(CI->arg_size() == 4);
4611 // Drop nonzero offsets instead of attempting to upgrade them.
4612 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4613 if (Offset->isZeroValue()) {
4614 NewCall = Builder.CreateCall(
4615 NewFn,
4616 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4617 break;
4618 }
     // Nonzero or non-constant offset: the call is removed without a
     // replacement.
4619 CI->eraseFromParent();
4620 return;
4621 }
4622
4623 case Intrinsic::ptr_annotation:
4624 // Upgrade from versions that lacked the annotation attribute argument.
4625 if (CI->arg_size() != 4) {
4626 DefaultCase();
4627 return;
4628 }
4629
4630 // Create a new call with an added null annotation attribute argument.
4631 NewCall =
4632 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4633 CI->getArgOperand(2), CI->getArgOperand(3),
4634 Constant::getNullValue(Builder.getPtrTy())});
4635 NewCall->takeName(CI);
4636 CI->replaceAllUsesWith(NewCall);
4637 CI->eraseFromParent();
4638 return;
4639
4640 case Intrinsic::var_annotation:
4641 // Upgrade from versions that lacked the annotation attribute argument.
4642 if (CI->arg_size() != 4) {
4643 DefaultCase();
4644 return;
4645 }
4646 // Create a new call with an added null annotation attribute argument.
4647 NewCall =
4648 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4649 CI->getArgOperand(2), CI->getArgOperand(3),
4650 Constant::getNullValue(Builder.getPtrTy())});
4651 NewCall->takeName(CI);
4652 CI->replaceAllUsesWith(NewCall);
4653 CI->eraseFromParent();
4654 return;
4655
4656 case Intrinsic::riscv_aes32dsi:
4657 case Intrinsic::riscv_aes32dsmi:
4658 case Intrinsic::riscv_aes32esi:
4659 case Intrinsic::riscv_aes32esmi:
4660 case Intrinsic::riscv_sm4ks:
4661 case Intrinsic::riscv_sm4ed: {
4662 // The last argument to these intrinsics used to be i8 and changed to i32.
4663 // The type overload for sm4ks and sm4ed was removed.
     // Nothing to do when the last argument is already i32 and the call does
     // not return i64.
4664 Value *Arg2 = CI->getArgOperand(2);
4665 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4666 return;
4667
4668 Value *Arg0 = CI->getArgOperand(0);
4669 Value *Arg1 = CI->getArgOperand(1);
4670 if (CI->getType()->isIntegerTy(64)) {
4671 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4672 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4673 }
4674
4675 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4676 cast<ConstantInt>(Arg2)->getZExtValue());
4677
     // Cast the result back to the original call type (as a signed integer
     // cast) when the types differ.
4678 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4679 Value *Res = NewCall;
4680 if (Res->getType() != CI->getType())
4681 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4682 NewCall->takeName(CI);
4683 CI->replaceAllUsesWith(Res);
4684 CI->eraseFromParent();
4685 return;
4686 }
4687 case Intrinsic::riscv_sha256sig0:
4688 case Intrinsic::riscv_sha256sig1:
4689 case Intrinsic::riscv_sha256sum0:
4690 case Intrinsic::riscv_sha256sum1:
4691 case Intrinsic::riscv_sm3p0:
4692 case Intrinsic::riscv_sm3p1: {
4693 // Only calls that still return i64 need upgrading: the operand is truncated
4694 // to i32 and the result is cast back to the original type.
4695 if (!CI->getType()->isIntegerTy(64))
4696 return;
4697
4698 Value *Arg =
4699 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4700
4701 NewCall = Builder.CreateCall(NewFn, Arg);
4702 Value *Res =
4703 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4704 NewCall->takeName(CI);
4705 CI->replaceAllUsesWith(Res);
4706 CI->eraseFromParent();
4707 return;
4708 }
4709
4710 case Intrinsic::x86_xop_vfrcz_ss:
4711 case Intrinsic::x86_xop_vfrcz_sd:
     // Only the second argument of the old call is passed to the new one.
4712 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4713 break;
4714
4715 case Intrinsic::x86_xop_vpermil2pd:
4716 case Intrinsic::x86_xop_vpermil2ps:
4717 case Intrinsic::x86_xop_vpermil2pd_256:
4718 case Intrinsic::x86_xop_vpermil2ps_256: {
     // Argument 2 is bitcast to the integer vector type of the same shape.
4719 SmallVector<Value *, 4> Args(CI->args());
4720 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4721 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4722 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4723 NewCall = Builder.CreateCall(NewFn, Args);
4724 break;
4725 }
4726
4727 case Intrinsic::x86_sse41_ptestc:
4728 case Intrinsic::x86_sse41_ptestz:
4729 case Intrinsic::x86_sse41_ptestnzc: {
4730 // The arguments for these intrinsics used to be v4f32, and changed
4731 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4732 // So, the only thing required is a bitcast for both arguments.
4733 // First, check the arguments have the old type.
4734 Value *Arg0 = CI->getArgOperand(0);
4735 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4736 return;
4737
4738 // Old intrinsic, add bitcasts
4739 Value *Arg1 = CI->getArgOperand(1);
4740
4741 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4742
4743 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4744 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4745
4746 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4747 break;
4748 }
4749
4750 case Intrinsic::x86_rdtscp: {
4751 // This used to take 1 argument. If we have no arguments, it is already
4752 // upgraded.
     // NOTE(review): getNumOperands() also counts the callee operand (the
     // AMDGCN upgrade path in this file relies on that), so this comparison
     // with 0 looks like it can never be true for a call; arg_size() == 0 is
     // probably what was intended -- confirm.
4753 if (CI->getNumOperands() == 0)
4754 return;
4755
4756 NewCall = Builder.CreateCall(NewFn);
4757 // Extract the second result and store it.
4758 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4759 // Cast the pointer to the right type.
4760 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4761 llvm::PointerType::getUnqual(Data->getType()));
4762 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4763 // Replace the original call result with the first result of the new call.
4764 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4765
4766 NewCall->takeName(CI);
4767 CI->replaceAllUsesWith(TSC);
4768 CI->eraseFromParent();
4769 return;
4770 }
4771
4772 case Intrinsic::x86_sse41_insertps:
4773 case Intrinsic::x86_sse41_dppd:
4774 case Intrinsic::x86_sse41_dpps:
4775 case Intrinsic::x86_sse41_mpsadbw:
4776 case Intrinsic::x86_avx_dp_ps_256:
4777 case Intrinsic::x86_avx2_mpsadbw: {
4778 // Need to truncate the last argument from i32 to i8 -- this argument models
4779 // an inherently 8-bit immediate operand to these x86 instructions.
4780 SmallVector<Value *, 4> Args(CI->args());
4781
4782 // Replace the last argument with a trunc.
4783 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4784 NewCall = Builder.CreateCall(NewFn, Args);
4785 break;
4786 }
4787
4788 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4789 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4790 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4791 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4792 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4793 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
     // Argument 3 (the mask) is converted with getX86MaskVec before the call,
     // and the call result is passed through applyX86MaskOn1BitsVec to produce
     // the value that replaces the old call.
4794 SmallVector<Value *, 4> Args(CI->args());
4795 unsigned NumElts =
4796 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4797 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4798
4799 NewCall = Builder.CreateCall(NewFn, Args);
4800 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4801
4802 NewCall->takeName(CI);
4803 CI->replaceAllUsesWith(Res);
4804 CI->eraseFromParent();
4805 return;
4806 }
4807
4808 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4809 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4810 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4811 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4812 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4813 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
     // The new call's result is bitcast to an i16 vector with the old call's
     // element count. For the masked 128-bit variant, argument 1 is also
     // bitcast to a bfloat vector.
4814 SmallVector<Value *, 4> Args(CI->args());
4815 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4816 if (NewFn->getIntrinsicID() ==
4817 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4818 Args[1] = Builder.CreateBitCast(
4819 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4820
4821 NewCall = Builder.CreateCall(NewFn, Args);
4822 Value *Res = Builder.CreateBitCast(
4823 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4824
4825 NewCall->takeName(CI);
4826 CI->replaceAllUsesWith(Res);
4827 CI->eraseFromParent();
4828 return;
4829 }
4830 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4831 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4832 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
     // Arguments 1 and 2 are bitcast to bfloat vectors with twice as many
     // elements as the call's result vector.
4833 SmallVector<Value *, 4> Args(CI->args());
4834 unsigned NumElts =
4835 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4836 Args[1] = Builder.CreateBitCast(
4837 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4838 Args[2] = Builder.CreateBitCast(
4839 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4840
4841 NewCall = Builder.CreateCall(NewFn, Args);
4842 break;
4843 }
4844
4845 case Intrinsic::thread_pointer: {
4846 NewCall = Builder.CreateCall(NewFn, {});
4847 break;
4848 }
4849
4850 case Intrinsic::memcpy:
4851 case Intrinsic::memmove:
4852 case Intrinsic::memset: {
4853 // We have to make sure that the call signature is what we're expecting.
4854 // We only want to change the old signatures by removing the alignment arg:
4855 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4856 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4857 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4858 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4859 // Note: i8*'s in the above can be any pointer type
4860 if (CI->arg_size() != 5) {
4861 DefaultCase();
4862 return;
4863 }
4864 // Remove alignment argument (3), and add alignment attributes to the
4865 // dest/src pointers.
4866 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4867 CI->getArgOperand(2), CI->getArgOperand(4)};
4868 NewCall = Builder.CreateCall(NewFn, Args);
     // Carry over the old attributes, skipping those of the dropped alignment
     // parameter (index 3). (The start of the NewAttrs declaration is on a line
     // missing from this listing.)
4869 AttributeList OldAttrs = CI->getAttributes();
4871 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4872 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4873 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4874 NewCall->setAttributes(NewAttrs);
4875 auto *MemCI = cast<MemIntrinsic>(NewCall);
4876 // All mem intrinsics support dest alignment.
4877 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4878 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4879 // Memcpy/Memmove also support source alignment.
4880 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4881 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4882 break;
4883 }
4884 }
     // Common tail for every case that left the switch with "break": the new
     // call takes over the old call's name and uses, and the old call is
     // erased.
4885 assert(NewCall && "Should have either set this variable or returned through "
4886 "the default case");
4887 NewCall->takeName(CI);
4888 CI->replaceAllUsesWith(NewCall);
4889 CI->eraseFromParent();
4890}
4891
4893 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4894
4895 // Check if this function should be upgraded and get the replacement function
4896 // if there is one.
4897 Function *NewFn;
4898 if (UpgradeIntrinsicFunction(F, NewFn)) {
4899 // Replace all users of the old function with the new function or new
4900 // instructions. Iterate over an early-increment range because the call is deleted.
4901 for (User *U : make_early_inc_range(F->users()))
4902 if (CallBase *CB = dyn_cast<CallBase>(U))
4903 UpgradeIntrinsicCall(CB, NewFn);
4904
4905 // Remove old function, no longer used, from the module.
4906 F->eraseFromParent();
4907 }
4908}
4909
4911 const unsigned NumOperands = MD.getNumOperands();
4912 if (NumOperands == 0)
4913 return &MD; // Invalid, punt to a verifier error.
4914
4915 // Check if the tag uses struct-path aware TBAA format.
4916 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4917 return &MD;
4918
4919 auto &Context = MD.getContext();
4920 if (NumOperands == 3) {
4921 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4922 MDNode *ScalarType = MDNode::get(Context, Elts);
4923 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4924 Metadata *Elts2[] = {ScalarType, ScalarType,
4927 MD.getOperand(2)};
4928 return MDNode::get(Context, Elts2);
4929 }
4930 // Create a MDNode <MD, MD, offset 0>
4932 Type::getInt64Ty(Context)))};
4933 return MDNode::get(Context, Elts);
4934}
4935
4937 Instruction *&Temp) {
4938 if (Opc != Instruction::BitCast)
4939 return nullptr;
4940
4941 Temp = nullptr;
4942 Type *SrcTy = V->getType();
4943 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4944 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4945 LLVMContext &Context = V->getContext();
4946
4947 // We have no information about target data layout, so we assume that
4948 // the maximum pointer size is 64bit.
4949 Type *MidTy = Type::getInt64Ty(Context);
4950 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4951
4952 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4953 }
4954
4955 return nullptr;
4956}
4957
4959 if (Opc != Instruction::BitCast)
4960 return nullptr;
4961
4962 Type *SrcTy = C->getType();
4963 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4964 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4965 LLVMContext &Context = C->getContext();
4966
4967 // We have no information about target data layout, so we assume that
4968 // the maximum pointer size is 64bit.
4969 Type *MidTy = Type::getInt64Ty(Context);
4970
4972 DestTy);
4973 }
4974
4975 return nullptr;
4976}
4977
4978/// Check the debug info version number, if it is out-dated, drop the debug
4979/// info. Return true if module is modified.
4982 return false;
4983
4984 // We need to get metadata before the module is verified (i.e., getModuleFlag
4985 // makes assumptions that we haven't verified yet). Carefully extract the flag
4986 // from the metadata.
4987 unsigned Version = 0;
4988 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4989 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4990 if (Flag->getNumOperands() < 3)
4991 return false;
4992 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4993 return K->getString() == "Debug Info Version";
4994 return false;
4995 });
4996 if (OpIt != ModFlags->op_end()) {
4997 const MDOperand &ValOp = (*OpIt)->getOperand(2);
4998 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4999 Version = CI->getZExtValue();
5000 }
5001 }
5002
5003 if (Version == DEBUG_METADATA_VERSION) {
5004 bool BrokenDebugInfo = false;
5005 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5006 report_fatal_error("Broken module found, compilation aborted!");
5007 if (!BrokenDebugInfo)
5008 // Everything is ok.
5009 return false;
5010 else {
5011 // Diagnose malformed debug info.
5013 M.getContext().diagnose(Diag);
5014 }
5015 }
5016 bool Modified = StripDebugInfo(M);
5017 if (Modified && Version != DEBUG_METADATA_VERSION) {
5018 // Diagnose a version mismatch.
5019 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5020 M.getContext().diagnose(DiagVersion);
5021 }
5022 return Modified;
5023}
5024
5025/// This checks for objc retain release marker which should be upgraded. It
5026/// returns true if module is modified.
5028 bool Changed = false;
5029 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5030 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5031 if (ModRetainReleaseMarker) {
5032 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5033 if (Op) {
5034 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5035 if (ID) {
5036 SmallVector<StringRef, 4> ValueComp;
5037 ID->getString().split(ValueComp, "#");
5038 if (ValueComp.size() == 2) {
5039 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5040 ID = MDString::get(M.getContext(), NewValue);
5041 }
5042 M.addModuleFlag(Module::Error, MarkerKey, ID);
5043 M.eraseNamedMetadata(ModRetainReleaseMarker);
5044 Changed = true;
5045 }
5046 }
5047 }
5048 return Changed;
5049}
5050
5052 // This lambda converts normal function calls to ARC runtime functions to
5053 // intrinsic calls.
5054 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5055 llvm::Intrinsic::ID IntrinsicFunc) {
5056 Function *Fn = M.getFunction(OldFunc);
5057
5058 if (!Fn)
5059 return;
5060
5061 Function *NewFn =
5062 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5063
5064 for (User *U : make_early_inc_range(Fn->users())) {
5065 CallInst *CI = dyn_cast<CallInst>(U);
5066 if (!CI || CI->getCalledFunction() != Fn)
5067 continue;
5068
5069 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5070 FunctionType *NewFuncTy = NewFn->getFunctionType();
5072
5073 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5074 // value to the return type of the old function.
5075 if (NewFuncTy->getReturnType() != CI->getType() &&
5076 !CastInst::castIsValid(Instruction::BitCast, CI,
5077 NewFuncTy->getReturnType()))
5078 continue;
5079
5080 bool InvalidCast = false;
5081
5082 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5083 Value *Arg = CI->getArgOperand(I);
5084
5085 // Bitcast argument to the parameter type of the new function if it's
5086 // not a variadic argument.
5087 if (I < NewFuncTy->getNumParams()) {
5088 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5089 // to the parameter type of the new function.
5090 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5091 NewFuncTy->getParamType(I))) {
5092 InvalidCast = true;
5093 break;
5094 }
5095 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5096 }
5097 Args.push_back(Arg);
5098 }
5099
5100 if (InvalidCast)
5101 continue;
5102
5103 // Create a call instruction that calls the new function.
5104 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5105 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5106 NewCall->takeName(CI);
5107
5108 // Bitcast the return value back to the type of the old call.
5109 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5110
5111 if (!CI->use_empty())
5112 CI->replaceAllUsesWith(NewRetVal);
5113 CI->eraseFromParent();
5114 }
5115
5116 if (Fn->use_empty())
5117 Fn->eraseFromParent();
5118 };
5119
5120 // Unconditionally convert a call to "clang.arc.use" to a call to
5121 // "llvm.objc.clang.arc.use".
5122 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5123
5124 // Upgrade the retain release marker. If there is no need to upgrade
5125 // the marker, that means either the module is already new enough to contain
5126 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5128 return;
5129
5130 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5131 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5132 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5133 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5134 {"objc_autoreleaseReturnValue",
5135 llvm::Intrinsic::objc_autoreleaseReturnValue},
5136 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5137 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5138 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5139 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5140 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5141 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5142 {"objc_release", llvm::Intrinsic::objc_release},
5143 {"objc_retain", llvm::Intrinsic::objc_retain},
5144 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5145 {"objc_retainAutoreleaseReturnValue",
5146 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5147 {"objc_retainAutoreleasedReturnValue",
5148 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5149 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5150 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5151 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5152 {"objc_unsafeClaimAutoreleasedReturnValue",
5153 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5154 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5155 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5156 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5157 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5158 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5159 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5160 {"objc_arc_annotation_topdown_bbstart",
5161 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5162 {"objc_arc_annotation_topdown_bbend",
5163 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5164 {"objc_arc_annotation_bottomup_bbstart",
5165 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5166 {"objc_arc_annotation_bottomup_bbend",
5167 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5168
5169 for (auto &I : RuntimeFuncs)
5170 UpgradeToIntrinsic(I.first, I.second);
5171}
5172
5174 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5175 if (!ModFlags)
5176 return false;
5177
5178 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5179 bool HasSwiftVersionFlag = false;
5180 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5181 uint32_t SwiftABIVersion;
5182 auto Int8Ty = Type::getInt8Ty(M.getContext());
5183 auto Int32Ty = Type::getInt32Ty(M.getContext());
5184
5185 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5186 MDNode *Op = ModFlags->getOperand(I);
5187 if (Op->getNumOperands() != 3)
5188 continue;
5189 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5190 if (!ID)
5191 continue;
5192 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5193 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5194 Type::getInt32Ty(M.getContext()), B)),
5195 MDString::get(M.getContext(), ID->getString()),
5196 Op->getOperand(2)};
5197 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5198 Changed = true;
5199 };
5200
5201 if (ID->getString() == "Objective-C Image Info Version")
5202 HasObjCFlag = true;
5203 if (ID->getString() == "Objective-C Class Properties")
5204 HasClassProperties = true;
5205 // Upgrade PIC from Error/Max to Min.
5206 if (ID->getString() == "PIC Level") {
5207 if (auto *Behavior =
5208 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5209 uint64_t V = Behavior->getLimitedValue();
5210 if (V == Module::Error || V == Module::Max)
5211 SetBehavior(Module::Min);
5212 }
5213 }
5214 // Upgrade "PIE Level" from Error to Max.
5215 if (ID->getString() == "PIE Level")
5216 if (auto *Behavior =
5217 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5218 if (Behavior->getLimitedValue() == Module::Error)
5219 SetBehavior(Module::Max);
5220
5221 // Upgrade branch protection and return address signing module flags. The
5222 // module flag behavior for these fields were Error and now they are Min.
5223 if (ID->getString() == "branch-target-enforcement" ||
5224 ID->getString().starts_with("sign-return-address")) {
5225 if (auto *Behavior =
5226 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5227 if (Behavior->getLimitedValue() == Module::Error) {
5228 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5229 Metadata *Ops[3] = {
5230 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5231 Op->getOperand(1), Op->getOperand(2)};
5232 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5233 Changed = true;
5234 }
5235 }
5236 }
5237
5238 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
5239 // section name so that llvm-lto will not complain about mismatching
5240 // module flags that are functionally the same.
5241 if (ID->getString() == "Objective-C Image Info Section") {
5242 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5243 SmallVector<StringRef, 4> ValueComp;
5244 Value->getString().split(ValueComp, " ");
5245 if (ValueComp.size() != 1) {
5246 std::string NewValue;
5247 for (auto &S : ValueComp)
5248 NewValue += S.str();
5249 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5250 MDString::get(M.getContext(), NewValue)};
5251 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5252 Changed = true;
5253 }
5254 }
5255 }
5256
5257 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5258 // If the higher bits are set, it adds new module flag for swift info.
5259 if (ID->getString() == "Objective-C Garbage Collection") {
5260 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5261 if (Md) {
5262 assert(Md->getValue() && "Expected non-empty metadata");
5263 auto Type = Md->getValue()->getType();
5264 if (Type == Int8Ty)
5265 continue;
5266 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5267 if ((Val & 0xff) != Val) {
5268 HasSwiftVersionFlag = true;
5269 SwiftABIVersion = (Val & 0xff00) >> 8;
5270 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5271 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5272 }
5273 Metadata *Ops[3] = {
5274 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5275 Op->getOperand(1),
5276 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5277 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5278 Changed = true;
5279 }
5280 }
5281
5282 if (ID->getString() == "amdgpu_code_object_version") {
5283 Metadata *Ops[3] = {
5284 Op->getOperand(0),
5285 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5286 Op->getOperand(2)};
5287 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5288 Changed = true;
5289 }
5290 }
5291
5292 // "Objective-C Class Properties" is recently added for Objective-C. We
5293 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5294 // flag of value 0, so we can correctly downgrade this flag when trying to
5295 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5296 // this module flag.
5297 if (HasObjCFlag && !HasClassProperties) {
5298 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5299 (uint32_t)0);
5300 Changed = true;
5301 }
5302
5303 if (HasSwiftVersionFlag) {
5304 M.addModuleFlag(Module::Error, "Swift ABI Version",
5305 SwiftABIVersion);
5306 M.addModuleFlag(Module::Error, "Swift Major Version",
5307 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5308 M.addModuleFlag(Module::Error, "Swift Minor Version",
5309 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5310 Changed = true;
5311 }
5312
5313 return Changed;
5314}
5315
5317 auto TrimSpaces = [](StringRef Section) -> std::string {
5318 SmallVector<StringRef, 5> Components;
5319 Section.split(Components, ',');
5320
5321 SmallString<32> Buffer;
5322 raw_svector_ostream OS(Buffer);
5323
5324 for (auto Component : Components)
5325 OS << ',' << Component.trim();
5326
5327 return std::string(OS.str().substr(1));
5328 };
5329
5330 for (auto &GV : M.globals()) {
5331 if (!GV.hasSection())
5332 continue;
5333
5334 StringRef Section = GV.getSection();
5335
5336 if (!Section.starts_with("__DATA, __objc_catlist"))
5337 continue;
5338
5339 // __DATA, __objc_catlist, regular, no_dead_strip
5340 // __DATA,__objc_catlist,regular,no_dead_strip
5341 GV.setSection(TrimSpaces(Section));
5342 }
5343}
5344
5345namespace {
5346// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5347// callsites within a function that did not also have the strictfp attribute.
5348// Since 10.0, if strict FP semantics are needed within a function, the
5349// function must have the strictfp attribute and all calls within the function
5350// must also have the strictfp attribute. This latter restriction is
5351// necessary to prevent unwanted libcall simplification when a function is
5352// being cloned (such as for inlining).
5353//
5354// The "dangling" strictfp attribute usage was only used to prevent constant
5355// folding and other libcall simplification. The nobuiltin attribute on the
5356// callsite has the same effect.
5357struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5358 StrictFPUpgradeVisitor() = default;
5359
5360 void visitCallBase(CallBase &Call) {
5361 if (!Call.isStrictFP())
5362 return;
5363 if (isa<ConstrainedFPIntrinsic>(&Call))
5364 return;
5365 // If we get here, the caller doesn't have the strictfp attribute
5366 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5367 Call.removeFnAttr(Attribute::StrictFP);
5368 Call.addFnAttr(Attribute::NoBuiltin);
5369 }
5370};
5371
5372/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5373struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5374 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5375 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5376
5378 if (!RMW.isFloatingPointOperation())
5379 return;
5380
5381 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5382 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5383 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5384 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5385 }
5386};
5387} // namespace
5388
5390 // If a function definition doesn't have the strictfp attribute,
5391 // convert any callsite strictfp attributes to nobuiltin.
5392 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5393 StrictFPUpgradeVisitor SFPV;
5394 SFPV.visit(F);
5395 }
5396
5397 // Remove all incompatible attributes from function.
5398 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5399 F.getReturnType(), F.getAttributes().getRetAttrs()));
5400 for (auto &Arg : F.args())
5401 Arg.removeAttrs(
5402 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5403
5404 // Older versions of LLVM treated an "implicit-section-name" attribute
5405 // similarly to directly setting the section on a Function.
5406 if (Attribute A = F.getFnAttribute("implicit-section-name");
5407 A.isValid() && A.isStringAttribute()) {
5408 F.setSection(A.getValueAsString());
5409 F.removeFnAttr("implicit-section-name");
5410 }
5411
5412 if (!F.empty()) {
5413 // For some reason this is called twice, and the first time is before any
5414 // instructions are loaded into the body.
5415
5416 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5417 A.isValid()) {
5418
5419 if (A.getValueAsBool()) {
5420 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5421 Visitor.visit(F);
5422 }
5423
5424 // We will leave behind dead attribute uses on external declarations, but
5425 // clang never added these to declarations anyway.
5426 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5427 }
5428 }
5429}
5430
5431static bool isOldLoopArgument(Metadata *MD) {
5432 auto *T = dyn_cast_or_null<MDTuple>(MD);
5433 if (!T)
5434 return false;
5435 if (T->getNumOperands() < 1)
5436 return false;
5437 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5438 if (!S)
5439 return false;
5440 return S->getString().starts_with("llvm.vectorizer.");
5441}
5442
5444 StringRef OldPrefix = "llvm.vectorizer.";
5445 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5446
5447 if (OldTag == "llvm.vectorizer.unroll")
5448 return MDString::get(C, "llvm.loop.interleave.count");
5449
5450 return MDString::get(
5451 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5452 .str());
5453}
5454
5456 auto *T = dyn_cast_or_null<MDTuple>(MD);
5457 if (!T)
5458 return MD;
5459 if (T->getNumOperands() < 1)
5460 return MD;
5461 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5462 if (!OldTag)
5463 return MD;
5464 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5465 return MD;
5466
5467 // This has an old tag. Upgrade it.
5469 Ops.reserve(T->getNumOperands());
5470 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5471 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5472 Ops.push_back(T->getOperand(I));
5473
5474 return MDTuple::get(T->getContext(), Ops);
5475}
5476
5478 auto *T = dyn_cast<MDTuple>(&N);
5479 if (!T)
5480 return &N;
5481
5482 if (none_of(T->operands(), isOldLoopArgument))
5483 return &N;
5484
5486 Ops.reserve(T->getNumOperands());
5487 for (Metadata *MD : T->operands())
5489
5490 return MDTuple::get(T->getContext(), Ops);
5491}
5492
5494 Triple T(TT);
5495 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5496 // the address space of globals to 1. This does not apply to SPIRV Logical.
5497 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5498 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5499 !DL.contains("-G") && !DL.starts_with("G")) {
5500 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5501 }
5502
5503 if (T.isLoongArch64() || T.isRISCV64()) {
5504 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5505 auto I = DL.find("-n64-");
5506 if (I != StringRef::npos)
5507 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5508 return DL.str();
5509 }
5510
5511 std::string Res = DL.str();
5512 // AMDGCN data layout upgrades.
5513 if (T.isAMDGCN()) {
5514 // Define address spaces for constants.
5515 if (!DL.contains("-G") && !DL.starts_with("G"))
5516 Res.append(Res.empty() ? "G1" : "-G1");
5517
5518 // Add missing non-integral declarations.
5519 // This goes before adding new address spaces to prevent incoherent string
5520 // values.
5521 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5522 Res.append("-ni:7:8:9");
5523 // Update ni:7 to ni:7:8:9.
5524 if (DL.ends_with("ni:7"))
5525 Res.append(":8:9");
5526 if (DL.ends_with("ni:7:8"))
5527 Res.append(":9");
5528
5529 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5530 // resources). An empty data layout has already been upgraded to G1 by now.
5531 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5532 Res.append("-p7:160:256:256:32");
5533 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5534 Res.append("-p8:128:128");
5535 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5536 Res.append("-p9:192:256:256:32");
5537
5538 return Res;
5539 }
5540
5541 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5542 // If the datalayout matches the expected format, add pointer size address
5543 // spaces to the datalayout.
5544 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5545 if (!DL.contains(AddrSpaces)) {
5547 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5548 if (R.match(Res, &Groups))
5549 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5550 }
5551 };
5552
5553 // AArch64 data layout upgrades.
5554 if (T.isAArch64()) {
5555 // Add "-Fn32"
5556 if (!DL.empty() && !DL.contains("-Fn32"))
5557 Res.append("-Fn32");
5558 AddPtr32Ptr64AddrSpaces();
5559 return Res;
5560 }
5561
5562 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5563 T.isWasm()) {
5564 // Mips64 with o32 ABI did not add "-i128:128".
5565 // Add "-i128:128"
5566 std::string I64 = "-i64:64";
5567 std::string I128 = "-i128:128";
5568 if (!StringRef(Res).contains(I128)) {
5569 size_t Pos = Res.find(I64);
5570 if (Pos != size_t(-1))
5571 Res.insert(Pos + I64.size(), I128);
5572 }
5573 return Res;
5574 }
5575
5576 if (!T.isX86())
5577 return Res;
5578
5579 AddPtr32Ptr64AddrSpaces();
5580
5581 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5582 // for i128 operations prior to this being reflected in the data layout, and
5583 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5584 // boundaries, so although this is a breaking change, the upgrade is expected
5585 // to fix more IR than it breaks.
5586 // Intel MCU is an exception and uses 4-byte-alignment.
5587 if (!T.isOSIAMCU()) {
5588 std::string I128 = "-i128:128";
5589 if (StringRef Ref = Res; !Ref.contains(I128)) {
5591 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5592 if (R.match(Res, &Groups))
5593 Res = (Groups[1] + I128 + Groups[3]).str();
5594 }
5595 }
5596
5597 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5598 // Raising the alignment is safe because Clang did not produce f80 values in
5599 // the MSVC environment before this upgrade was added.
5600 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5601 StringRef Ref = Res;
5602 auto I = Ref.find("-f80:32-");
5603 if (I != StringRef::npos)
5604 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5605 }
5606
5607 return Res;
5608}
5609
5611 StringRef FramePointer;
5612 Attribute A = B.getAttribute("no-frame-pointer-elim");
5613 if (A.isValid()) {
5614 // The value can be "true" or "false".
5615 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5616 B.removeAttribute("no-frame-pointer-elim");
5617 }
5618 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5619 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5620 if (FramePointer != "all")
5621 FramePointer = "non-leaf";
5622 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5623 }
5624 if (!FramePointer.empty())
5625 B.addAttribute("frame-pointer", FramePointer);
5626
5627 A = B.getAttribute("null-pointer-is-valid");
5628 if (A.isValid()) {
5629 // The value can be "true" or "false".
5630 bool NullPointerIsValid = A.getValueAsString() == "true";
5631 B.removeAttribute("null-pointer-is-valid");
5632 if (NullPointerIsValid)
5633 B.addAttribute(Attribute::NullPointerIsValid);
5634 }
5635}
5636
5637void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5638 // clang.arc.attachedcall bundles are now required to have an operand.
5639 // If they don't, it's okay to drop them entirely: when there is an operand,
5640 // the "attachedcall" is meaningful and required, but without an operand,
5641 // it's just a marker NOP. Dropping it merely prevents an optimization.
5642 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5643 return OBD.getTag() == "clang.arc.attachedcall" &&
5644 OBD.inputs().empty();
5645 });
5646}
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:91
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:75
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDType * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:55
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:59
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
std::string Name
uint64_t Size
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
uint64_t High
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:395
Type * getElementType() const
Definition: DerivedTypes.h:408
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:841
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ FAdd
*p = old + v
Definition: Instructions.h:741
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ FMin
*p = minnum(old, v); minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:752
@ FMax
*p = maxnum(old, v); maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
bool isFloatingPointOperation() const
Definition: Instructions.h:882
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1120
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1428
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1207
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1285
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1385
unsigned arg_size() const
Definition: InstrTypes.h:1292
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1388
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2307
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2253
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2293
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1522
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Records a position in IR for a source label (DILabel).
Base class for non-instruction debug metadata records that have positions within IR.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:251
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:458
size_t arg_size() const
Definition: Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
Argument * getArg(unsigned i) const
Definition: Function.h:886
LinkageTypes getLinkage() const
Definition: GlobalValue.h:546
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:458
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1583
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2503
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:508
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2554
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1637
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1060
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2121
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2491
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:536
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1830
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1556
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2201
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Definition: IRBuilder.cpp:1152
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2301
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2060
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:473
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2108
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:518
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1744
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2309
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1772
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2155
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1813
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2525
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:468
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition: IRBuilder.h:551
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2580
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1877
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2285
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2227
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1849
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2137
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1479
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1542
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2293
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1610
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1753
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2160
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:546
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1384
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:270
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
Definition: Instructions.h:176
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition: MDBuilder.cpp:95
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
LLVMContext & getContext() const
Definition: Metadata.h:1233
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
A single uniqued string.
Definition: Metadata.h:720
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1500
Metadata wrapper in the Value hierarchy.
Definition: Metadata.h:176
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
bool IsNewDbgInfoFormat
Is this Module using intrinsics to record the position of debugging information, or non-intrinsic rec...
Definition: Module.h:217
A tuple of MDNodes.
Definition: Metadata.h:1731
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1433
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1073
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1088
StringRef getTag() const
Definition: InstrTypes.h:1096
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
void reserve(size_type N)
Definition: SmallVector.h:663
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:609
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
static constexpr size_t npos
Definition: StringRef.h:53
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:365
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:366
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:267
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
Definition: Intrinsics.cpp:446
std::optional< Function * > remangleIntrinsicFunction(Function *F)
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Intrinsics.cpp:46
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:608
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7308
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117