//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

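// Helper used throughout this file: move an obsolete declaration aside by
// appending ".old" to its name, freeing the original name for the upgraded
// declaration. Calls to the ".old" function are rewritten later by
// UpgradeIntrinsicCall.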
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
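// For example (illustrative IR, not taken from a test):
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)  ; old
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)      ; new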
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
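// For example (illustrative IR, not taken from a test):
//   <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32 17) ; old
//   <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8 17)  ; new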
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
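// For example (illustrative IR, not taken from a test): the old form returned
// the compare mask packed into an integer, the new form returns a vector of i1:
//   i8 @llvm.x86.avx512.mask.cmp.pd.256(...)        ; old
//   <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(...)  ; new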
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

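// Upgrade the avx512bf16 intrinsics that used i16 to represent bf16 values to
// the current declarations, which use the bfloat type. The first variant
// checks the return type, the second checks the second operand (for the
// dot-product forms).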
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started auto-upgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true if
// an upgrade applied. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                      F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getDeclaration(F->getParent(),
                                              StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

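// Map the name suffix of an 'nvvm.*' bf16 intrinsic (with the "nvvm." prefix
// already consumed by the caller) to the current intrinsic ID, or
// not_intrinsic if the name does not need upgrading. The matched intrinsics
// formerly used i16 to carry bf16 values and now use the bfloat type.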
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

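// Check whether the intrinsic declaration F needs upgrading. Returns true if
// so. On return, NewFn is either the upgraded declaration, or null when the
// old calls must instead be rewritten in place by UpgradeIntrinsicCall.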
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                          {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
          Name.starts_with("ds.fmax")) {
        // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
        // declaration.
        NewFn = nullptr;
        return true;
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn =
              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn =
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that take an
    // alignment parameter to instead embed the alignment as an attribute on
    // their pointer args.
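    // For example (illustrative IR, not from a test): the old five-argument
    // form
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)
    // becomes the four-argument form with align attributes on the pointers:
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %d, i8* align 4 %s,
    //                                        i64 %n, i1 false)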
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                            {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

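// Upgrade llvm.global_ctors / llvm.global_dtors from the legacy two-field
// element type to the current three-field form, adding a null "associated
// data" pointer as the third member of each entry.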
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
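// For example (illustrative): a 128-bit pslldq by 4 bytes becomes a 16-byte
// shufflevector whose result is four zero bytes followed by the low twelve
// bytes of the source, i.e. the vector shifted left with zeroes shifted in.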
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  Type *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

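// Apply an x86 integer mask to a vector operation: lanes whose mask bit is
// set take the result from Op0, the rest keep the passthru value Op1.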
static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
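// For example (illustrative): a 128-bit palignr with an immediate of 20
// exhausts the second operand entirely, so the code below rewrites it as a
// shift of the first operand by 4 bytes with zeroes shifted in.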
1609 Value *Op1, Value *Shift,
1610 Value *Passthru, Value *Mask,
1611 bool IsVALIGN) {
1612 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1613
1614 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1615 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1616 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1617 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1618
1619 // Mask the immediate for VALIGN.
1620 if (IsVALIGN)
1621 ShiftVal &= (NumElts - 1);
1622
1623 // If palignr is shifting the pair of vectors more than the size of two
1624 // lanes, emit zero.
1625 if (ShiftVal >= 32)
1626    return llvm::Constant::getNullValue(Op0->getType());
1627
1628 // If palignr is shifting the pair of input vectors more than one lane,
1629 // but less than two lanes, convert to shifting in zeroes.
1630 if (ShiftVal > 16) {
1631 ShiftVal -= 16;
1632 Op1 = Op0;
1633    Op0 = llvm::Constant::getNullValue(Op0->getType());
1634  }
1635
1636 int Indices[64];
1637 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1638 for (unsigned l = 0; l < NumElts; l += 16) {
1639 for (unsigned i = 0; i != 16; ++i) {
1640 unsigned Idx = ShiftVal + i;
1641 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1642 Idx += NumElts - 16; // End of lane, switch operand.
1643 Indices[l + i] = Idx + l;
1644 }
1645 }
1646
1647 Value *Align = Builder.CreateShuffleVector(
1648 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1649
1650 return emitX86Select(Builder, Mask, Align, Passthru);
1651}
1652
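// Worked example: a 128-bit PALIGNR with an immediate of 4 yields indices
// {4..15, 16, 17, 18, 19} over the shuffle operands (Op1, Op0), i.e. bytes
// 4-15 of Op1 followed by bytes 0-3 of Op0, which is the byte-wise right
// shift of the concatenated pair that the instruction defines.
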
1653static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1654                                          bool ZeroMask, bool IndexForm) {
1655 Type *Ty = CI.getType();
1656 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1657 unsigned EltWidth = Ty->getScalarSizeInBits();
1658 bool IsFloat = Ty->isFPOrFPVectorTy();
1659 Intrinsic::ID IID;
1660 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1661 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1662 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1663 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1664 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1665 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1666 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1667 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1668 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1669 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1670 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1671 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1672 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1673 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1674 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1675 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1676 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1677 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1678 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1679 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1680 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1681 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1682 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1683 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1684 else if (VecWidth == 128 && EltWidth == 16)
1685 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1686 else if (VecWidth == 256 && EltWidth == 16)
1687 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1688 else if (VecWidth == 512 && EltWidth == 16)
1689 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1690 else if (VecWidth == 128 && EltWidth == 8)
1691 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1692 else if (VecWidth == 256 && EltWidth == 8)
1693 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1694 else if (VecWidth == 512 && EltWidth == 8)
1695 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1696 else
1697 llvm_unreachable("Unexpected intrinsic");
1698
1699 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1700 CI.getArgOperand(2) };
1701
1702 // If this isn't index form we need to swap operand 0 and 1.
1703 if (!IndexForm)
1704 std::swap(Args[0], Args[1]);
1705
1706 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1707 Args);
1708 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1709 : Builder.CreateBitCast(CI.getArgOperand(1),
1710 Ty);
1711 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1712}
1713
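// For example, the VPERMT2 variants handled above carry their index vector
// in operand 0, so they are swapped into the (data, index, data) order the
// vpermi2var intrinsics expect; operand 1 doubles as the merge-masking
// passthru in both forms, with ZeroMask selecting a zero vector instead.
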
1714static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1715                                         Intrinsic::ID IID) {
1716 Type *Ty = CI.getType();
1717 Value *Op0 = CI.getOperand(0);
1718 Value *Op1 = CI.getOperand(1);
1719 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1720 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1721
1722 if (CI.arg_size() == 4) { // For masked intrinsics.
1723 Value *VecSrc = CI.getOperand(2);
1724 Value *Mask = CI.getOperand(3);
1725 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1726 }
1727 return Res;
1728}
1729
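// For example, "avx512.mask.pmaxs.*" reaches this helper with
// IID == Intrinsic::smax: the unmasked @llvm.smax call is emitted first,
// and the four-argument masked forms are completed with an emitX86Select
// over the passthru operand.
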
1730static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1731                               bool IsRotateRight) {
1732 Type *Ty = CI.getType();
1733 Value *Src = CI.getArgOperand(0);
1734 Value *Amt = CI.getArgOperand(1);
1735
1736  // The amount may be a scalar immediate, in which case create a splat
1737  // vector. Funnel shift amounts are treated as modulo and the types are
1738  // all power-of-2, so we only care about the lowest log2 bits anyway.
1739 if (Amt->getType() != Ty) {
1740 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1741 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1742 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1743 }
1744
1745 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1746 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1747 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1748
1749 if (CI.arg_size() == 4) { // For masked intrinsics.
1750 Value *VecSrc = CI.getOperand(2);
1751 Value *Mask = CI.getOperand(3);
1752 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1753 }
1754 return Res;
1755}
1756
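// For example, a vprold of <4 x i32> by the scalar immediate 5 becomes
// @llvm.fshl.v4i32(%src, %src, splat(i32 5)); a rotate is just a funnel
// shift with both data inputs equal.
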
1757static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1758 bool IsSigned) {
1759 Type *Ty = CI.getType();
1760 Value *LHS = CI.getArgOperand(0);
1761 Value *RHS = CI.getArgOperand(1);
1762
1763 CmpInst::Predicate Pred;
1764 switch (Imm) {
1765 case 0x0:
1766 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1767 break;
1768 case 0x1:
1769 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1770 break;
1771 case 0x2:
1772 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1773 break;
1774 case 0x3:
1775 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1776 break;
1777 case 0x4:
1778 Pred = ICmpInst::ICMP_EQ;
1779 break;
1780 case 0x5:
1781 Pred = ICmpInst::ICMP_NE;
1782 break;
1783 case 0x6:
1784 return Constant::getNullValue(Ty); // FALSE
1785 case 0x7:
1786 return Constant::getAllOnesValue(Ty); // TRUE
1787 default:
1788 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1789 }
1790
1791 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1792 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1793 return Ext;
1794}
1795
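// For example, "xop.vpcomltb" arrives here with Imm == 0 and IsSigned set,
// lowering to sext(icmp slt %a, %b) to <16 x i8>, while the FALSE/TRUE
// immediates (6 and 7) fold directly to all-zeros and all-ones constants.
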
1796static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1797                                    bool IsShiftRight, bool ZeroMask) {
1798 Type *Ty = CI.getType();
1799 Value *Op0 = CI.getArgOperand(0);
1800 Value *Op1 = CI.getArgOperand(1);
1801 Value *Amt = CI.getArgOperand(2);
1802
1803 if (IsShiftRight)
1804 std::swap(Op0, Op1);
1805
1806  // The amount may be a scalar immediate, in which case create a splat
1807  // vector. Funnel shift amounts are treated as modulo and the types are
1808  // all power-of-2, so we only care about the lowest log2 bits anyway.
1809 if (Amt->getType() != Ty) {
1810 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1811 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1812 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1813 }
1814
1815 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1816 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1817 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1818
1819 unsigned NumArgs = CI.arg_size();
1820 if (NumArgs >= 4) { // For masked intrinsics.
1821 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1822 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1823 CI.getArgOperand(0);
1824 Value *Mask = CI.getOperand(NumArgs - 1);
1825 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1826 }
1827 return Res;
1828}
1829
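// For example, VPSHLDD on <4 x i32> becomes @llvm.fshl.v4i32(%a, %b, %amt);
// the VPSHRD forms swap the two data operands first and use @llvm.fshr,
// matching the reversed concatenation order of the right-shift variant.
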
1830static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1831                                 Value *Mask, bool Aligned) {
1832 // Cast the pointer to the right type.
1833 Ptr = Builder.CreateBitCast(Ptr,
1834 llvm::PointerType::getUnqual(Data->getType()));
1835 const Align Alignment =
1836 Aligned
1837 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1838 : Align(1);
1839
1840 // If the mask is all ones just emit a regular store.
1841 if (const auto *C = dyn_cast<Constant>(Mask))
1842 if (C->isAllOnesValue())
1843 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1844
1845 // Convert the mask from an integer type to a vector of i1.
1846 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1847 Mask = getX86MaskVec(Builder, Mask, NumElts);
1848 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1849}
1850
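// For example, "avx512.mask.storeu.ps.128" with a non-constant mask becomes
// an @llvm.masked.store of <4 x float> with alignment 1 and a <4 x i1>
// mask, while a constant all-ones mask short-circuits to the plain store.
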
1851static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1852                                Value *Passthru, Value *Mask, bool Aligned) {
1853 Type *ValTy = Passthru->getType();
1854 // Cast the pointer to the right type.
1855  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1856  const Align Alignment =
1857      Aligned
1858          ? Align(
1859                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1860                8)
1861          : Align(1);
1862
1863  // If the mask is all ones just emit a regular load.
1864 if (const auto *C = dyn_cast<Constant>(Mask))
1865 if (C->isAllOnesValue())
1866 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1867
1868 // Convert the mask from an integer type to a vector of i1.
1869 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1870 Mask = getX86MaskVec(Builder, Mask, NumElts);
1871 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1872}
1873
1874static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1875 Type *Ty = CI.getType();
1876 Value *Op0 = CI.getArgOperand(0);
1877 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1878 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1879 if (CI.arg_size() == 3)
1880 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1881 return Res;
1882}
1883
1884static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1885 Type *Ty = CI.getType();
1886
1887 // Arguments have a vXi32 type so cast to vXi64.
1888 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1889 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1890
1891 if (IsSigned) {
1892 // Shift left then arithmetic shift right.
1893 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1894 LHS = Builder.CreateShl(LHS, ShiftAmt);
1895 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1896 RHS = Builder.CreateShl(RHS, ShiftAmt);
1897 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1898 } else {
1899 // Clear the upper bits.
1900 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1901 LHS = Builder.CreateAnd(LHS, Mask);
1902 RHS = Builder.CreateAnd(RHS, Mask);
1903 }
1904
1905 Value *Res = Builder.CreateMul(LHS, RHS);
1906
1907 if (CI.arg_size() == 4)
1908 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1909
1910 return Res;
1911}
1912
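// Worked example: the vXi64 lanes hold i32 data in their low halves. For
// the signed form, shl 32 followed by ashr 32 sign-extends that low half
// (0x00000000ffffffff becomes 0xffffffffffffffff); the unsigned form only
// needs the 0xffffffff mask to clear the high half before the multiply.
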
1913// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
1914static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1915                                     Value *Mask) {
1916 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1917 if (Mask) {
1918 const auto *C = dyn_cast<Constant>(Mask);
1919 if (!C || !C->isAllOnesValue())
1920 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1921 }
1922
1923 if (NumElts < 8) {
1924 int Indices[8];
1925 for (unsigned i = 0; i != NumElts; ++i)
1926 Indices[i] = i;
1927 for (unsigned i = NumElts; i != 8; ++i)
1928 Indices[i] = NumElts + i % NumElts;
1929    Vec = Builder.CreateShuffleVector(Vec,
1930                                      Constant::getNullValue(Vec->getType()),
1931                                      Indices);
1932 }
1933 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1934}
1935
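// For example, a <4 x i1> compare result is widened against a zero vector
// using indices {0, 1, 2, 3, 4, 5, 6, 7} (elements 4-7 select the zeros),
// and the resulting <8 x i1> is bitcast to the i8 the old intrinsics
// returned.
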
1936static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1937                                   unsigned CC, bool Signed) {
1938 Value *Op0 = CI.getArgOperand(0);
1939 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1940
1941 Value *Cmp;
1942  if (CC == 3) {
1943    Cmp = Constant::getNullValue(
1944        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1945  } else if (CC == 7) {
1946    Cmp = Constant::getAllOnesValue(
1947        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1948  } else {
1949    ICmpInst::Predicate Pred;
1950    switch (CC) {
1951 default: llvm_unreachable("Unknown condition code");
1952 case 0: Pred = ICmpInst::ICMP_EQ; break;
1953 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1954 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1955 case 4: Pred = ICmpInst::ICMP_NE; break;
1956 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1957 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1958 }
1959 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1960 }
1961
1962 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1963
1964 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1965}
1966
1967// Replace a masked intrinsic with an older unmasked intrinsic.
1968static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
1969                                 Intrinsic::ID IID) {
1970 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1971 Value *Rep = Builder.CreateCall(Intrin,
1972 { CI.getArgOperand(0), CI.getArgOperand(1) });
1973 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1974}
1975
1976static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1977  Value* A = CI.getArgOperand(0);
1978 Value* B = CI.getArgOperand(1);
1979 Value* Src = CI.getArgOperand(2);
1980 Value* Mask = CI.getArgOperand(3);
1981
1982 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1983 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1984 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1985 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1986 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1987 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1988}
1989
1990static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1991  Value* Op = CI.getArgOperand(0);
1992 Type* ReturnOp = CI.getType();
1993 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1994 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1995 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1996}
1997
1998// Replace intrinsic with unmasked version and a select.
1999static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2000                                      CallBase &CI, Value *&Rep) {
2001 Name = Name.substr(12); // Remove avx512.mask.
2002
2003 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2004 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2005 Intrinsic::ID IID;
2006 if (Name.starts_with("max.p")) {
2007 if (VecWidth == 128 && EltWidth == 32)
2008 IID = Intrinsic::x86_sse_max_ps;
2009 else if (VecWidth == 128 && EltWidth == 64)
2010 IID = Intrinsic::x86_sse2_max_pd;
2011 else if (VecWidth == 256 && EltWidth == 32)
2012 IID = Intrinsic::x86_avx_max_ps_256;
2013 else if (VecWidth == 256 && EltWidth == 64)
2014 IID = Intrinsic::x86_avx_max_pd_256;
2015 else
2016 llvm_unreachable("Unexpected intrinsic");
2017 } else if (Name.starts_with("min.p")) {
2018 if (VecWidth == 128 && EltWidth == 32)
2019 IID = Intrinsic::x86_sse_min_ps;
2020 else if (VecWidth == 128 && EltWidth == 64)
2021 IID = Intrinsic::x86_sse2_min_pd;
2022 else if (VecWidth == 256 && EltWidth == 32)
2023 IID = Intrinsic::x86_avx_min_ps_256;
2024 else if (VecWidth == 256 && EltWidth == 64)
2025 IID = Intrinsic::x86_avx_min_pd_256;
2026 else
2027 llvm_unreachable("Unexpected intrinsic");
2028 } else if (Name.starts_with("pshuf.b.")) {
2029 if (VecWidth == 128)
2030 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2031 else if (VecWidth == 256)
2032 IID = Intrinsic::x86_avx2_pshuf_b;
2033 else if (VecWidth == 512)
2034 IID = Intrinsic::x86_avx512_pshuf_b_512;
2035 else
2036 llvm_unreachable("Unexpected intrinsic");
2037 } else if (Name.starts_with("pmul.hr.sw.")) {
2038 if (VecWidth == 128)
2039 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2040 else if (VecWidth == 256)
2041 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2042 else if (VecWidth == 512)
2043 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2044 else
2045 llvm_unreachable("Unexpected intrinsic");
2046 } else if (Name.starts_with("pmulh.w.")) {
2047 if (VecWidth == 128)
2048 IID = Intrinsic::x86_sse2_pmulh_w;
2049 else if (VecWidth == 256)
2050 IID = Intrinsic::x86_avx2_pmulh_w;
2051 else if (VecWidth == 512)
2052 IID = Intrinsic::x86_avx512_pmulh_w_512;
2053 else
2054 llvm_unreachable("Unexpected intrinsic");
2055 } else if (Name.starts_with("pmulhu.w.")) {
2056 if (VecWidth == 128)
2057 IID = Intrinsic::x86_sse2_pmulhu_w;
2058 else if (VecWidth == 256)
2059 IID = Intrinsic::x86_avx2_pmulhu_w;
2060 else if (VecWidth == 512)
2061 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2062 else
2063 llvm_unreachable("Unexpected intrinsic");
2064 } else if (Name.starts_with("pmaddw.d.")) {
2065 if (VecWidth == 128)
2066 IID = Intrinsic::x86_sse2_pmadd_wd;
2067 else if (VecWidth == 256)
2068 IID = Intrinsic::x86_avx2_pmadd_wd;
2069 else if (VecWidth == 512)
2070 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("pmaddubs.w.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("packsswb.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_sse2_packsswb_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_packsswb;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_packsswb_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("packssdw.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_packssdw_128;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_packssdw;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_packssdw_512;
2098 else
2099 llvm_unreachable("Unexpected intrinsic");
2100 } else if (Name.starts_with("packuswb.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_packuswb_128;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_packuswb;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_packuswb_512;
2107 else
2108 llvm_unreachable("Unexpected intrinsic");
2109 } else if (Name.starts_with("packusdw.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse41_packusdw;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_packusdw;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_packusdw_512;
2116 else
2117 llvm_unreachable("Unexpected intrinsic");
2118 } else if (Name.starts_with("vpermilvar.")) {
2119 if (VecWidth == 128 && EltWidth == 32)
2120 IID = Intrinsic::x86_avx_vpermilvar_ps;
2121 else if (VecWidth == 128 && EltWidth == 64)
2122 IID = Intrinsic::x86_avx_vpermilvar_pd;
2123 else if (VecWidth == 256 && EltWidth == 32)
2124 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2125 else if (VecWidth == 256 && EltWidth == 64)
2126 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2127 else if (VecWidth == 512 && EltWidth == 32)
2128 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2129 else if (VecWidth == 512 && EltWidth == 64)
2130 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2131 else
2132 llvm_unreachable("Unexpected intrinsic");
2133 } else if (Name == "cvtpd2dq.256") {
2134 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2135 } else if (Name == "cvtpd2ps.256") {
2136 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2137 } else if (Name == "cvttpd2dq.256") {
2138 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2139 } else if (Name == "cvttps2dq.128") {
2140 IID = Intrinsic::x86_sse2_cvttps2dq;
2141 } else if (Name == "cvttps2dq.256") {
2142 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2143 } else if (Name.starts_with("permvar.")) {
2144 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2145 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146 IID = Intrinsic::x86_avx2_permps;
2147 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148 IID = Intrinsic::x86_avx2_permd;
2149 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150 IID = Intrinsic::x86_avx512_permvar_df_256;
2151 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152 IID = Intrinsic::x86_avx512_permvar_di_256;
2153 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154 IID = Intrinsic::x86_avx512_permvar_sf_512;
2155 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156 IID = Intrinsic::x86_avx512_permvar_si_512;
2157 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158 IID = Intrinsic::x86_avx512_permvar_df_512;
2159 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160 IID = Intrinsic::x86_avx512_permvar_di_512;
2161 else if (VecWidth == 128 && EltWidth == 16)
2162 IID = Intrinsic::x86_avx512_permvar_hi_128;
2163 else if (VecWidth == 256 && EltWidth == 16)
2164 IID = Intrinsic::x86_avx512_permvar_hi_256;
2165 else if (VecWidth == 512 && EltWidth == 16)
2166 IID = Intrinsic::x86_avx512_permvar_hi_512;
2167 else if (VecWidth == 128 && EltWidth == 8)
2168 IID = Intrinsic::x86_avx512_permvar_qi_128;
2169 else if (VecWidth == 256 && EltWidth == 8)
2170 IID = Intrinsic::x86_avx512_permvar_qi_256;
2171 else if (VecWidth == 512 && EltWidth == 8)
2172 IID = Intrinsic::x86_avx512_permvar_qi_512;
2173 else
2174 llvm_unreachable("Unexpected intrinsic");
2175 } else if (Name.starts_with("dbpsadbw.")) {
2176 if (VecWidth == 128)
2177 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2178 else if (VecWidth == 256)
2179 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2180 else if (VecWidth == 512)
2181 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2182 else
2183 llvm_unreachable("Unexpected intrinsic");
2184 } else if (Name.starts_with("pmultishift.qb.")) {
2185 if (VecWidth == 128)
2186 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2187 else if (VecWidth == 256)
2188 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2189 else if (VecWidth == 512)
2190 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2191 else
2192 llvm_unreachable("Unexpected intrinsic");
2193 } else if (Name.starts_with("conflict.")) {
2194 if (Name[9] == 'd' && VecWidth == 128)
2195 IID = Intrinsic::x86_avx512_conflict_d_128;
2196 else if (Name[9] == 'd' && VecWidth == 256)
2197 IID = Intrinsic::x86_avx512_conflict_d_256;
2198 else if (Name[9] == 'd' && VecWidth == 512)
2199 IID = Intrinsic::x86_avx512_conflict_d_512;
2200 else if (Name[9] == 'q' && VecWidth == 128)
2201 IID = Intrinsic::x86_avx512_conflict_q_128;
2202 else if (Name[9] == 'q' && VecWidth == 256)
2203 IID = Intrinsic::x86_avx512_conflict_q_256;
2204 else if (Name[9] == 'q' && VecWidth == 512)
2205 IID = Intrinsic::x86_avx512_conflict_q_512;
2206 else
2207 llvm_unreachable("Unexpected intrinsic");
2208 } else if (Name.starts_with("pavg.")) {
2209 if (Name[5] == 'b' && VecWidth == 128)
2210 IID = Intrinsic::x86_sse2_pavg_b;
2211 else if (Name[5] == 'b' && VecWidth == 256)
2212 IID = Intrinsic::x86_avx2_pavg_b;
2213 else if (Name[5] == 'b' && VecWidth == 512)
2214 IID = Intrinsic::x86_avx512_pavg_b_512;
2215 else if (Name[5] == 'w' && VecWidth == 128)
2216 IID = Intrinsic::x86_sse2_pavg_w;
2217 else if (Name[5] == 'w' && VecWidth == 256)
2218 IID = Intrinsic::x86_avx2_pavg_w;
2219 else if (Name[5] == 'w' && VecWidth == 512)
2220 IID = Intrinsic::x86_avx512_pavg_w_512;
2221 else
2222 llvm_unreachable("Unexpected intrinsic");
2223 } else
2224 return false;
2225
2226 SmallVector<Value *, 4> Args(CI.args());
2227 Args.pop_back();
2228 Args.pop_back();
2229 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2230 Args);
2231 unsigned NumArgs = CI.arg_size();
2232 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2233 CI.getArgOperand(NumArgs - 2));
2234 return true;
2235}
2236
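// For example, "avx512.mask.max.ps.256" matches the "max.p" prefix with
// VecWidth == 256 and EltWidth == 32, dispatches to
// Intrinsic::x86_avx_max_ps_256, drops the trailing passthru and mask
// operands for the unmasked call, and then reapplies them via
// emitX86Select.
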
2237/// Upgrade comment in call to inline asm that represents an objc retain release
2238/// marker.
2239void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2240 size_t Pos;
2241 if (AsmStr->find("mov\tfp") == 0 &&
2242 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2243 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2244 AsmStr->replace(Pos, 1, ";");
2245 }
2246}
2247
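// For example, an old asm string of the form
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has its '#' replaced with ';', the comment syntax the current marker
// format uses.
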
2248static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2249                                      IRBuilder<> &Builder) {
2250 LLVMContext &C = F->getContext();
2251 Value *Rep = nullptr;
2252
2253 if (Name.starts_with("sse4a.movnt.")) {
2254    SmallVector<Metadata *, 1> Elts;
2255    Elts.push_back(
2256 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2257 MDNode *Node = MDNode::get(C, Elts);
2258
2259 Value *Arg0 = CI->getArgOperand(0);
2260 Value *Arg1 = CI->getArgOperand(1);
2261
2262 // Nontemporal (unaligned) store of the 0'th element of the float/double
2263 // vector.
2264 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2265 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2266 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2267 Value *Extract =
2268 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2269
2270 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2271 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2272 } else if (Name.starts_with("avx.movnt.") ||
2273 Name.starts_with("avx512.storent.")) {
2274    SmallVector<Metadata *, 1> Elts;
2275    Elts.push_back(
2276 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2277 MDNode *Node = MDNode::get(C, Elts);
2278
2279 Value *Arg0 = CI->getArgOperand(0);
2280 Value *Arg1 = CI->getArgOperand(1);
2281
2282 // Convert the type of the pointer to a pointer to the stored type.
2283 Value *BC = Builder.CreateBitCast(
2284 Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2285 StoreInst *SI = Builder.CreateAlignedStore(
2286        Arg1, BC,
2287        Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2288    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2289 } else if (Name == "sse2.storel.dq") {
2290 Value *Arg0 = CI->getArgOperand(0);
2291 Value *Arg1 = CI->getArgOperand(1);
2292
2293 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2294 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2295 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2296 Value *BC = Builder.CreateBitCast(
2297 Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2298 Builder.CreateAlignedStore(Elt, BC, Align(1));
2299 } else if (Name.starts_with("sse.storeu.") ||
2300 Name.starts_with("sse2.storeu.") ||
2301 Name.starts_with("avx.storeu.")) {
2302 Value *Arg0 = CI->getArgOperand(0);
2303 Value *Arg1 = CI->getArgOperand(1);
2304
2305 Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2306 "cast");
2307 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2308 } else if (Name == "avx512.mask.store.ss") {
2309 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2310 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2311 Mask, false);
2312 } else if (Name.starts_with("avx512.mask.store")) {
2313 // "avx512.mask.storeu." or "avx512.mask.store."
2314 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2315 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2316 CI->getArgOperand(2), Aligned);
2317 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2318 // Upgrade packed integer vector compare intrinsics to compare instructions.
2319    // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2320 bool CmpEq = Name[9] == 'e';
2321 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2322 CI->getArgOperand(0), CI->getArgOperand(1));
2323 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2324 } else if (Name.starts_with("avx512.broadcastm")) {
2325 Type *ExtTy = Type::getInt32Ty(C);
2326 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2327 ExtTy = Type::getInt64Ty(C);
2328 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2329 ExtTy->getPrimitiveSizeInBits();
2330 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2331 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2332 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2333 Value *Vec = CI->getArgOperand(0);
2334 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2335 Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
2336 Elt0->getType());
2337 Elt0 = Builder.CreateCall(Intr, Elt0);
2338 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2339 } else if (Name.starts_with("avx.sqrt.p") ||
2340 Name.starts_with("sse2.sqrt.p") ||
2341 Name.starts_with("sse.sqrt.p")) {
2342    Rep =
2343        Builder.CreateCall(Intrinsic::getDeclaration(
2344                               F->getParent(), Intrinsic::sqrt, CI->getType()),
2345                           {CI->getArgOperand(0)});
2346 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2347 if (CI->arg_size() == 4 &&
2348 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2349 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2350 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2351 : Intrinsic::x86_avx512_sqrt_pd_512;
2352
2353 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2354 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
2355 Args);
2356 } else {
2357 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2358 Intrinsic::sqrt,
2359 CI->getType()),
2360 {CI->getArgOperand(0)});
2361 }
2362 Rep =
2363 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2364 } else if (Name.starts_with("avx512.ptestm") ||
2365 Name.starts_with("avx512.ptestnm")) {
2366 Value *Op0 = CI->getArgOperand(0);
2367 Value *Op1 = CI->getArgOperand(1);
2368 Value *Mask = CI->getArgOperand(2);
2369 Rep = Builder.CreateAnd(Op0, Op1);
2370 llvm::Type *Ty = Op0->getType();
2371    Value *Zero = llvm::Constant::getNullValue(Ty);
2372    ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2373 ? ICmpInst::ICMP_NE
2374 : ICmpInst::ICMP_EQ;
2375 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2376 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2377 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2378 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2379 ->getNumElements();
2380 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2381 Rep =
2382 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2383 } else if (Name.starts_with("avx512.kunpck")) {
2384 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2385 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2386 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2387 int Indices[64];
2388 for (unsigned i = 0; i != NumElts; ++i)
2389 Indices[i] = i;
2390
2391 // First extract half of each vector. This gives better codegen than
2392 // doing it in a single shuffle.
2393 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2394 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2395 // Concat the vectors.
2396 // NOTE: Operands have to be swapped to match intrinsic definition.
2397 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2398 Rep = Builder.CreateBitCast(Rep, CI->getType());
2399 } else if (Name == "avx512.kand.w") {
2400 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2401 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2402 Rep = Builder.CreateAnd(LHS, RHS);
2403 Rep = Builder.CreateBitCast(Rep, CI->getType());
2404 } else if (Name == "avx512.kandn.w") {
2405 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2406 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2407 LHS = Builder.CreateNot(LHS);
2408 Rep = Builder.CreateAnd(LHS, RHS);
2409 Rep = Builder.CreateBitCast(Rep, CI->getType());
2410 } else if (Name == "avx512.kor.w") {
2411 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2412 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2413 Rep = Builder.CreateOr(LHS, RHS);
2414 Rep = Builder.CreateBitCast(Rep, CI->getType());
2415 } else if (Name == "avx512.kxor.w") {
2416 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2417 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2418 Rep = Builder.CreateXor(LHS, RHS);
2419 Rep = Builder.CreateBitCast(Rep, CI->getType());
2420 } else if (Name == "avx512.kxnor.w") {
2421 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2422 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2423 LHS = Builder.CreateNot(LHS);
2424 Rep = Builder.CreateXor(LHS, RHS);
2425 Rep = Builder.CreateBitCast(Rep, CI->getType());
2426 } else if (Name == "avx512.knot.w") {
2427 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2428 Rep = Builder.CreateNot(Rep);
2429 Rep = Builder.CreateBitCast(Rep, CI->getType());
2430 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2431 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2432 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2433 Rep = Builder.CreateOr(LHS, RHS);
2434 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2435 Value *C;
2436 if (Name[14] == 'c')
2437 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2438 else
2439 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2440 Rep = Builder.CreateICmpEQ(Rep, C);
2441 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2442 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2443 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2444 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2445 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2446 Type *I32Ty = Type::getInt32Ty(C);
2447 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2448 ConstantInt::get(I32Ty, 0));
2449 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2450 ConstantInt::get(I32Ty, 0));
2451 Value *EltOp;
2452 if (Name.contains(".add."))
2453 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2454 else if (Name.contains(".sub."))
2455 EltOp = Builder.CreateFSub(Elt0, Elt1);
2456 else if (Name.contains(".mul."))
2457 EltOp = Builder.CreateFMul(Elt0, Elt1);
2458 else
2459 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2460 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2461 ConstantInt::get(I32Ty, 0));
2462 } else if (Name.starts_with("avx512.mask.pcmp")) {
2463 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2464 bool CmpEq = Name[16] == 'e';
2465 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2466 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2467 Type *OpTy = CI->getArgOperand(0)->getType();
2468 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2469 Intrinsic::ID IID;
2470 switch (VecWidth) {
2471 default:
2472 llvm_unreachable("Unexpected intrinsic");
2473 case 128:
2474 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2475 break;
2476 case 256:
2477 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2478 break;
2479 case 512:
2480 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2481 break;
2482 }
2483
2484 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2485 {CI->getOperand(0), CI->getArgOperand(1)});
2486 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2487 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2488 Type *OpTy = CI->getArgOperand(0)->getType();
2489 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2490 unsigned EltWidth = OpTy->getScalarSizeInBits();
2491 Intrinsic::ID IID;
2492 if (VecWidth == 128 && EltWidth == 32)
2493 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2494 else if (VecWidth == 256 && EltWidth == 32)
2495 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2496 else if (VecWidth == 512 && EltWidth == 32)
2497 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2498 else if (VecWidth == 128 && EltWidth == 64)
2499 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2500 else if (VecWidth == 256 && EltWidth == 64)
2501 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2502 else if (VecWidth == 512 && EltWidth == 64)
2503 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2504 else
2505 llvm_unreachable("Unexpected intrinsic");
2506
2507 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2508 {CI->getOperand(0), CI->getArgOperand(1)});
2509 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2510 } else if (Name.starts_with("avx512.cmp.p")) {
2511 SmallVector<Value *, 4> Args(CI->args());
2512 Type *OpTy = Args[0]->getType();
2513 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2514 unsigned EltWidth = OpTy->getScalarSizeInBits();
2515 Intrinsic::ID IID;
2516 if (VecWidth == 128 && EltWidth == 32)
2517 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2518 else if (VecWidth == 256 && EltWidth == 32)
2519 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2520 else if (VecWidth == 512 && EltWidth == 32)
2521 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2522 else if (VecWidth == 128 && EltWidth == 64)
2523 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2524 else if (VecWidth == 256 && EltWidth == 64)
2525 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2526 else if (VecWidth == 512 && EltWidth == 64)
2527 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2528 else
2529 llvm_unreachable("Unexpected intrinsic");
2530
2531    Value *Mask = Constant::getAllOnesValue(CI->getType());
2532    if (VecWidth == 512)
2533 std::swap(Mask, Args.back());
2534 Args.push_back(Mask);
2535
2536 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2537 Args);
2538 } else if (Name.starts_with("avx512.mask.cmp.")) {
2539 // Integer compare intrinsics.
2540 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2541 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2542 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2543 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2544 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2545 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2546 Name.starts_with("avx512.cvtw2mask.") ||
2547 Name.starts_with("avx512.cvtd2mask.") ||
2548 Name.starts_with("avx512.cvtq2mask.")) {
2549 Value *Op = CI->getArgOperand(0);
2550 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2551 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2552 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2553 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2554 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2555 Name.starts_with("avx512.mask.pabs")) {
2556 Rep = upgradeAbs(Builder, *CI);
2557 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2558 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2559 Name.starts_with("avx512.mask.pmaxs")) {
2560 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2561 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2562 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2563 Name.starts_with("avx512.mask.pmaxu")) {
2564 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2565 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2566 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2567 Name.starts_with("avx512.mask.pmins")) {
2568 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2569 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2570 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2571 Name.starts_with("avx512.mask.pminu")) {
2572 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2573 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2574 Name == "avx512.pmulu.dq.512" ||
2575 Name.starts_with("avx512.mask.pmulu.dq.")) {
2576 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2577 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2578 Name == "avx512.pmul.dq.512" ||
2579 Name.starts_with("avx512.mask.pmul.dq.")) {
2580 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2581 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2582 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2583 Rep =
2584 Builder.CreateSIToFP(CI->getArgOperand(1),
2585 cast<VectorType>(CI->getType())->getElementType());
2586 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2587 } else if (Name == "avx512.cvtusi2sd") {
2588 Rep =
2589 Builder.CreateUIToFP(CI->getArgOperand(1),
2590 cast<VectorType>(CI->getType())->getElementType());
2591 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2592 } else if (Name == "sse2.cvtss2sd") {
2593 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2594 Rep = Builder.CreateFPExt(
2595 Rep, cast<VectorType>(CI->getType())->getElementType());
2596 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2597 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2598 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2599 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2600 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2601 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2602 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2603 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2604 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2605 Name == "avx512.mask.cvtqq2ps.256" ||
2606 Name == "avx512.mask.cvtqq2ps.512" ||
2607 Name == "avx512.mask.cvtuqq2ps.256" ||
2608 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2609 Name == "avx.cvt.ps2.pd.256" ||
2610 Name == "avx512.mask.cvtps2pd.128" ||
2611 Name == "avx512.mask.cvtps2pd.256") {
2612 auto *DstTy = cast<FixedVectorType>(CI->getType());
2613 Rep = CI->getArgOperand(0);
2614 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2615
2616 unsigned NumDstElts = DstTy->getNumElements();
2617 if (NumDstElts < SrcTy->getNumElements()) {
2618 assert(NumDstElts == 2 && "Unexpected vector size");
2619 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2620 }
2621
2622 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2623 bool IsUnsigned = Name.contains("cvtu");
2624 if (IsPS2PD)
2625 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2626 else if (CI->arg_size() == 4 &&
2627 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2628 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2629 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2630 : Intrinsic::x86_avx512_sitofp_round;
2631 Function *F =
2632 Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
2633 Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
2634 } else {
2635 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2636 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2637 }
2638
2639 if (CI->arg_size() >= 3)
2640 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2641 CI->getArgOperand(1));
2642 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2643 Name.starts_with("vcvtph2ps.")) {
2644 auto *DstTy = cast<FixedVectorType>(CI->getType());
2645 Rep = CI->getArgOperand(0);
2646 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2647 unsigned NumDstElts = DstTy->getNumElements();
2648 if (NumDstElts != SrcTy->getNumElements()) {
2649 assert(NumDstElts == 4 && "Unexpected vector size");
2650 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2651 }
2652 Rep = Builder.CreateBitCast(
2653 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2654 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2655 if (CI->arg_size() >= 3)
2656 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2657 CI->getArgOperand(1));
2658 } else if (Name.starts_with("avx512.mask.load")) {
2659 // "avx512.mask.loadu." or "avx512.mask.load."
2660 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2661 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2662 CI->getArgOperand(2), Aligned);
2663 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2664 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2665 Type *PtrTy = ResultTy->getElementType();
2666
2667 // Cast the pointer to element type.
2668 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2669                                       llvm::PointerType::getUnqual(PtrTy));
2670
2671 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2672 ResultTy->getNumElements());
2673
2674    Function *ELd = Intrinsic::getDeclaration(
2675        F->getParent(), Intrinsic::masked_expandload, ResultTy);
2676 Rep = Builder.CreateCall(ELd, {Ptr, MaskVec, CI->getOperand(1)});
2677 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2678 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2679 Type *PtrTy = ResultTy->getElementType();
2680
2681 // Cast the pointer to element type.
2682 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2683                                       llvm::PointerType::getUnqual(PtrTy));
2684
2685 Value *MaskVec =
2686 getX86MaskVec(Builder, CI->getArgOperand(2),
2687 cast<FixedVectorType>(ResultTy)->getNumElements());
2688
2689    Function *CSt = Intrinsic::getDeclaration(
2690        F->getParent(), Intrinsic::masked_compressstore, ResultTy);
2691 Rep = Builder.CreateCall(CSt, {CI->getArgOperand(1), Ptr, MaskVec});
2692 } else if (Name.starts_with("avx512.mask.compress.") ||
2693 Name.starts_with("avx512.mask.expand.")) {
2694 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2695
2696 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2697 ResultTy->getNumElements());
2698
2699 bool IsCompress = Name[12] == 'c';
2700 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2701 : Intrinsic::x86_avx512_mask_expand;
2702 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2703 Rep = Builder.CreateCall(Intr,
2704 {CI->getOperand(0), CI->getOperand(1), MaskVec});
2705 } else if (Name.starts_with("xop.vpcom")) {
2706 bool IsSigned;
2707 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2708 Name.ends_with("uq"))
2709 IsSigned = false;
2710 else if (Name.ends_with("b") || Name.ends_with("w") ||
2711 Name.ends_with("d") || Name.ends_with("q"))
2712 IsSigned = true;
2713 else
2714 llvm_unreachable("Unknown suffix");
2715
2716 unsigned Imm;
2717 if (CI->arg_size() == 3) {
2718 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2719 } else {
2720 Name = Name.substr(9); // strip off "xop.vpcom"
2721 if (Name.starts_with("lt"))
2722 Imm = 0;
2723 else if (Name.starts_with("le"))
2724 Imm = 1;
2725 else if (Name.starts_with("gt"))
2726 Imm = 2;
2727 else if (Name.starts_with("ge"))
2728 Imm = 3;
2729 else if (Name.starts_with("eq"))
2730 Imm = 4;
2731 else if (Name.starts_with("ne"))
2732 Imm = 5;
2733 else if (Name.starts_with("false"))
2734 Imm = 6;
2735 else if (Name.starts_with("true"))
2736 Imm = 7;
2737 else
2738 llvm_unreachable("Unknown condition");
2739 }
2740
2741 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2742 } else if (Name.starts_with("xop.vpcmov")) {
2743 Value *Sel = CI->getArgOperand(2);
2744 Value *NotSel = Builder.CreateNot(Sel);
2745 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2746 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2747 Rep = Builder.CreateOr(Sel0, Sel1);
2748 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2749 Name.starts_with("avx512.mask.prol")) {
2750 Rep = upgradeX86Rotate(Builder, *CI, false);
2751 } else if (Name.starts_with("avx512.pror") ||
2752 Name.starts_with("avx512.mask.pror")) {
2753 Rep = upgradeX86Rotate(Builder, *CI, true);
2754 } else if (Name.starts_with("avx512.vpshld.") ||
2755 Name.starts_with("avx512.mask.vpshld") ||
2756 Name.starts_with("avx512.maskz.vpshld")) {
2757 bool ZeroMask = Name[11] == 'z';
2758 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2759 } else if (Name.starts_with("avx512.vpshrd.") ||
2760 Name.starts_with("avx512.mask.vpshrd") ||
2761 Name.starts_with("avx512.maskz.vpshrd")) {
2762 bool ZeroMask = Name[11] == 'z';
2763 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2764 } else if (Name == "sse42.crc32.64.8") {
2765    Function *CRC32 = Intrinsic::getDeclaration(
2766        F->getParent(), Intrinsic::x86_sse42_crc32_32_8);
2767 Value *Trunc0 =
2768 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2769 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2770 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2771 } else if (Name.starts_with("avx.vbroadcast.s") ||
2772 Name.starts_with("avx512.vbroadcast.s")) {
2773 // Replace broadcasts with a series of insertelements.
2774 auto *VecTy = cast<FixedVectorType>(CI->getType());
2775 Type *EltTy = VecTy->getElementType();
2776 unsigned EltNum = VecTy->getNumElements();
2777 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2778 Type *I32Ty = Type::getInt32Ty(C);
2779 Rep = PoisonValue::get(VecTy);
2780 for (unsigned I = 0; I < EltNum; ++I)
2781 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2782 } else if (Name.starts_with("sse41.pmovsx") ||
2783 Name.starts_with("sse41.pmovzx") ||
2784 Name.starts_with("avx2.pmovsx") ||
2785 Name.starts_with("avx2.pmovzx") ||
2786 Name.starts_with("avx512.mask.pmovsx") ||
2787 Name.starts_with("avx512.mask.pmovzx")) {
2788 auto *DstTy = cast<FixedVectorType>(CI->getType());
2789 unsigned NumDstElts = DstTy->getNumElements();
2790
2791 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2792 SmallVector<int, 8> ShuffleMask(NumDstElts);
2793 for (unsigned i = 0; i != NumDstElts; ++i)
2794 ShuffleMask[i] = i;
2795
2796 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2797
2798 bool DoSext = Name.contains("pmovsx");
2799 Rep =
2800 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2801 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2802 if (CI->arg_size() == 3)
2803 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2804 CI->getArgOperand(1));
2805 } else if (Name == "avx512.mask.pmov.qd.256" ||
2806 Name == "avx512.mask.pmov.qd.512" ||
2807 Name == "avx512.mask.pmov.wb.256" ||
2808 Name == "avx512.mask.pmov.wb.512") {
2809 Type *Ty = CI->getArgOperand(1)->getType();
2810 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2811 Rep =
2812 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2813 } else if (Name.starts_with("avx.vbroadcastf128") ||
2814 Name == "avx2.vbroadcasti128") {
2815 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2816 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2817 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2818 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2819 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2820 PointerType::getUnqual(VT));
2821 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2822 if (NumSrcElts == 2)
2823 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2824 else
2825 Rep = Builder.CreateShuffleVector(Load,
2826 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2827 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2828 Name.starts_with("avx512.mask.shuf.f")) {
2829 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2830 Type *VT = CI->getType();
2831 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2832 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2833 unsigned ControlBitsMask = NumLanes - 1;
2834 unsigned NumControlBits = NumLanes / 2;
2835 SmallVector<int, 8> ShuffleMask(0);
2836
2837 for (unsigned l = 0; l != NumLanes; ++l) {
2838 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2839 // We actually need the other source.
2840 if (l >= NumLanes / 2)
2841 LaneMask += NumLanes;
2842 for (unsigned i = 0; i != NumElementsInLane; ++i)
2843 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2844 }
2845 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2846 CI->getArgOperand(1), ShuffleMask);
2847 Rep =
2848 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2849 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2850 Name.starts_with("avx512.mask.broadcasti")) {
2851 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2852 ->getNumElements();
2853 unsigned NumDstElts =
2854 cast<FixedVectorType>(CI->getType())->getNumElements();
2855
2856 SmallVector<int, 8> ShuffleMask(NumDstElts);
2857 for (unsigned i = 0; i != NumDstElts; ++i)
2858 ShuffleMask[i] = i % NumSrcElts;
2859
2860 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2861 CI->getArgOperand(0), ShuffleMask);
2862 Rep =
2863 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2864 } else if (Name.starts_with("avx2.pbroadcast") ||
2865 Name.starts_with("avx2.vbroadcast") ||
2866 Name.starts_with("avx512.pbroadcast") ||
2867 Name.starts_with("avx512.mask.broadcast.s")) {
2868 // Replace vp?broadcasts with a vector shuffle.
2869 Value *Op = CI->getArgOperand(0);
2870 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2871 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2872    SmallVector<int, 8> M;
2873    ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2874    Rep = Builder.CreateShuffleVector(Op, M);
2875
2876 if (CI->arg_size() == 3)
2877 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2878 CI->getArgOperand(1));
2879 } else if (Name.starts_with("sse2.padds.") ||
2880 Name.starts_with("avx2.padds.") ||
2881 Name.starts_with("avx512.padds.") ||
2882 Name.starts_with("avx512.mask.padds.")) {
2883 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2884 } else if (Name.starts_with("sse2.psubs.") ||
2885 Name.starts_with("avx2.psubs.") ||
2886 Name.starts_with("avx512.psubs.") ||
2887 Name.starts_with("avx512.mask.psubs.")) {
2888 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2889 } else if (Name.starts_with("sse2.paddus.") ||
2890 Name.starts_with("avx2.paddus.") ||
2891 Name.starts_with("avx512.mask.paddus.")) {
2892 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2893 } else if (Name.starts_with("sse2.psubus.") ||
2894 Name.starts_with("avx2.psubus.") ||
2895 Name.starts_with("avx512.mask.psubus.")) {
2896 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2897 } else if (Name.starts_with("avx512.mask.palignr.")) {
2898 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2899 CI->getArgOperand(1), CI->getArgOperand(2),
2900 CI->getArgOperand(3), CI->getArgOperand(4),
2901 false);
2902 } else if (Name.starts_with("avx512.mask.valign.")) {
2903    Rep = upgradeX86ALIGNIntrinsics(
2904        Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2905 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
2906 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
2907 // 128/256-bit shift left specified in bits.
2908 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2909 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2910 Shift / 8); // Shift is in bits.
2911 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
2912 // 128/256-bit shift right specified in bits.
2913 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2914 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2915 Shift / 8); // Shift is in bits.
2916 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
2917 Name == "avx512.psll.dq.512") {
2918 // 128/256/512-bit shift left specified in bytes.
2919 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2920 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2921 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
2922 Name == "avx512.psrl.dq.512") {
2923 // 128/256/512-bit shift right specified in bytes.
2924 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2925 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2926 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
2927 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
2928 Name.starts_with("avx2.pblendd.")) {
2929 Value *Op0 = CI->getArgOperand(0);
2930 Value *Op1 = CI->getArgOperand(1);
2931 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2932 auto *VecTy = cast<FixedVectorType>(CI->getType());
2933 unsigned NumElts = VecTy->getNumElements();
2934
2935 SmallVector<int, 16> Idxs(NumElts);
2936 for (unsigned i = 0; i != NumElts; ++i)
2937 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
2938
2939 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940 } else if (Name.starts_with("avx.vinsertf128.") ||
2941 Name == "avx2.vinserti128" ||
2942 Name.starts_with("avx512.mask.insert")) {
2943 Value *Op0 = CI->getArgOperand(0);
2944 Value *Op1 = CI->getArgOperand(1);
2945 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2946 unsigned DstNumElts =
2947 cast<FixedVectorType>(CI->getType())->getNumElements();
2948 unsigned SrcNumElts =
2949 cast<FixedVectorType>(Op1->getType())->getNumElements();
2950 unsigned Scale = DstNumElts / SrcNumElts;
2951
2952 // Mask off the high bits of the immediate value; hardware ignores those.
2953 Imm = Imm % Scale;
2954
2955 // Extend the second operand into a vector the size of the destination.
2956 SmallVector<int, 8> Idxs(DstNumElts);
2957 for (unsigned i = 0; i != SrcNumElts; ++i)
2958 Idxs[i] = i;
2959 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2960 Idxs[i] = SrcNumElts;
2961 Rep = Builder.CreateShuffleVector(Op1, Idxs);
2962
2963 // Insert the second operand into the first operand.
2964
2965 // Note that there is no guarantee that instruction lowering will actually
2966 // produce a vinsertf128 instruction for the created shuffles. In
2967 // particular, the 0 immediate case involves no lane changes, so it can
2968 // be handled as a blend.
2969
2970 // Example of shuffle mask for 32-bit elements:
2971 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2972 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2973
2974 // First fill with identity mask.
2975 for (unsigned i = 0; i != DstNumElts; ++i)
2976 Idxs[i] = i;
2977 // Then replace the elements where we need to insert.
2978 for (unsigned i = 0; i != SrcNumElts; ++i)
2979 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2980 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2981
2982 // If the intrinsic has a mask operand, handle that.
2983 if (CI->arg_size() == 5)
2984 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
2985 CI->getArgOperand(3));
2986 } else if (Name.starts_with("avx.vextractf128.") ||
2987 Name == "avx2.vextracti128" ||
2988 Name.starts_with("avx512.mask.vextract")) {
2989 Value *Op0 = CI->getArgOperand(0);
2990 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2991 unsigned DstNumElts =
2992 cast<FixedVectorType>(CI->getType())->getNumElements();
2993 unsigned SrcNumElts =
2994 cast<FixedVectorType>(Op0->getType())->getNumElements();
2995 unsigned Scale = SrcNumElts / DstNumElts;
2996
2997 // Mask off the high bits of the immediate value; hardware ignores those.
2998 Imm = Imm % Scale;
2999
3000 // Get indexes for the subvector of the input vector.
3001 SmallVector<int, 8> Idxs(DstNumElts);
3002 for (unsigned i = 0; i != DstNumElts; ++i) {
3003 Idxs[i] = i + (Imm * DstNumElts);
3004 }
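// Worked example (values assumed): extracting the upper half of a v8f32
// source into a v4f32 result (Imm = 1) gives Idxs = <4, 5, 6, 7>.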
3005 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3006
3007 // If the intrinsic has a mask operand, handle that.
3008 if (CI->arg_size() == 4)
3009 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3010 CI->getArgOperand(2));
3011 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3012 Name.starts_with("avx512.mask.perm.di.")) {
3013 Value *Op0 = CI->getArgOperand(0);
3014 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3015 auto *VecTy = cast<FixedVectorType>(CI->getType());
3016 unsigned NumElts = VecTy->getNumElements();
3017
3018 SmallVector<int, 8> Idxs(NumElts);
3019 for (unsigned i = 0; i != NumElts; ++i)
3020 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
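// Worked example (values assumed): on v8i64 with Imm = 0x1B (0b00011011),
// each 4-element group is reversed: Idxs = <3, 2, 1, 0, 7, 6, 5, 4>.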
3021
3022 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3023
3024 if (CI->arg_size() == 4)
3025 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3026 CI->getArgOperand(2));
3027 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3028 // The immediate permute control byte looks like this:
3029 // [1:0] - select 128 bits from sources for low half of destination
3030 // [2] - ignore
3031 // [3] - zero low half of destination
3032 // [5:4] - select 128 bits from sources for high half of destination
3033 // [6] - ignore
3034 // [7] - zero high half of destination
3035
3036 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3037
3038 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3039 unsigned HalfSize = NumElts / 2;
3040 SmallVector<int, 8> ShuffleMask(NumElts);
3041
3042 // Determine which operand(s) are actually in use for this instruction.
3043 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3044 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3045
3046 // If needed, replace operands based on zero mask.
3047 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3048 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3049
3050 // Permute low half of result.
3051 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3052 for (unsigned i = 0; i < HalfSize; ++i)
3053 ShuffleMask[i] = StartIndex + i;
3054
3055 // Permute high half of result.
3056 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3057 for (unsigned i = 0; i < HalfSize; ++i)
3058 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3059
3060 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3061
3062 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3063 Name.starts_with("avx512.mask.vpermil.p") ||
3064 Name.starts_with("avx512.mask.pshuf.d.")) {
3065 Value *Op0 = CI->getArgOperand(0);
3066 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067 auto *VecTy = cast<FixedVectorType>(CI->getType());
3068 unsigned NumElts = VecTy->getNumElements();
3069 // Calculate the size of each index in the immediate.
3070 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3071 unsigned IdxMask = ((1 << IdxSize) - 1);
3072
3073 SmallVector<int, 8> Idxs(NumElts);
3074 // Look up the bits for this element, wrapping around the immediate every
3075 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3076 // to offset by the first index of each group.
3077 for (unsigned i = 0; i != NumElts; ++i)
3078 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
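// Worked example (values assumed): for sse2.pshuf.d on v4i32, IdxSize = 2
// and IdxMask = 0x3, so Imm = 0x1B reverses the vector: Idxs = <3, 2, 1, 0>.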
3079
3080 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3081
3082 if (CI->arg_size() == 4)
3083 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3084 CI->getArgOperand(2));
3085 } else if (Name == "sse2.pshufl.w" ||
3086 Name.starts_with("avx512.mask.pshufl.w.")) {
3087 Value *Op0 = CI->getArgOperand(0);
3088 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3090
3091 SmallVector<int, 16> Idxs(NumElts);
3092 for (unsigned l = 0; l != NumElts; l += 8) {
3093 for (unsigned i = 0; i != 4; ++i)
3094 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3095 for (unsigned i = 4; i != 8; ++i)
3096 Idxs[i + l] = i + l;
3097 }
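// Worked example (values assumed): Imm = 0x1B reverses the low four words
// of each 128-bit lane, giving Idxs = <3, 2, 1, 0, 4, 5, 6, 7> on v8i16.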
3098
3099 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3100
3101 if (CI->arg_size() == 4)
3102 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3103 CI->getArgOperand(2));
3104 } else if (Name == "sse2.pshufh.w" ||
3105 Name.starts_with("avx512.mask.pshufh.w.")) {
3106 Value *Op0 = CI->getArgOperand(0);
3107 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3108 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3109
3110 SmallVector<int, 16> Idxs(NumElts);
3111 for (unsigned l = 0; l != NumElts; l += 8) {
3112 for (unsigned i = 0; i != 4; ++i)
3113 Idxs[i + l] = i + l;
3114 for (unsigned i = 0; i != 4; ++i)
3115 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3116 }
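// Worked example (values assumed): Imm = 0x1B reverses the high four words
// of each 128-bit lane, giving Idxs = <0, 1, 2, 3, 7, 6, 5, 4> on v8i16.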
3117
3118 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3119
3120 if (CI->arg_size() == 4)
3121 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3122 CI->getArgOperand(2));
3123 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3124 Value *Op0 = CI->getArgOperand(0);
3125 Value *Op1 = CI->getArgOperand(1);
3126 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3128
3129 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3130 unsigned HalfLaneElts = NumLaneElts / 2;
3131
3132 SmallVector<int, 16> Idxs(NumElts);
3133 for (unsigned i = 0; i != NumElts; ++i) {
3134 // Base index is the starting element of the lane.
3135 Idxs[i] = i - (i % NumLaneElts);
3136 // If we are halfway through the lane, switch to the other source.
3137 if ((i % NumLaneElts) >= HalfLaneElts)
3138 Idxs[i] += NumElts;
3139 // Now select the specific element by adding HalfLaneElts bits from
3140 // the immediate, wrapping around the immediate every 8 bits.
3141 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3142 }
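// Worked example (values assumed): for a v4f32 shufps with Imm = 0x1B, the
// low half reads Op0[3] and Op0[2] and the high half reads Op1[1] and
// Op1[0], giving Idxs = <3, 2, 5, 4>.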
3143
3144 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3145
3146 Rep =
3147 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3148 } else if (Name.starts_with("avx512.mask.movddup") ||
3149 Name.starts_with("avx512.mask.movshdup") ||
3150 Name.starts_with("avx512.mask.movsldup")) {
3151 Value *Op0 = CI->getArgOperand(0);
3152 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3153 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3154
3155 unsigned Offset = 0;
3156 if (Name.starts_with("avx512.mask.movshdup."))
3157 Offset = 1;
3158
3159 SmallVector<int, 16> Idxs(NumElts);
3160 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3161 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3162 Idxs[i + l + 0] = i + l + Offset;
3163 Idxs[i + l + 1] = i + l + Offset;
3164 }
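// Worked example (values assumed): on v4f32, movsldup (Offset = 0) yields
// Idxs = <0, 0, 2, 2> and movshdup (Offset = 1) yields Idxs = <1, 1, 3, 3>.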
3165
3166 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3167
3168 Rep =
3169 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3170 } else if (Name.starts_with("avx512.mask.punpckl") ||
3171 Name.starts_with("avx512.mask.unpckl.")) {
3172 Value *Op0 = CI->getArgOperand(0);
3173 Value *Op1 = CI->getArgOperand(1);
3174 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3175 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3176
3177 SmallVector<int, 64> Idxs(NumElts);
3178 for (int l = 0; l != NumElts; l += NumLaneElts)
3179 for (int i = 0; i != NumLaneElts; ++i)
3180 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
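// Worked example (values assumed): for v4i32 this interleaves the low
// halves of the two sources: Idxs = <0, 4, 1, 5>.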
3181
3182 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3183
3184 Rep =
3185 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3186 } else if (Name.starts_with("avx512.mask.punpckh") ||
3187 Name.starts_with("avx512.mask.unpckh.")) {
3188 Value *Op0 = CI->getArgOperand(0);
3189 Value *Op1 = CI->getArgOperand(1);
3190 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3191 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3192
3193 SmallVector<int, 64> Idxs(NumElts);
3194 for (int l = 0; l != NumElts; l += NumLaneElts)
3195 for (int i = 0; i != NumLaneElts; ++i)
3196 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
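// Worked example (values assumed): for v4i32 this interleaves the high
// halves of the two sources: Idxs = <2, 6, 3, 7>.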
3197
3198 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3199
3200 Rep =
3201 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3202 } else if (Name.starts_with("avx512.mask.and.") ||
3203 Name.starts_with("avx512.mask.pand.")) {
3204 VectorType *FTy = cast<VectorType>(CI->getType());
3205 VectorType *ITy = VectorType::getInteger(FTy);
3206 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3207 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3208 Rep = Builder.CreateBitCast(Rep, FTy);
3209 Rep =
3210 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3211 } else if (Name.starts_with("avx512.mask.andn.") ||
3212 Name.starts_with("avx512.mask.pandn.")) {
3213 VectorType *FTy = cast<VectorType>(CI->getType());
3214 VectorType *ITy = VectorType::getInteger(FTy);
3215 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3216 Rep = Builder.CreateAnd(Rep,
3217 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3218 Rep = Builder.CreateBitCast(Rep, FTy);
3219 Rep =
3220 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3221 } else if (Name.starts_with("avx512.mask.or.") ||
3222 Name.starts_with("avx512.mask.por.")) {
3223 VectorType *FTy = cast<VectorType>(CI->getType());
3224 VectorType *ITy = VectorType::getInteger(FTy);
3225 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3226 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3227 Rep = Builder.CreateBitCast(Rep, FTy);
3228 Rep =
3229 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3230 } else if (Name.starts_with("avx512.mask.xor.") ||
3231 Name.starts_with("avx512.mask.pxor.")) {
3232 VectorType *FTy = cast<VectorType>(CI->getType());
3233 VectorType *ITy = VectorType::getInteger(FTy);
3234 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3235 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3236 Rep = Builder.CreateBitCast(Rep, FTy);
3237 Rep =
3238 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3239 } else if (Name.starts_with("avx512.mask.padd.")) {
3240 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3241 Rep =
3242 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3243 } else if (Name.starts_with("avx512.mask.psub.")) {
3244 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3245 Rep =
3246 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3247 } else if (Name.starts_with("avx512.mask.pmull.")) {
3248 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3249 Rep =
3250 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3251 } else if (Name.starts_with("avx512.mask.add.p")) {
3252 if (Name.ends_with(".512")) {
3253 Intrinsic::ID IID;
3254 if (Name[17] == 's')
3255 IID = Intrinsic::x86_avx512_add_ps_512;
3256 else
3257 IID = Intrinsic::x86_avx512_add_pd_512;
3258
3259 Rep = Builder.CreateCall(
3260 Intrinsic::getDeclaration(F->getParent(), IID),
3261 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3262 } else {
3263 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3264 }
3265 Rep =
3266 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3267 } else if (Name.starts_with("avx512.mask.div.p")) {
3268 if (Name.ends_with(".512")) {
3269 Intrinsic::ID IID;
3270 if (Name[17] == 's')
3271 IID = Intrinsic::x86_avx512_div_ps_512;
3272 else
3273 IID = Intrinsic::x86_avx512_div_pd_512;
3274
3275 Rep = Builder.CreateCall(
3276 Intrinsic::getDeclaration(F->getParent(), IID),
3277 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3278 } else {
3279 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3280 }
3281 Rep =
3282 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3283 } else if (Name.starts_with("avx512.mask.mul.p")) {
3284 if (Name.ends_with(".512")) {
3285 Intrinsic::ID IID;
3286 if (Name[17] == 's')
3287 IID = Intrinsic::x86_avx512_mul_ps_512;
3288 else
3289 IID = Intrinsic::x86_avx512_mul_pd_512;
3290
3291 Rep = Builder.CreateCall(
3292 Intrinsic::getDeclaration(F->getParent(), IID),
3293 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3294 } else {
3295 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3296 }
3297 Rep =
3298 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3299 } else if (Name.starts_with("avx512.mask.sub.p")) {
3300 if (Name.ends_with(".512")) {
3301 Intrinsic::ID IID;
3302 if (Name[17] == 's')
3303 IID = Intrinsic::x86_avx512_sub_ps_512;
3304 else
3305 IID = Intrinsic::x86_avx512_sub_pd_512;
3306
3307 Rep = Builder.CreateCall(
3308 Intrinsic::getDeclaration(F->getParent(), IID),
3309 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3310 } else {
3311 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3312 }
3313 Rep =
3314 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3315 } else if ((Name.starts_with("avx512.mask.max.p") ||
3316 Name.starts_with("avx512.mask.min.p")) &&
3317 Name.drop_front(18) == ".512") {
3318 bool IsDouble = Name[17] == 'd';
3319 bool IsMin = Name[13] == 'i';
3320 static const Intrinsic::ID MinMaxTbl[2][2] = {
3321 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3322 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3323 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3324
3325 Rep = Builder.CreateCall(
3326 Intrinsic::getDeclaration(F->getParent(), IID),
3327 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3328 Rep =
3329 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3331 Rep =
3332 Builder.CreateCall(Intrinsic::getDeclaration(
3333 F->getParent(), Intrinsic::ctlz, CI->getType()),
3334 {CI->getArgOperand(0), Builder.getInt1(false)});
3335 Rep =
3336 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3337 } else if (Name.starts_with("avx512.mask.psll")) {
3338 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3339 bool IsVariable = Name[16] == 'v';
3340 char Size = Name[16] == '.' ? Name[17]
3341 : Name[17] == '.' ? Name[18]
3342 : Name[18] == '.' ? Name[19]
3343 : Name[20];
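// Illustrative parses (names assumed, not from the source):
// "avx512.mask.psll.di.128" has Name[16] == '.', so Size = Name[17] == 'd'
// and Name[18] == 'i' marks the immediate form; "avx512.mask.psllv8.si"
// has Name[16] == 'v', the variable form handled first below.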
3344
3345 Intrinsic::ID IID;
3346 if (IsVariable && Name[17] != '.') {
3347 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3348 IID = Intrinsic::x86_avx2_psllv_q;
3349 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3350 IID = Intrinsic::x86_avx2_psllv_q_256;
3351 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3352 IID = Intrinsic::x86_avx2_psllv_d;
3353 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3354 IID = Intrinsic::x86_avx2_psllv_d_256;
3355 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3356 IID = Intrinsic::x86_avx512_psllv_w_128;
3357 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3358 IID = Intrinsic::x86_avx512_psllv_w_256;
3359 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3360 IID = Intrinsic::x86_avx512_psllv_w_512;
3361 else
3362 llvm_unreachable("Unexpected size");
3363 } else if (Name.ends_with(".128")) {
3364 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3365 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3366 : Intrinsic::x86_sse2_psll_d;
3367 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3368 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3369 : Intrinsic::x86_sse2_psll_q;
3370 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3371 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3372 : Intrinsic::x86_sse2_psll_w;
3373 else
3374 llvm_unreachable("Unexpected size");
3375 } else if (Name.ends_with(".256")) {
3376 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3377 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3378 : Intrinsic::x86_avx2_psll_d;
3379 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3380 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3381 : Intrinsic::x86_avx2_psll_q;
3382 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3383 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3384 : Intrinsic::x86_avx2_psll_w;
3385 else
3386 llvm_unreachable("Unexpected size");
3387 } else {
3388 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3389 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3390 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3391 : Intrinsic::x86_avx512_psll_d_512;
3392 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3393 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3394 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3395 : Intrinsic::x86_avx512_psll_q_512;
3396 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3397 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3398 : Intrinsic::x86_avx512_psll_w_512;
3399 else
3400 llvm_unreachable("Unexpected size");
3401 }
3402
3403 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3404 } else if (Name.starts_with("avx512.mask.psrl")) {
3405 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3406 bool IsVariable = Name[16] == 'v';
3407 char Size = Name[16] == '.' ? Name[17]
3408 : Name[17] == '.' ? Name[18]
3409 : Name[18] == '.' ? Name[19]
3410 : Name[20];
3411
3412 Intrinsic::ID IID;
3413 if (IsVariable && Name[17] != '.') {
3414 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3415 IID = Intrinsic::x86_avx2_psrlv_q;
3416 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3417 IID = Intrinsic::x86_avx2_psrlv_q_256;
3418 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3419 IID = Intrinsic::x86_avx2_psrlv_d;
3420 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3421 IID = Intrinsic::x86_avx2_psrlv_d_256;
3422 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3423 IID = Intrinsic::x86_avx512_psrlv_w_128;
3424 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3425 IID = Intrinsic::x86_avx512_psrlv_w_256;
3426 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3427 IID = Intrinsic::x86_avx512_psrlv_w_512;
3428 else
3429 llvm_unreachable("Unexpected size");
3430 } else if (Name.ends_with(".128")) {
3431 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3432 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3433 : Intrinsic::x86_sse2_psrl_d;
3434 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3435 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3436 : Intrinsic::x86_sse2_psrl_q;
3437 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3438 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3439 : Intrinsic::x86_sse2_psrl_w;
3440 else
3441 llvm_unreachable("Unexpected size");
3442 } else if (Name.ends_with(".256")) {
3443 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3444 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3445 : Intrinsic::x86_avx2_psrl_d;
3446 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3447 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3448 : Intrinsic::x86_avx2_psrl_q;
3449 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3450 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3451 : Intrinsic::x86_avx2_psrl_w;
3452 else
3453 llvm_unreachable("Unexpected size");
3454 } else {
3455 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3456 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3457 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3458 : Intrinsic::x86_avx512_psrl_d_512;
3459 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3460 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3461 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3462 : Intrinsic::x86_avx512_psrl_q_512;
3463 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3464 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3465 : Intrinsic::x86_avx512_psrl_w_512;
3466 else
3467 llvm_unreachable("Unexpected size");
3468 }
3469
3470 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3471 } else if (Name.starts_with("avx512.mask.psra")) {
3472 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3473 bool IsVariable = Name[16] == 'v';
3474 char Size = Name[16] == '.' ? Name[17]
3475 : Name[17] == '.' ? Name[18]
3476 : Name[18] == '.' ? Name[19]
3477 : Name[20];
3478
3479 Intrinsic::ID IID;
3480 if (IsVariable && Name[17] != '.') {
3481 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3482 IID = Intrinsic::x86_avx2_psrav_d;
3483 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3484 IID = Intrinsic::x86_avx2_psrav_d_256;
3485 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3486 IID = Intrinsic::x86_avx512_psrav_w_128;
3487 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3488 IID = Intrinsic::x86_avx512_psrav_w_256;
3489 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3490 IID = Intrinsic::x86_avx512_psrav_w_512;
3491 else
3492 llvm_unreachable("Unexpected size");
3493 } else if (Name.ends_with(".128")) {
3494 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3495 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3496 : Intrinsic::x86_sse2_psra_d;
3497 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3498 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3499 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3500 : Intrinsic::x86_avx512_psra_q_128;
3501 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3502 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3503 : Intrinsic::x86_sse2_psra_w;
3504 else
3505 llvm_unreachable("Unexpected size");
3506 } else if (Name.ends_with(".256")) {
3507 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3508 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3509 : Intrinsic::x86_avx2_psra_d;
3510 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3511 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3512 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3513 : Intrinsic::x86_avx512_psra_q_256;
3514 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3515 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3516 : Intrinsic::x86_avx2_psra_w;
3517 else
3518 llvm_unreachable("Unexpected size");
3519 } else {
3520 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3521 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3522 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3523 : Intrinsic::x86_avx512_psra_d_512;
3524 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3525 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3526 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3527 : Intrinsic::x86_avx512_psra_q_512;
3528 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3529 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3530 : Intrinsic::x86_avx512_psra_w_512;
3531 else
3532 llvm_unreachable("Unexpected size");
3533 }
3534
3535 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3536 } else if (Name.starts_with("avx512.mask.move.s")) {
3537 Rep = upgradeMaskedMove(Builder, *CI);
3538 } else if (Name.starts_with("avx512.cvtmask2")) {
3539 Rep = upgradeMaskToInt(Builder, *CI);
3540 } else if (Name.ends_with(".movntdqa")) {
3541 MDNode *Node = MDNode::get(
3542 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3543
3544 Value *Ptr = CI->getArgOperand(0);
3545
3546 // Convert the type of the pointer to a pointer to the stored type.
3547 Value *BC = Builder.CreateBitCast(
3548 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3549 LoadInst *LI = Builder.CreateAlignedLoad(
3550 CI->getType(), BC,
3551 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3552 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3553 Rep = LI;
3554 } else if (Name.starts_with("fma.vfmadd.") ||
3555 Name.starts_with("fma.vfmsub.") ||
3556 Name.starts_with("fma.vfnmadd.") ||
3557 Name.starts_with("fma.vfnmsub.")) {
3558 bool NegMul = Name[6] == 'n';
3559 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3560 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
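// Illustrative parse (name assumed): "fma.vfnmadd.sd" yields NegMul = true
// (Name[6]), NegAcc = false (Name[8]) and IsScalar = true (Name[12]).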
3561
3562 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3563 CI->getArgOperand(2)};
3564
3565 if (IsScalar) {
3566 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3567 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3568 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3569 }
3570
3571 if (NegMul && !IsScalar)
3572 Ops[0] = Builder.CreateFNeg(Ops[0]);
3573 if (NegMul && IsScalar)
3574 Ops[1] = Builder.CreateFNeg(Ops[1]);
3575 if (NegAcc)
3576 Ops[2] = Builder.CreateFNeg(Ops[2]);
3577
3578 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3579 Intrinsic::fma,
3580 Ops[0]->getType()),
3581 Ops);
3582
3583 if (IsScalar)
3584 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3585 } else if (Name.starts_with("fma4.vfmadd.s")) {
3586 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3587 CI->getArgOperand(2)};
3588
3589 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3590 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3591 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3592
3593 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3594 Intrinsic::fma,
3595 Ops[0]->getType()),
3596 Ops);
3597
3598 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3599 Rep, (uint64_t)0);
3600 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3601 Name.starts_with("avx512.maskz.vfmadd.s") ||
3602 Name.starts_with("avx512.mask3.vfmadd.s") ||
3603 Name.starts_with("avx512.mask3.vfmsub.s") ||
3604 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3605 bool IsMask3 = Name[11] == '3';
3606 bool IsMaskZ = Name[11] == 'z';
3607 // Drop the "avx512.mask." to make it easier.
3608 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3609 bool NegMul = Name[2] == 'n';
3610 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3611
3612 Value *A = CI->getArgOperand(0);
3613 Value *B = CI->getArgOperand(1);
3614 Value *C = CI->getArgOperand(2);
3615
3616 if (NegMul && (IsMask3 || IsMaskZ))
3617 A = Builder.CreateFNeg(A);
3618 if (NegMul && !(IsMask3 || IsMaskZ))
3619 B = Builder.CreateFNeg(B);
3620 if (NegAcc)
3621 C = Builder.CreateFNeg(C);
3622
3623 A = Builder.CreateExtractElement(A, (uint64_t)0);
3624 B = Builder.CreateExtractElement(B, (uint64_t)0);
3625 C = Builder.CreateExtractElement(C, (uint64_t)0);
3626
3627 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3628 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3629 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3630
3631 Intrinsic::ID IID;
3632 if (Name.back() == 'd')
3633 IID = Intrinsic::x86_avx512_vfmadd_f64;
3634 else
3635 IID = Intrinsic::x86_avx512_vfmadd_f32;
3636 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3637 Rep = Builder.CreateCall(FMA, Ops);
3638 } else {
3639 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3640 A->getType());
3641 Rep = Builder.CreateCall(FMA, {A, B, C});
3642 }
3643
3644 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3645 : IsMask3 ? C
3646 : A;
3647
3648 // For Mask3 with NegAcc, we need to create a new extractelement that
3649 // avoids the negation above.
3650 if (NegAcc && IsMask3)
3651 PassThru =
3652 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3653
3654 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3655 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3656 (uint64_t)0);
3657 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3658 Name.starts_with("avx512.mask.vfnmadd.p") ||
3659 Name.starts_with("avx512.mask.vfnmsub.p") ||
3660 Name.starts_with("avx512.mask3.vfmadd.p") ||
3661 Name.starts_with("avx512.mask3.vfmsub.p") ||
3662 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3663 Name.starts_with("avx512.maskz.vfmadd.p")) {
3664 bool IsMask3 = Name[11] == '3';
3665 bool IsMaskZ = Name[11] == 'z';
3666 // Drop the "avx512.mask." to make it easier.
3667 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3668 bool NegMul = Name[2] == 'n';
3669 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3670
3671 Value *A = CI->getArgOperand(0);
3672 Value *B = CI->getArgOperand(1);
3673 Value *C = CI->getArgOperand(2);
3674
3675 if (NegMul && (IsMask3 || IsMaskZ))
3676 A = Builder.CreateFNeg(A);
3677 if (NegMul && !(IsMask3 || IsMaskZ))
3678 B = Builder.CreateFNeg(B);
3679 if (NegAcc)
3680 C = Builder.CreateFNeg(C);
3681
3682 if (CI->arg_size() == 5 &&
3683 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3684 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3685 Intrinsic::ID IID;
3686 // Check the character before ".512" in string.
3687 if (Name[Name.size() - 5] == 's')
3688 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3689 else
3690 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3691
3692 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3693 {A, B, C, CI->getArgOperand(4)});
3694 } else {
3695 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3696 A->getType());
3697 Rep = Builder.CreateCall(FMA, {A, B, C});
3698 }
3699
3700 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3701 : IsMask3 ? CI->getArgOperand(2)
3702 : CI->getArgOperand(0);
3703
3704 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3705 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3706 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3707 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3708 Intrinsic::ID IID;
3709 if (VecWidth == 128 && EltWidth == 32)
3710 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3711 else if (VecWidth == 256 && EltWidth == 32)
3712 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3713 else if (VecWidth == 128 && EltWidth == 64)
3714 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3715 else if (VecWidth == 256 && EltWidth == 64)
3716 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3717 else
3718 llvm_unreachable("Unexpected intrinsic");
3719
3720 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3721 CI->getArgOperand(2)};
3722 Ops[2] = Builder.CreateFNeg(Ops[2]);
3723 Rep =
3724 Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);
3725 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3726 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3727 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3728 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3729 bool IsMask3 = Name[11] == '3';
3730 bool IsMaskZ = Name[11] == 'z';
3731 // Drop the "avx512.mask." to make it easier.
3732 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3733 bool IsSubAdd = Name[3] == 's';
3734 if (CI->arg_size() == 5) {
3735 Intrinsic::ID IID;
3736 // Check the character before ".512" in string.
3737 if (Name[Name.size() - 5] == 's')
3738 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3739 else
3740 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3741
3742 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3743 CI->getArgOperand(2), CI->getArgOperand(4)};
3744 if (IsSubAdd)
3745 Ops[2] = Builder.CreateFNeg(Ops[2]);
3746
3747 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3748 Ops);
3749 } else {
3750 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3751
3752 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3753 CI->getArgOperand(2)};
3754
3755 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3756 Ops[0]->getType());
3757 Value *Odd = Builder.CreateCall(FMA, Ops);
3758 Ops[2] = Builder.CreateFNeg(Ops[2]);
3759 Value *Even = Builder.CreateCall(FMA, Ops);
3760
3761 if (IsSubAdd)
3762 std::swap(Even, Odd);
3763
3764 SmallVector<int, 32> Idxs(NumElts);
3765 for (int i = 0; i != NumElts; ++i)
3766 Idxs[i] = i + (i % 2) * NumElts;
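// Worked example (values assumed): for NumElts = 4 this picks even lanes
// from Even and odd lanes from Odd: Idxs = <0, 5, 2, 7>.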
3767
3768 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3769 }
3770
3771 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3772 : IsMask3 ? CI->getArgOperand(2)
3773 : CI->getArgOperand(0);
3774
3775 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3776 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3777 Name.starts_with("avx512.maskz.pternlog.")) {
3778 bool ZeroMask = Name[11] == 'z';
3779 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3780 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3781 Intrinsic::ID IID;
3782 if (VecWidth == 128 && EltWidth == 32)
3783 IID = Intrinsic::x86_avx512_pternlog_d_128;
3784 else if (VecWidth == 256 && EltWidth == 32)
3785 IID = Intrinsic::x86_avx512_pternlog_d_256;
3786 else if (VecWidth == 512 && EltWidth == 32)
3787 IID = Intrinsic::x86_avx512_pternlog_d_512;
3788 else if (VecWidth == 128 && EltWidth == 64)
3789 IID = Intrinsic::x86_avx512_pternlog_q_128;
3790 else if (VecWidth == 256 && EltWidth == 64)
3791 IID = Intrinsic::x86_avx512_pternlog_q_256;
3792 else if (VecWidth == 512 && EltWidth == 64)
3793 IID = Intrinsic::x86_avx512_pternlog_q_512;
3794 else
3795 llvm_unreachable("Unexpected intrinsic");
3796
3797 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3798 CI->getArgOperand(2), CI->getArgOperand(3)};
3799 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3800 Args);
3801 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3802 : CI->getArgOperand(0);
3803 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3804 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3805 Name.starts_with("avx512.maskz.vpmadd52")) {
3806 bool ZeroMask = Name[11] == 'z';
3807 bool High = Name[20] == 'h' || Name[21] == 'h';
3808 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3809 Intrinsic::ID IID;
3810 if (VecWidth == 128 && !High)
3811 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3812 else if (VecWidth == 256 && !High)
3813 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3814 else if (VecWidth == 512 && !High)
3815 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3816 else if (VecWidth == 128 && High)
3817 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3818 else if (VecWidth == 256 && High)
3819 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3820 else if (VecWidth == 512 && High)
3821 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3822 else
3823 llvm_unreachable("Unexpected intrinsic");
3824
3825 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3826 CI->getArgOperand(2)};
3827 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3828 Args);
3829 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3830 : CI->getArgOperand(0);
3831 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3832 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3833 Name.starts_with("avx512.mask.vpermt2var.") ||
3834 Name.starts_with("avx512.maskz.vpermt2var.")) {
3835 bool ZeroMask = Name[11] == 'z';
3836 bool IndexForm = Name[17] == 'i';
3837 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3838 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3839 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3840 Name.starts_with("avx512.mask.vpdpbusds.") ||
3841 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3842 bool ZeroMask = Name[11] == 'z';
3843 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3844 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3845 Intrinsic::ID IID;
3846 if (VecWidth == 128 && !IsSaturating)
3847 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3848 else if (VecWidth == 256 && !IsSaturating)
3849 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3850 else if (VecWidth == 512 && !IsSaturating)
3851 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3852 else if (VecWidth == 128 && IsSaturating)
3853 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3854 else if (VecWidth == 256 && IsSaturating)
3855 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3856 else if (VecWidth == 512 && IsSaturating)
3857 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3858 else
3859 llvm_unreachable("Unexpected intrinsic");
3860
3861 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3862 CI->getArgOperand(2)};
3863 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3864 Args);
3865 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3866 : CI->getArgOperand(0);
3867 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3868 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3869 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3870 Name.starts_with("avx512.mask.vpdpwssds.") ||
3871 Name.starts_with("avx512.maskz.vpdpwssds.")) {
3872 bool ZeroMask = Name[11] == 'z';
3873 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3874 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3875 Intrinsic::ID IID;
3876 if (VecWidth == 128 && !IsSaturating)
3877 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3878 else if (VecWidth == 256 && !IsSaturating)
3879 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3880 else if (VecWidth == 512 && !IsSaturating)
3881 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3882 else if (VecWidth == 128 && IsSaturating)
3883 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3884 else if (VecWidth == 256 && IsSaturating)
3885 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3886 else if (VecWidth == 512 && IsSaturating)
3887 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3888 else
3889 llvm_unreachable("Unexpected intrinsic");
3890
3891 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3892 CI->getArgOperand(2)};
3893 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3894 Args);
3895 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3896 : CI->getArgOperand(0);
3897 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3898 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3899 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3900 Name == "subborrow.u32" || Name == "subborrow.u64") {
3901 Intrinsic::ID IID;
3902 if (Name[0] == 'a' && Name.back() == '2')
3903 IID = Intrinsic::x86_addcarry_32;
3904 else if (Name[0] == 'a' && Name.back() == '4')
3905 IID = Intrinsic::x86_addcarry_64;
3906 else if (Name[0] == 's' && Name.back() == '2')
3907 IID = Intrinsic::x86_subborrow_32;
3908 else if (Name[0] == 's' && Name.back() == '4')
3909 IID = Intrinsic::x86_subborrow_64;
3910 else
3911 llvm_unreachable("Unexpected intrinsic");
3912
3913 // Make a call with 3 operands.
3914 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3915 CI->getArgOperand(2)};
3916 Value *NewCall = Builder.CreateCall(
3917 Intrinsic::getDeclaration(CI->getModule(), IID), Args);
3918
3919 // Extract the second result and store it.
3920 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3921 // Cast the pointer to the right type.
3922 Value *Ptr = Builder.CreateBitCast(
3923 CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
3924 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3925 // Replace the original call result with the first result of the new call.
3926 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3927
3928 CI->replaceAllUsesWith(CF);
3929 Rep = nullptr;
3930 } else if (Name.starts_with("avx512.mask.") &&
3931 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3932 // Rep will be updated by the call in the condition.
3933 }
3934
3935 return Rep;
3936}
3937
3938static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
3939 IRBuilder<> &Builder) {
3940 if (Name == "mve.vctp64.old") {
3941 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
3942 // correct type.
3943 Value *VCTP = Builder.CreateCall(
3944 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
3945 CI->getArgOperand(0), CI->getName());
3946 Value *C1 = Builder.CreateCall(
3947 Intrinsic::getDeclaration(
3948 F->getParent(), Intrinsic::arm_mve_pred_v2i,
3949 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
3950 VCTP);
3951 return Builder.CreateCall(
3952 Intrinsic::getDeclaration(
3953 F->getParent(), Intrinsic::arm_mve_pred_i2v,
3954 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
3955 C1);
3956 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
3957 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
3958 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
3959 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
3960 Name ==
3961 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
3962 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
3963 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
3964 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
3965 Name ==
3966 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
3967 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
3968 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
3969 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
3970 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
3971 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
3972 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
3973 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
3974 std::vector<Type *> Tys;
3975 unsigned ID = CI->getIntrinsicID();
3976 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
3977 switch (ID) {
3978 case Intrinsic::arm_mve_mull_int_predicated:
3979 case Intrinsic::arm_mve_vqdmull_predicated:
3980 case Intrinsic::arm_mve_vldr_gather_base_predicated:
3981 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
3982 break;
3983 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
3984 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
3985 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
3986 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
3987 V2I1Ty};
3988 break;
3989 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
3990 Tys = {CI->getType(), CI->getOperand(0)->getType(),
3991 CI->getOperand(1)->getType(), V2I1Ty};
3992 break;
3993 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
3994 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
3995 CI->getOperand(2)->getType(), V2I1Ty};
3996 break;
3997 case Intrinsic::arm_cde_vcx1q_predicated:
3998 case Intrinsic::arm_cde_vcx1qa_predicated:
3999 case Intrinsic::arm_cde_vcx2q_predicated:
4000 case Intrinsic::arm_cde_vcx2qa_predicated:
4001 case Intrinsic::arm_cde_vcx3q_predicated:
4002 case Intrinsic::arm_cde_vcx3qa_predicated:
4003 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4004 break;
4005 default:
4006 llvm_unreachable("Unhandled Intrinsic!");
4007 }
4008
4009 std::vector<Value *> Ops;
4010 for (Value *Op : CI->args()) {
4011 Type *Ty = Op->getType();
4012 if (Ty->getScalarSizeInBits() == 1) {
4013 Value *C1 = Builder.CreateCall(
4014 Intrinsic::getDeclaration(
4015 F->getParent(), Intrinsic::arm_mve_pred_v2i,
4016 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
4017 Op);
4018 Op = Builder.CreateCall(
4019 Intrinsic::getDeclaration(F->getParent(),
4020 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
4021 C1);
4022 }
4023 Ops.push_back(Op);
4024 }
4025
4026 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
4027 return Builder.CreateCall(Fn, Ops, CI->getName());
4028 }
4029 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4030}
4031
4032// These are expected to have the arguments:
4033// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4034//
4035// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
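//
// An illustrative mapping (operand values and exact name assumed, not from
// the source):
//   %r = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %p, float %v,
//                                        i32 0, i32 0, i1 false)
// upgrades to roughly:
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") seq_cst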
4036//
4037static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4038 Function *F, IRBuilder<> &Builder) {
4039 AtomicRMWInst::BinOp RMWOp =
4040 StringSwitch<AtomicRMWInst::BinOp>(Name)
4041 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4042 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4043 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4044 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4045 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
4046
4047 unsigned NumOperands = CI->getNumOperands();
4048 if (NumOperands < 3) // Malformed bitcode.
4049 return nullptr;
4050
4051 Value *Ptr = CI->getArgOperand(0);
4052 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4053 if (!PtrTy) // Malformed.
4054 return nullptr;
4055
4056 Value *Val = CI->getArgOperand(1);
4057 if (Val->getType() != CI->getType()) // Malformed.
4058 return nullptr;
4059
4060 ConstantInt *OrderArg = nullptr;
4061 bool IsVolatile = false;
4062
4063 // These should have 5 arguments (plus the callee). A separate version of the
4064 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4065 if (NumOperands > 3)
4066 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4067
4068 // Ignore scope argument at 3
4069
4070 if (NumOperands > 5) {
4071 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4072 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4073 }
4074
4075 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4076 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4077 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4078 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4079 Order = AtomicOrdering::SequentiallyConsistent;
4080
4081 LLVMContext &Ctx = F->getContext();
4082
4083 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4084 Type *RetTy = CI->getType();
4085 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4086 if (VT->getElementType()->isIntegerTy(16)) {
4087 VectorType *AsBF16 =
4088 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4089 Val = Builder.CreateBitCast(Val, AsBF16);
4090 }
4091 }
4092
4093 // The scope argument never really worked correctly. Use agent as the most
4094 // conservative option which should still always produce the instruction.
4095 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4096 AtomicRMWInst *RMW =
4097 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4098
4099 if (PtrTy->getAddressSpace() != 3) {
4100 RMW->setMetadata("amdgpu.no.fine.grained.memory",
4101 MDNode::get(F->getContext(), {}));
4102 }
4103
4104 if (IsVolatile)
4105 RMW->setVolatile(true);
4106
4107 return Builder.CreateBitCast(RMW, RetTy);
4108}
4109
4110/// Helper to unwrap intrinsic call MetadataAsValue operands.
4111template <typename MDType>
4112static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4113 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4114 return dyn_cast<MDType>(MAV->getMetadata());
4115 return nullptr;
4116}
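// For example (illustrative): unwrapMAVOp<DILocalVariable>(CI, 1) returns
// the DILocalVariable wrapped by the second operand of a dbg intrinsic
// call, or nullptr if that operand is not metadata of the expected kind.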
4117
4118/// Convert debug intrinsic calls to non-instruction debug records.
4119/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4120/// \p CI - The debug intrinsic call.
4121static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4122 DbgRecord *DR = nullptr;
4123 if (Name == "label") {
4124 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4125 } else if (Name == "assign") {
4126 DR = new DbgVariableRecord(
4127 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4128 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4129 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4130 CI->getDebugLoc());
4131 } else if (Name == "declare") {
4132 DR = new DbgVariableRecord(
4133 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4134 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4135 DbgVariableRecord::LocationType::Declare);
4136 } else if (Name == "addr") {
4137 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4138 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4139 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4140 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4141 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4142 CI->getDebugLoc());
4143 } else if (Name == "value") {
4144 // An old version of dbg.value had an extra offset argument.
4145 unsigned VarOp = 1;
4146 unsigned ExprOp = 2;
4147 if (CI->arg_size() == 4) {
4148 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4149 // Nonzero offset dbg.values get dropped without a replacement.
4150 if (!Offset || !Offset->isZeroValue())
4151 return;
4152 VarOp = 2;
4153 ExprOp = 3;
4154 }
4155 DR = new DbgVariableRecord(
4156 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4157 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4158 }
4159 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4160 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4161}
4162
4163/// Upgrade a call to an old intrinsic. All argument and return casting must be
4164/// provided to seamlessly integrate with existing context.
4165void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4166 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4167 // checks the callee's function type matches. It's likely we need to handle
4168 // type changes here.
4169 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4170 if (!F)
4171 return;
4172
4173 LLVMContext &C = CI->getContext();
4174 IRBuilder<> Builder(C);
4175 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4176
4177 if (!NewFn) {
4178 bool FallthroughToDefaultUpgrade = false;
4179 // Get the Function's name.
4180 StringRef Name = F->getName();
4181
4182 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4183 Name = Name.substr(5);
4184
4185 bool IsX86 = Name.consume_front("x86.");
4186 bool IsNVVM = Name.consume_front("nvvm.");
4187 bool IsARM = Name.consume_front("arm.");
4188 bool IsAMDGCN = Name.consume_front("amdgcn.");
4189 bool IsDbg = Name.consume_front("dbg.");
4190 Value *Rep = nullptr;
4191
4192 if (!IsX86 && Name == "stackprotectorcheck") {
4193 Rep = nullptr;
4194 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4195 Value *Arg = CI->getArgOperand(0);
4196 Value *Neg = Builder.CreateNeg(Arg, "neg");
4197 Value *Cmp = Builder.CreateICmpSGE(
4198 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4199 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
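// i.e. abs(x) == select(x >= 0, x, -x); with an assumed x = -5 the compare
// is false and the select yields the negation, 5.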
4200 } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4201 Name.starts_with("atomic.load.add.f64.p"))) {
4202 Value *Ptr = CI->getArgOperand(0);
4203 Value *Val = CI->getArgOperand(1);
4204 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4205 AtomicOrdering::SequentiallyConsistent);
4206 } else if (IsNVVM && Name.consume_front("max.") &&
4207 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4208 Name == "ui" || Name == "ull")) {
4209 Value *Arg0 = CI->getArgOperand(0);
4210 Value *Arg1 = CI->getArgOperand(1);
4211 Value *Cmp = Name.starts_with("u")
4212 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4213 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4214 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4215 } else if (IsNVVM && Name.consume_front("min.") &&
4216 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4217 Name == "ui" || Name == "ull")) {
4218 Value *Arg0 = CI->getArgOperand(0);
4219 Value *Arg1 = CI->getArgOperand(1);
4220 Value *Cmp = Name.starts_with("u")
4221 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4222 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4223 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4224 } else if (IsNVVM && Name == "clz.ll") {
4225 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4226 Value *Arg = CI->getArgOperand(0);
4227 Value *Ctlz = Builder.CreateCall(
4228 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4229 {Arg->getType()}),
4230 {Arg, Builder.getFalse()}, "ctlz");
4231 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4232 } else if (IsNVVM && Name == "popc.ll") {
4233 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4234 // i64.
4235 Value *Arg = CI->getArgOperand(0);
4236 Value *Popc = Builder.CreateCall(
4237 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4238 {Arg->getType()}),
4239 Arg, "ctpop");
4240 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4241 } else if (IsNVVM) {
4242 if (Name == "h2f") {
4243 Rep =
4244 Builder.CreateCall(Intrinsic::getDeclaration(
4245 F->getParent(), Intrinsic::convert_from_fp16,
4246 {Builder.getFloatTy()}),
4247 CI->getArgOperand(0), "h2f");
4248 } else {
4249 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4250 if (IID != Intrinsic::not_intrinsic &&
4251 !F->getReturnType()->getScalarType()->isBFloatTy()) {
4252 rename(F);
4253 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4254 SmallVector<Value *, 2> Args;
4255 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4256 Value *Arg = CI->getArgOperand(I);
4257 Type *OldType = Arg->getType();
4258 Type *NewType = NewFn->getArg(I)->getType();
4259 Args.push_back((OldType->isIntegerTy() &&
4260 NewType->getScalarType()->isBFloatTy())
4261 ? Builder.CreateBitCast(Arg, NewType)
4262 : Arg);
4263 }
4264 Rep = Builder.CreateCall(NewFn, Args);
4265 if (F->getReturnType()->isIntegerTy())
4266 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4267 }
4268 }
4269 } else if (IsX86) {
4270 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4271 } else if (IsARM) {
4272 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4273 } else if (IsAMDGCN) {
4274 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4275 } else if (IsDbg) {
4276 // We might have decided we don't want the new format after all between
4277 // first requesting the upgrade and now; skip the conversion if that is
4278 // the case, and check here to see if the intrinsic needs to be upgraded
4279 // normally.
4280 if (!CI->getModule()->IsNewDbgInfoFormat) {
4281 bool NeedsUpgrade =
4282 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4283 if (!NeedsUpgrade)
4284 return;
4285 FallthroughToDefaultUpgrade = true;
4286 } else {
4287 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4288 }
4289 } else {
4290 llvm_unreachable("Unknown function for CallBase upgrade.");
4291 }
4292
4293 if (!FallthroughToDefaultUpgrade) {
4294 if (Rep)
4295 CI->replaceAllUsesWith(Rep);
4296 CI->eraseFromParent();
4297 return;
4298 }
4299 }
4300
4301 const auto &DefaultCase = [&]() -> void {
4302 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4303 // Handle generic mangling change.
4304 assert(
4305 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4306 "Unknown function for CallBase upgrade and isn't just a name change");
4307 CI->setCalledFunction(NewFn);
4308 return;
4309 }
4310
4311 // This must be an upgrade from a named to a literal struct.
4312 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4313 assert(OldST != NewFn->getReturnType() &&
4314 "Return type must have changed");
4315 assert(OldST->getNumElements() ==
4316 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4317 "Must have same number of elements");
4318
4319 SmallVector<Value *> Args(CI->args());
4320 Value *NewCI = Builder.CreateCall(NewFn, Args);
4321 Value *Res = PoisonValue::get(OldST);
4322 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4323 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4324 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4325 }
4326 CI->replaceAllUsesWith(Res);
4327 CI->eraseFromParent();
4328 return;
4329 }
4330
4331 // We're probably about to produce something invalid. Let the verifier catch
4332 // it instead of dying here.
4333 CI->setCalledOperand(
4334 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4335 return;
4336 };
4337 CallInst *NewCall = nullptr;
4338 switch (NewFn->getIntrinsicID()) {
4339 default: {
4340 DefaultCase();
4341 return;
4342 }
4343 case Intrinsic::arm_neon_vst1:
4344 case Intrinsic::arm_neon_vst2:
4345 case Intrinsic::arm_neon_vst3:
4346 case Intrinsic::arm_neon_vst4:
4347 case Intrinsic::arm_neon_vst2lane:
4348 case Intrinsic::arm_neon_vst3lane:
4349 case Intrinsic::arm_neon_vst4lane: {
4350 SmallVector<Value *, 4> Args(CI->args());
4351 NewCall = Builder.CreateCall(NewFn, Args);
4352 break;
4353 }
4354 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4355 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4356 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4357 LLVMContext &Ctx = F->getParent()->getContext();
4358 SmallVector<Value *, 4> Args(CI->args());
4359 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4360 cast<ConstantInt>(Args[3])->getZExtValue());
4361 NewCall = Builder.CreateCall(NewFn, Args);
4362 break;
4363 }
4364 case Intrinsic::aarch64_sve_ld3_sret:
4365 case Intrinsic::aarch64_sve_ld4_sret:
4366 case Intrinsic::aarch64_sve_ld2_sret: {
4367 StringRef Name = F->getName();
4368 Name = Name.substr(5);
4369 unsigned N = StringSwitch<unsigned>(Name)
4370 .StartsWith("aarch64.sve.ld2", 2)
4371 .StartsWith("aarch64.sve.ld3", 3)
4372 .StartsWith("aarch64.sve.ld4", 4)
4373 .Default(0);
4374 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4375 unsigned MinElts = RetTy->getMinNumElements() / N;
4376 SmallVector<Value *, 2> Args(CI->args());
4377 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4378 Value *Ret = llvm::PoisonValue::get(RetTy);
4379 for (unsigned I = 0; I < N; I++) {
4380 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4381 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4382 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4383 }
4384 NewCall = dyn_cast<CallInst>(Ret);
4385 break;
4386 }
4387
4388 case Intrinsic::coro_end: {
4389 SmallVector<Value *, 3> Args(CI->args());
4390 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4391 NewCall = Builder.CreateCall(NewFn, Args);
4392 break;
4393 }
4394
4395 case Intrinsic::vector_extract: {
4396 StringRef Name = F->getName();
4397 Name = Name.substr(5); // Strip llvm
4398 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4399 DefaultCase();
4400 return;
4401 }
4402 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4403 unsigned MinElts = RetTy->getMinNumElements();
4404 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4405 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4406 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4407 break;
4408 }
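// A rough illustration of the tuple.get rewrite above (not part of the
// original source; the exact overload suffixes are assumptions):
//
//   %e = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(
//            <vscale x 8 x i32> %tuple, i32 1)
//
// becomes a generic vector extract at element offset I * MinElts = 1 * 4:
//
//   %e = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(
//            <vscale x 8 x i32> %tuple, i64 4)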
4409
4410 case Intrinsic::vector_insert: {
4411 StringRef Name = F->getName();
4412 Name = Name.substr(5);
4413 if (!Name.starts_with("aarch64.sve.tuple")) {
4414 DefaultCase();
4415 return;
4416 }
4417 if (Name.starts_with("aarch64.sve.tuple.set")) {
4418 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4419 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4420 Value *NewIdx =
4421 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4422 NewCall = Builder.CreateCall(
4423 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4424 break;
4425 }
4426 if (Name.starts_with("aarch64.sve.tuple.create")) {
4427 unsigned N = StringSwitch<unsigned>(Name)
4428 .StartsWith("aarch64.sve.tuple.create2", 2)
4429 .StartsWith("aarch64.sve.tuple.create3", 3)
4430 .StartsWith("aarch64.sve.tuple.create4", 4)
4431 .Default(0);
4432 assert(N > 1 && "Create is expected to be between 2-4");
4433 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4434 Value *Ret = llvm::PoisonValue::get(RetTy);
4435 unsigned MinElts = RetTy->getMinNumElements() / N;
4436 for (unsigned I = 0; I < N; I++) {
4437 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4438 Value *V = CI->getArgOperand(I);
4439 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4440 }
4441 NewCall = dyn_cast<CallInst>(Ret);
4442 }
4443 break;
4444 }
4445
4446 case Intrinsic::arm_neon_bfdot:
4447 case Intrinsic::arm_neon_bfmmla:
4448 case Intrinsic::arm_neon_bfmlalb:
4449 case Intrinsic::arm_neon_bfmlalt:
4450 case Intrinsic::aarch64_neon_bfdot:
4451 case Intrinsic::aarch64_neon_bfmmla:
4452 case Intrinsic::aarch64_neon_bfmlalb:
4453 case Intrinsic::aarch64_neon_bfmlalt: {
4454 SmallVector<Value *, 3> Args;
4455 assert(CI->arg_size() == 3 &&
4456 "Mismatch between function args and call args");
4457 size_t OperandWidth =
4458 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4459 assert((OperandWidth == 64 || OperandWidth == 128) &&
4460 "Unexpected operand width");
4461 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4462 auto Iter = CI->args().begin();
4463 Args.push_back(*Iter++);
4464 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4465 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4466 NewCall = Builder.CreateCall(NewFn, Args);
4467 break;
4468 }
4469
4470 case Intrinsic::bitreverse:
4471 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4472 break;
4473
4474 case Intrinsic::ctlz:
4475 case Intrinsic::cttz:
4476 assert(CI->arg_size() == 1 &&
4477 "Mismatch between function args and call args");
4478 NewCall =
4479 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4480 break;
4481
4482 case Intrinsic::objectsize: {
4483 Value *NullIsUnknownSize =
4484 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4485 Value *Dynamic =
4486 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4487 NewCall = Builder.CreateCall(
4488 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4489 break;
4490 }
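// A rough illustration (not part of the original source): an old two-argument
// call such as
//
//   %sz = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false)
//
// is reissued with the later parameters defaulted to false:
//
//   %sz = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 false, i1 false)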
4491
4492 case Intrinsic::ctpop:
4493 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4494 break;
4495
4496 case Intrinsic::convert_from_fp16:
4497 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4498 break;
4499
4500 case Intrinsic::dbg_value: {
4501 StringRef Name = F->getName();
4502 Name = Name.substr(5); // Strip llvm.
4503 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4504 if (Name.starts_with("dbg.addr")) {
4505 DIExpression *Expr = cast<DIExpression>(
4506 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4507 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4508 NewCall =
4509 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4510 MetadataAsValue::get(C, Expr)});
4511 break;
4512 }
4513
4514 // Upgrade from the old version that had an extra offset argument.
4515 assert(CI->arg_size() == 4);
4516 // Drop nonzero offsets instead of attempting to upgrade them.
4517 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4518 if (Offset->isZeroValue()) {
4519 NewCall = Builder.CreateCall(
4520 NewFn,
4521 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4522 break;
4523 }
4524 CI->eraseFromParent();
4525 return;
4526 }
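// A rough illustration of the dbg.addr path (not part of the original
// source; the variable metadata !10 is hypothetical):
//
//   call void @llvm.dbg.addr(metadata ptr %x.addr, metadata !10,
//                            metadata !DIExpression())
//
// becomes a dbg.value whose expression dereferences the address:
//
//   call void @llvm.dbg.value(metadata ptr %x.addr, metadata !10,
//                             metadata !DIExpression(DW_OP_deref))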
4527
4528 case Intrinsic::ptr_annotation:
4529 // Upgrade from versions that lacked the annotation attribute argument.
4530 if (CI->arg_size() != 4) {
4531 DefaultCase();
4532 return;
4533 }
4534
4535 // Create a new call with an added null annotation attribute argument.
4536 NewCall =
4537 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4538 CI->getArgOperand(2), CI->getArgOperand(3),
4539 Constant::getNullValue(Builder.getPtrTy())});
4540 NewCall->takeName(CI);
4541 CI->replaceAllUsesWith(NewCall);
4542 CI->eraseFromParent();
4543 return;
4544
4545 case Intrinsic::var_annotation:
4546 // Upgrade from versions that lacked the annotation attribute argument.
4547 if (CI->arg_size() != 4) {
4548 DefaultCase();
4549 return;
4550 }
4551 // Create a new call with an added null annotation attribute argument.
4552 NewCall =
4553 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4554 CI->getArgOperand(2), CI->getArgOperand(3),
4555 Constant::getNullValue(Builder.getPtrTy())});
4556 NewCall->takeName(CI);
4557 CI->replaceAllUsesWith(NewCall);
4558 CI->eraseFromParent();
4559 return;
4560
4561 case Intrinsic::riscv_aes32dsi:
4562 case Intrinsic::riscv_aes32dsmi:
4563 case Intrinsic::riscv_aes32esi:
4564 case Intrinsic::riscv_aes32esmi:
4565 case Intrinsic::riscv_sm4ks:
4566 case Intrinsic::riscv_sm4ed: {
4567 // The last argument to these intrinsics used to be i8 and changed to i32.
4568 // The type overload for sm4ks and sm4ed was removed.
4569 Value *Arg2 = CI->getArgOperand(2);
4570 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4571 return;
4572
4573 Value *Arg0 = CI->getArgOperand(0);
4574 Value *Arg1 = CI->getArgOperand(1);
4575 if (CI->getType()->isIntegerTy(64)) {
4576 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4577 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4578 }
4579
4580 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4581 cast<ConstantInt>(Arg2)->getZExtValue());
4582
4583 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4584 Value *Res = NewCall;
4585 if (Res->getType() != CI->getType())
4586 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4587 NewCall->takeName(CI);
4588 CI->replaceAllUsesWith(Res);
4589 CI->eraseFromParent();
4590 return;
4591 }
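// A rough illustration for RV64 (not part of the original source): the
// removed overload
//
//   %r = call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 2)
//
// is re-expressed through the remaining i32 form with trunc/sext glue:
//
//   %a32 = trunc i64 %a to i32
//   %b32 = trunc i64 %b to i32
//   %r32 = call i32 @llvm.riscv.sm4ks(i32 %a32, i32 %b32, i32 2)
//   %r = sext i32 %r32 to i64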
4592 case Intrinsic::riscv_sha256sig0:
4593 case Intrinsic::riscv_sha256sig1:
4594 case Intrinsic::riscv_sha256sum0:
4595 case Intrinsic::riscv_sha256sum1:
4596 case Intrinsic::riscv_sm3p0:
4597 case Intrinsic::riscv_sm3p1: {
4598 // These intrinsics used to be overloaded on the type of their argument;
4599 // the i64 overload was removed, so wrap 64-bit callers with trunc/sext.
4600 if (!CI->getType()->isIntegerTy(64))
4601 return;
4602
4603 Value *Arg =
4604 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4605
4606 NewCall = Builder.CreateCall(NewFn, Arg);
4607 Value *Res =
4608 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4609 NewCall->takeName(CI);
4610 CI->replaceAllUsesWith(Res);
4611 CI->eraseFromParent();
4612 return;
4613 }
4614
4615 case Intrinsic::x86_xop_vfrcz_ss:
4616 case Intrinsic::x86_xop_vfrcz_sd:
4617 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4618 break;
4619
4620 case Intrinsic::x86_xop_vpermil2pd:
4621 case Intrinsic::x86_xop_vpermil2ps:
4622 case Intrinsic::x86_xop_vpermil2pd_256:
4623 case Intrinsic::x86_xop_vpermil2ps_256: {
4624 SmallVector<Value *, 4> Args(CI->args());
4625 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4626 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4627 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4628 NewCall = Builder.CreateCall(NewFn, Args);
4629 break;
4630 }
4631
4632 case Intrinsic::x86_sse41_ptestc:
4633 case Intrinsic::x86_sse41_ptestz:
4634 case Intrinsic::x86_sse41_ptestnzc: {
4635 // The arguments for these intrinsics used to be v4f32, and changed
4636 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4637 // So, the only thing required is a bitcast for both arguments.
4638 // First, check the arguments have the old type.
4639 Value *Arg0 = CI->getArgOperand(0);
4640 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4641 return;
4642
4643 // Old intrinsic, add bitcasts
4644 Value *Arg1 = CI->getArgOperand(1);
4645
4646 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4647
4648 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4649 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4650
4651 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4652 break;
4653 }
4654
4655 case Intrinsic::x86_rdtscp: {
4656 // This used to take 1 argument. If we have no arguments, it is already
4657 // upgraded.
4658 if (CI->getNumOperands() == 0)
4659 return;
4660
4661 NewCall = Builder.CreateCall(NewFn);
4662 // Extract the second result and store it.
4663 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4664 // Cast the pointer to the right type.
4665 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4666 llvm::PointerType::getUnqual(Data->getType()));
4667 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4668 // Replace the original call result with the first result of the new call.
4669 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4670
4671 NewCall->takeName(CI);
4672 CI->replaceAllUsesWith(TSC);
4673 CI->eraseFromParent();
4674 return;
4675 }
4676
4677 case Intrinsic::x86_sse41_insertps:
4678 case Intrinsic::x86_sse41_dppd:
4679 case Intrinsic::x86_sse41_dpps:
4680 case Intrinsic::x86_sse41_mpsadbw:
4681 case Intrinsic::x86_avx_dp_ps_256:
4682 case Intrinsic::x86_avx2_mpsadbw: {
4683 // Need to truncate the last argument from i32 to i8 -- this argument models
4684 // an inherently 8-bit immediate operand to these x86 instructions.
4685 SmallVector<Value *, 4> Args(CI->args());
4686
4687 // Replace the last argument with a trunc.
4688 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4689 NewCall = Builder.CreateCall(NewFn, Args);
4690 break;
4691 }
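// A rough illustration (not part of the original source): for a constant
// immediate the inserted trunc simply folds, e.g.
//
//   %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
//                                                  <4 x float> %b, i32 48)
//
// becomes
//
//   %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
//                                                  <4 x float> %b, i8 48)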
4692
4693 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4694 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4695 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4696 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4697 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4698 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4699 SmallVector<Value *, 4> Args(CI->args());
4700 unsigned NumElts =
4701 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4702 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4703
4704 NewCall = Builder.CreateCall(NewFn, Args);
4705 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4706
4707 NewCall->takeName(CI);
4708 CI->replaceAllUsesWith(Res);
4709 CI->eraseFromParent();
4710 return;
4711 }
4712
4713 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4714 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4715 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4716 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4717 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4718 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4719 SmallVector<Value *, 4> Args(CI->args());
4720 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4721 if (NewFn->getIntrinsicID() ==
4722 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4723 Args[1] = Builder.CreateBitCast(
4724 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4725
4726 NewCall = Builder.CreateCall(NewFn, Args);
4727 Value *Res = Builder.CreateBitCast(
4728 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4729
4730 NewCall->takeName(CI);
4731 CI->replaceAllUsesWith(Res);
4732 CI->eraseFromParent();
4733 return;
4734 }
4735 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4736 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4737 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4738 SmallVector<Value *, 4> Args(CI->args());
4739 unsigned NumElts =
4740 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4741 Args[1] = Builder.CreateBitCast(
4742 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4743 Args[2] = Builder.CreateBitCast(
4744 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4745
4746 NewCall = Builder.CreateCall(NewFn, Args);
4747 break;
4748 }
4749
4750 case Intrinsic::thread_pointer: {
4751 NewCall = Builder.CreateCall(NewFn, {});
4752 break;
4753 }
4754
4755 case Intrinsic::memcpy:
4756 case Intrinsic::memmove:
4757 case Intrinsic::memset: {
4758 // We have to make sure that the call signature is what we're expecting.
4759 // We only want to change the old signatures by removing the alignment arg:
4760 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
4761 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
4762 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4763 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4764 // Note: i8*'s in the above can be any pointer type
4765 if (CI->arg_size() != 5) {
4766 DefaultCase();
4767 return;
4768 }
4769 // Remove alignment argument (3), and add alignment attributes to the
4770 // dest/src pointers.
4771 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4772 CI->getArgOperand(2), CI->getArgOperand(4)};
4773 NewCall = Builder.CreateCall(NewFn, Args);
4774 AttributeList OldAttrs = CI->getAttributes();
4775 AttributeList NewAttrs = AttributeList::get(
4776 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4777 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4778 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4779 NewCall->setAttributes(NewAttrs);
4780 auto *MemCI = cast<MemIntrinsic>(NewCall);
4781 // All mem intrinsics support dest alignment.
4782 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4783 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4784 // Memcpy/Memmove also support source alignment.
4785 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4786 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4787 break;
4788 }
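// A rough illustration (not part of the original source): the old alignment
// operand turns into parameter attributes,
//
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
//                                        i32 4, i1 false)
//
// upgrading to
//
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
//                                    i64 %n, i1 false)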
4789 }
4790 assert(NewCall && "Should have either set this variable or returned through "
4791 "the default case");
4792 NewCall->takeName(CI);
4793 CI->replaceAllUsesWith(NewCall);
4794 CI->eraseFromParent();
4795}
4796
4797void llvm::UpgradeCallsToIntrinsic(Function *F) {
4798 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4799
4800 // Check if this function should be upgraded and get the replacement function
4801 // if there is one.
4802 Function *NewFn;
4803 if (UpgradeIntrinsicFunction(F, NewFn)) {
4804 // Replace all users of the old function with the new function or new
4805 // instructions. This is not a range loop because the call is deleted.
4806 for (User *U : make_early_inc_range(F->users()))
4807 if (CallBase *CB = dyn_cast<CallBase>(U))
4808 UpgradeIntrinsicCall(CB, NewFn);
4809
4810 // Remove old function, no longer used, from the module.
4811 F->eraseFromParent();
4812 }
4813}
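// A minimal usage sketch (not part of the original source), assuming a
// freshly deserialized Module M whose functions may still carry old
// intrinsic names:
//
//   for (Function &Fn : make_early_inc_range(M.functions()))
//     UpgradeCallsToIntrinsic(&Fn);
//
// The early-increment range matters because the upgrade may erase Fn.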
4814
4815MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4816 const unsigned NumOperands = MD.getNumOperands();
4817 if (NumOperands == 0)
4818 return &MD; // Invalid, punt to a verifier error.
4819
4820 // Check if the tag uses struct-path aware TBAA format.
4821 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4822 return &MD;
4823
4824 auto &Context = MD.getContext();
4825 if (NumOperands == 3) {
4826 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4827 MDNode *ScalarType = MDNode::get(Context, Elts);
4828 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4829 Metadata *Elts2[] = {ScalarType, ScalarType,
4830 ConstantAsMetadata::get(
4831 Constant::getNullValue(Type::getInt64Ty(Context))),
4832 MD.getOperand(2)};
4833 return MDNode::get(Context, Elts2);
4834 }
4835 // Create a MDNode <MD, MD, offset 0>
4836 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4837 Type::getInt64Ty(Context)))};
4838 return MDNode::get(Context, Elts);
4839}
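// A rough illustration (not part of the original source): a pre-struct-path
// scalar TBAA tag such as
//
//   !1 = !{!"int", !0}        ; !0 is the TBAA root
//
// is wrapped into the struct-path form <base type, access type, offset>:
//
//   !2 = !{!1, !1, i64 0}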
4840
4841Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4842 Instruction *&Temp) {
4843 if (Opc != Instruction::BitCast)
4844 return nullptr;
4845
4846 Temp = nullptr;
4847 Type *SrcTy = V->getType();
4848 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4849 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4850 LLVMContext &Context = V->getContext();
4851
4852 // We have no information about target data layout, so we assume that
4853 // the maximum pointer size is 64bit.
4854 Type *MidTy = Type::getInt64Ty(Context);
4855 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4856
4857 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4858 }
4859
4860 return nullptr;
4861}
4862
4863Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4864 if (Opc != Instruction::BitCast)
4865 return nullptr;
4866
4867 Type *SrcTy = C->getType();
4868 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4869 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4870 LLVMContext &Context = C->getContext();
4871
4872 // We have no information about target data layout, so we assume that
4873 // the maximum pointer size is 64bit.
4874 Type *MidTy = Type::getInt64Ty(Context);
4875
4876 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4877 DestTy);
4878 }
4879
4880 return nullptr;
4881}
4882
4883/// Check the debug info version number, if it is out-dated, drop the debug
4884/// info. Return true if module is modified.
4885bool llvm::UpgradeDebugInfo(Module &M) {
4886 if (DisableAutoUpgradeDebugInfo)
4887 return false;
4888
4889 // We need to get metadata before the module is verified (i.e., getModuleFlag
4890 // makes assumptions that we haven't verified yet). Carefully extract the flag
4891 // from the metadata.
4892 unsigned Version = 0;
4893 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4894 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4895 if (Flag->getNumOperands() < 3)
4896 return false;
4897 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4898 return K->getString() == "Debug Info Version";
4899 return false;
4900 });
4901 if (OpIt != ModFlags->op_end()) {
4902 const MDOperand &ValOp = (*OpIt)->getOperand(2);
4903 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4904 Version = CI->getZExtValue();
4905 }
4906 }
4907
4908 if (Version == DEBUG_METADATA_VERSION) {
4909 bool BrokenDebugInfo = false;
4910 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4911 report_fatal_error("Broken module found, compilation aborted!");
4912 if (!BrokenDebugInfo)
4913 // Everything is ok.
4914 return false;
4915 else {
4916 // Diagnose malformed debug info.
4917 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4918 M.getContext().diagnose(Diag);
4919 }
4920 }
4921 bool Modified = StripDebugInfo(M);
4922 if (Modified && Version != DEBUG_METADATA_VERSION) {
4923 // Diagnose a version mismatch.
4924 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4925 M.getContext().diagnose(DiagVersion);
4926 }
4927 return Modified;
4928}
4929
4930/// This checks for objc retain release marker which should be upgraded. It
4931/// returns true if module is modified.
4932static bool upgradeRetainReleaseMarker(Module &M) {
4933 bool Changed = false;
4934 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4935 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4936 if (ModRetainReleaseMarker) {
4937 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4938 if (Op) {
4939 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4940 if (ID) {
4941 SmallVector<StringRef, 4> ValueComp;
4942 ID->getString().split(ValueComp, "#");
4943 if (ValueComp.size() == 2) {
4944 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4945 ID = MDString::get(M.getContext(), NewValue);
4946 }
4947 M.addModuleFlag(Module::Error, MarkerKey, ID);
4948 M.eraseNamedMetadata(ModRetainReleaseMarker);
4949 Changed = true;
4950 }
4951 }
4952 }
4953 return Changed;
4954}
4955
4956void llvm::UpgradeARCRuntime(Module &M) {
4957 // This lambda converts normal function calls to ARC runtime functions to
4958 // intrinsic calls.
4959 auto UpgradeToIntrinsic = [&](const char *OldFunc,
4960 llvm::Intrinsic::ID IntrinsicFunc) {
4961 Function *Fn = M.getFunction(OldFunc);
4962
4963 if (!Fn)
4964 return;
4965
4966 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4967
4968 for (User *U : make_early_inc_range(Fn->users())) {
4969 CallInst *CI = dyn_cast<CallInst>(U);
4970 if (!CI || CI->getCalledFunction() != Fn)
4971 continue;
4972
4973 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4974 FunctionType *NewFuncTy = NewFn->getFunctionType();
4975 SmallVector<Value *, 2> Args;
4976
4977 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4978 // value to the return type of the old function.
4979 if (NewFuncTy->getReturnType() != CI->getType() &&
4980 !CastInst::castIsValid(Instruction::BitCast, CI,
4981 NewFuncTy->getReturnType()))
4982 continue;
4983
4984 bool InvalidCast = false;
4985
4986 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4987 Value *Arg = CI->getArgOperand(I);
4988
4989 // Bitcast argument to the parameter type of the new function if it's
4990 // not a variadic argument.
4991 if (I < NewFuncTy->getNumParams()) {
4992 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4993 // to the parameter type of the new function.
4994 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4995 NewFuncTy->getParamType(I))) {
4996 InvalidCast = true;
4997 break;
4998 }
4999 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5000 }
5001 Args.push_back(Arg);
5002 }
5003
5004 if (InvalidCast)
5005 continue;
5006
5007 // Create a call instruction that calls the new function.
5008 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5009 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5010 NewCall->takeName(CI);
5011
5012 // Bitcast the return value back to the type of the old call.
5013 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5014
5015 if (!CI->use_empty())
5016 CI->replaceAllUsesWith(NewRetVal);
5017 CI->eraseFromParent();
5018 }
5019
5020 if (Fn->use_empty())
5021 Fn->eraseFromParent();
5022 };
5023
5024 // Unconditionally convert a call to "clang.arc.use" to a call to
5025 // "llvm.objc.clang.arc.use".
5026 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5027
5028 // Upgrade the retain release marker. If there is no need to upgrade
5029 // the marker, that means either the module is already new enough to contain
5030 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
5031 if (!upgradeRetainReleaseMarker(M))
5032 return;
5033
5034 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5035 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5036 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5037 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5038 {"objc_autoreleaseReturnValue",
5039 llvm::Intrinsic::objc_autoreleaseReturnValue},
5040 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5041 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5042 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5043 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5044 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5045 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5046 {"objc_release", llvm::Intrinsic::objc_release},
5047 {"objc_retain", llvm::Intrinsic::objc_retain},
5048 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5049 {"objc_retainAutoreleaseReturnValue",
5050 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5051 {"objc_retainAutoreleasedReturnValue",
5052 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5053 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5054 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5055 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5056 {"objc_unsafeClaimAutoreleasedReturnValue",
5057 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5058 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5059 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5060 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5061 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5062 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5063 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5064 {"objc_arc_annotation_topdown_bbstart",
5065 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5066 {"objc_arc_annotation_topdown_bbend",
5067 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5068 {"objc_arc_annotation_bottomup_bbstart",
5069 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5070 {"objc_arc_annotation_bottomup_bbend",
5071 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5072
5073 for (auto &I : RuntimeFuncs)
5074 UpgradeToIntrinsic(I.first, I.second);
5075}
5076
5077bool llvm::UpgradeModuleFlags(Module &M) {
5078 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5079 if (!ModFlags)
5080 return false;
5081
5082 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5083 bool HasSwiftVersionFlag = false;
5084 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5085 uint32_t SwiftABIVersion;
5086 auto Int8Ty = Type::getInt8Ty(M.getContext());
5087 auto Int32Ty = Type::getInt32Ty(M.getContext());
5088
5089 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5090 MDNode *Op = ModFlags->getOperand(I);
5091 if (Op->getNumOperands() != 3)
5092 continue;
5093 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5094 if (!ID)
5095 continue;
5096 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5097 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5098 Type::getInt32Ty(M.getContext()), B)),
5099 MDString::get(M.getContext(), ID->getString()),
5100 Op->getOperand(2)};
5101 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5102 Changed = true;
5103 };
5104
5105 if (ID->getString() == "Objective-C Image Info Version")
5106 HasObjCFlag = true;
5107 if (ID->getString() == "Objective-C Class Properties")
5108 HasClassProperties = true;
5109 // Upgrade PIC from Error/Max to Min.
5110 if (ID->getString() == "PIC Level") {
5111 if (auto *Behavior =
5112 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5113 uint64_t V = Behavior->getLimitedValue();
5114 if (V == Module::Error || V == Module::Max)
5115 SetBehavior(Module::Min);
5116 }
5117 }
5118 // Upgrade "PIE Level" from Error to Max.
5119 if (ID->getString() == "PIE Level")
5120 if (auto *Behavior =
5121 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5122 if (Behavior->getLimitedValue() == Module::Error)
5123 SetBehavior(Module::Max);
5124
5125 // Upgrade branch protection and return address signing module flags. The
5126 // module flag behavior for these fields was Error and is now Min.
5127 if (ID->getString() == "branch-target-enforcement" ||
5128 ID->getString().starts_with("sign-return-address")) {
5129 if (auto *Behavior =
5130 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5131 if (Behavior->getLimitedValue() == Module::Error) {
5132 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5133 Metadata *Ops[3] = {
5134 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5135 Op->getOperand(1), Op->getOperand(2)};
5136 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5137 Changed = true;
5138 }
5139 }
5140 }
5141
5142 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5143 // section name so that llvm-lto will not complain about mismatching
5144 // module flags that are functionally the same.
5145 if (ID->getString() == "Objective-C Image Info Section") {
5146 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5147 SmallVector<StringRef, 4> ValueComp;
5148 Value->getString().split(ValueComp, " ");
5149 if (ValueComp.size() != 1) {
5150 std::string NewValue;
5151 for (auto &S : ValueComp)
5152 NewValue += S.str();
5153 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5154 MDString::get(M.getContext(), NewValue)};
5155 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5156 Changed = true;
5157 }
5158 }
5159 }
5160
5161 // IRUpgrader turns an i32 "Objective-C Garbage Collection" flag into an i8
5162 // value. If the higher bits are set, it adds a new module flag for Swift info.
5163 if (ID->getString() == "Objective-C Garbage Collection") {
5164 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5165 if (Md) {
5166 assert(Md->getValue() && "Expected non-empty metadata");
5167 auto Type = Md->getValue()->getType();
5168 if (Type == Int8Ty)
5169 continue;
5170 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5171 if ((Val & 0xff) != Val) {
5172 HasSwiftVersionFlag = true;
5173 SwiftABIVersion = (Val & 0xff00) >> 8;
5174 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5175 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5176 }
5177 Metadata *Ops[3] = {
5178 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5179 Op->getOperand(1),
5180 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5181 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5182 Changed = true;
5183 }
5184 }
5185
5186 if (ID->getString() == "amdgpu_code_object_version") {
5187 Metadata *Ops[3] = {
5188 Op->getOperand(0),
5189 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5190 Op->getOperand(2)};
5191 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5192 Changed = true;
5193 }
5194 }
5195
5196 // "Objective-C Class Properties" is recently added for Objective-C. We
5197 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5198 // flag of value 0, so we can correclty downgrade this flag when trying to
5199 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5200 // this module flag.
5201 if (HasObjCFlag && !HasClassProperties) {
5202 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5203 (uint32_t)0);
5204 Changed = true;
5205 }
5206
5207 if (HasSwiftVersionFlag) {
5208 M.addModuleFlag(Module::Error, "Swift ABI Version",
5209 SwiftABIVersion);
5210 M.addModuleFlag(Module::Error, "Swift Major Version",
5211 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5212 M.addModuleFlag(Module::Error, "Swift Minor Version",
5213 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5214 Changed = true;
5215 }
5216
5217 return Changed;
5218}
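// A rough decode sketch (not part of the original source) of the packed
// "Objective-C Garbage Collection" value handled above, using the same
// shifts as the loop:
//
//   uint32_t Val = 0x05070203;                      // hypothetical flag value
//   uint32_t SwiftABI = (Val & 0xff00) >> 8;        // 2
//   uint8_t SwiftMajor = (Val & 0xff000000) >> 24;  // 5
//   uint8_t SwiftMinor = (Val & 0xff0000) >> 16;    // 7
//   uint8_t GC = Val & 0xff;                        // 3, re-emitted as i8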
5219
5220void llvm::UpgradeSectionAttributes(Module &M) {
5221 auto TrimSpaces = [](StringRef Section) -> std::string {
5222 SmallVector<StringRef, 5> Components;
5223 Section.split(Components, ',');
5224
5225 SmallString<32> Buffer;
5226 raw_svector_ostream OS(Buffer);
5227
5228 for (auto Component : Components)
5229 OS << ',' << Component.trim();
5230
5231 return std::string(OS.str().substr(1));
5232 };
5233
5234 for (auto &GV : M.globals()) {
5235 if (!GV.hasSection())
5236 continue;
5237
5238 StringRef Section = GV.getSection();
5239
5240 if (!Section.starts_with("__DATA, __objc_catlist"))
5241 continue;
5242
5243 // __DATA, __objc_catlist, regular, no_dead_strip
5244 // __DATA,__objc_catlist,regular,no_dead_strip
5245 GV.setSection(TrimSpaces(Section));
5246 }
5247}
5248
5249namespace {
5250// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5251// callsites within a function that did not also have the strictfp attribute.
5252// Since 10.0, if strict FP semantics are needed within a function, the
5253// function must have the strictfp attribute and all calls within the function
5254// must also have the strictfp attribute. This latter restriction is
5255// necessary to prevent unwanted libcall simplification when a function is
5256// being cloned (such as for inlining).
5257//
5258// The "dangling" strictfp attribute usage was only used to prevent constant
5259// folding and other libcall simplification. The nobuiltin attribute on the
5260// callsite has the same effect.
5261struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5262 StrictFPUpgradeVisitor() = default;
5263
5264 void visitCallBase(CallBase &Call) {
5265 if (!Call.isStrictFP())
5266 return;
5267 if (isa<ConstrainedFPIntrinsic>(&Call))
5268 return;
5269 // If we get here, the caller doesn't have the strictfp attribute
5270 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5271 Call.removeFnAttr(Attribute::StrictFP);
5272 Call.addFnAttr(Attribute::NoBuiltin);
5273 }
5274};
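// A rough illustration (not part of the original source): inside a function
// that is not itself strictfp, a callsite
//
//   %r = call double @sin(double %x) #0    ; #0 = { strictfp }
//
// has its dangling attribute rewritten to
//
//   %r = call double @sin(double %x) #1    ; #1 = { nobuiltin }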
5275
5276/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5277struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5278 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5279 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5280
5281 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5282 if (!RMW.isFloatingPointOperation())
5283 return;
5284
5285 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5286 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5287 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5288 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5289 }
5290};
5291} // namespace
5292
5293void llvm::UpgradeFunctionAttributes(Function &F) {
5294 // If a function definition doesn't have the strictfp attribute,
5295 // convert any callsite strictfp attributes to nobuiltin.
5296 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5297 StrictFPUpgradeVisitor SFPV;
5298 SFPV.visit(F);
5299 }
5300
5301 // Remove all incompatible attributes from the function.
5302 F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5303 for (auto &Arg : F.args())
5304 Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5305
5306 // Older versions of LLVM treated an "implicit-section-name" attribute
5307 // similarly to directly setting the section on a Function.
5308 if (Attribute A = F.getFnAttribute("implicit-section-name");
5309 A.isValid() && A.isStringAttribute()) {
5310 F.setSection(A.getValueAsString());
5311 F.removeFnAttr("implicit-section-name");
5312 }
5313
5314 if (!F.empty()) {
5315 // For some reason this is called twice, and the first time is before any
5316 // instructions are loaded into the body.
5317
5318 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5319 A.isValid()) {
5320
5321 if (A.getValueAsBool()) {
5322 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5323 Visitor.visit(F);
5324 }
5325
5326 // We will leave behind dead attribute uses on external declarations, but
5327 // clang never added these to declarations anyway.
5328 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5329 }
5330 }
5331}
5332
5333static bool isOldLoopArgument(Metadata *MD) {
5334 auto *T = dyn_cast_or_null<MDTuple>(MD);
5335 if (!T)
5336 return false;
5337 if (T->getNumOperands() < 1)
5338 return false;
5339 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5340 if (!S)
5341 return false;
5342 return S->getString().starts_with("llvm.vectorizer.");
5343}
5344
5345static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5346 StringRef OldPrefix = "llvm.vectorizer.";
5347 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5348
5349 if (OldTag == "llvm.vectorizer.unroll")
5350 return MDString::get(C, "llvm.loop.interleave.count");
5351
5352 return MDString::get(
5353 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5354 .str());
5355}
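// A rough illustration (not part of the original source):
//
//   upgradeLoopTag(C, "llvm.vectorizer.unroll")
//       yields !"llvm.loop.interleave.count"
//   upgradeLoopTag(C, "llvm.vectorizer.width")
//       yields !"llvm.loop.vectorize.width"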
5356
5357static Metadata *upgradeLoopArgument(Metadata *MD) {
5358 auto *T = dyn_cast_or_null<MDTuple>(MD);
5359 if (!T)
5360 return MD;
5361 if (T->getNumOperands() < 1)
5362 return MD;
5363 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5364 if (!OldTag)
5365 return MD;
5366 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5367 return MD;
5368
5369 // This has an old tag. Upgrade it.
5370 SmallVector<Metadata *, 8> Ops;
5371 Ops.reserve(T->getNumOperands());
5372 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5373 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5374 Ops.push_back(T->getOperand(I));
5375
5376 return MDTuple::get(T->getContext(), Ops);
5377}
5378
5379MDNode *llvm::upgradeInstructionLoopAttachment(Instruction &I, MDNode &N) {
5380 auto *T = dyn_cast<MDTuple>(&N);
5381 if (!T)
5382 return &N;
5383
5384 if (none_of(T->operands(), isOldLoopArgument))
5385 return &N;
5386
5388 Ops.reserve(T->getNumOperands());
5389 for (Metadata *MD : T->operands())
5391
5392 return MDTuple::get(T->getContext(), Ops);
5393}
5394
5395std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5396 Triple T(TT);
5397 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5398 // the address space of globals to 1. This does not apply to SPIRV Logical.
5399 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5400 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5401 !DL.contains("-G") && !DL.starts_with("G")) {
5402 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5403 }
5404
5405 if (T.isLoongArch64() || T.isRISCV64()) {
5406 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5407 auto I = DL.find("-n64-");
5408 if (I != StringRef::npos)
5409 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5410 return DL.str();
5411 }
5412
5413 std::string Res = DL.str();
5414 // AMDGCN data layout upgrades.
5415 if (T.isAMDGCN()) {
5416 // Define address spaces for constants.
5417 if (!DL.contains("-G") && !DL.starts_with("G"))
5418 Res.append(Res.empty() ? "G1" : "-G1");
5419
5420 // Add missing non-integral declarations.
5421 // This goes before adding new address spaces to prevent incoherent string
5422 // values.
5423 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5424 Res.append("-ni:7:8:9");
5425 // Update ni:7 to ni:7:8:9.
5426 if (DL.ends_with("ni:7"))
5427 Res.append(":8:9");
5428 if (DL.ends_with("ni:7:8"))
5429 Res.append(":9");
5430
5431 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5432 // resources). An empty data layout has already been upgraded to G1 by now.
5433 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5434 Res.append("-p7:160:256:256:32");
5435 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5436 Res.append("-p8:128:128");
5437 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5438 Res.append("-p9:192:256:256:32");
5439
5440 return Res;
5441 }
5442
5443 // AArch64 data layout upgrades.
5444 if (T.isAArch64()) {
5445 // Add "-Fn32"
5446 if (!DL.empty() && !DL.contains("-Fn32"))
5447 Res.append("-Fn32");
5448 return Res;
5449 }
5450
5451 if (!T.isX86())
5452 return Res;
5453
5454 // If the datalayout matches the expected format, add pointer size address
5455 // spaces to the datalayout.
5456 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5457 if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5458 SmallVector<StringRef, 4> Groups;
5459 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5460 if (R.match(Res, &Groups))
5461 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5462 }
5463
5464 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5465 // for i128 operations prior to this being reflected in the data layout, and
5466 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5467 // boundaries, so although this is a breaking change, the upgrade is expected
5468 // to fix more IR than it breaks.
5469 // Intel MCU is an exception and uses 4-byte-alignment.
5470 if (!T.isOSIAMCU()) {
5471 std::string I128 = "-i128:128";
5472 if (StringRef Ref = Res; !Ref.contains(I128)) {
5473 SmallVector<StringRef, 4> Groups;
5474 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5475 if (R.match(Res, &Groups))
5476 Res = (Groups[1] + I128 + Groups[3]).str();
5477 }
5478 }
5479
5480 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5481 // Raising the alignment is safe because Clang did not produce f80 values in
5482 // the MSVC environment before this upgrade was added.
5483 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5484 StringRef Ref = Res;
5485 auto I = Ref.find("-f80:32-");
5486 if (I != StringRef::npos)
5487 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5488 }
5489
5490 return Res;
5491}
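// A rough illustration for x86-64 (not part of the original source): an old
// layout string
//
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
//
// picks up the pointer address spaces and the i128 alignment:
//
//   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"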
5492
5493void llvm::UpgradeAttributes(AttrBuilder &B) {
5494 StringRef FramePointer;
5495 Attribute A = B.getAttribute("no-frame-pointer-elim");
5496 if (A.isValid()) {
5497 // The value can be "true" or "false".
5498 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5499 B.removeAttribute("no-frame-pointer-elim");
5500 }
5501 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5502 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5503 if (FramePointer != "all")
5504 FramePointer = "non-leaf";
5505 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5506 }
5507 if (!FramePointer.empty())
5508 B.addAttribute("frame-pointer", FramePointer);
5509
5510 A = B.getAttribute("null-pointer-is-valid");
5511 if (A.isValid()) {
5512 // The value can be "true" or "false".
5513 bool NullPointerIsValid = A.getValueAsString() == "true";
5514 B.removeAttribute("null-pointer-is-valid");
5515 if (NullPointerIsValid)
5516 B.addAttribute(Attribute::NullPointerIsValid);
5517 }
5518}
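// A rough summary (not part of the original source):
//
//   "no-frame-pointer-elim"="true"    ->  "frame-pointer"="all"
//   "no-frame-pointer-elim-non-leaf"  ->  "frame-pointer"="non-leaf"
//                                         (unless "all" already won)
//   "null-pointer-is-valid"="true"    ->  null_pointer_is_valid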
5519
5520void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5521 // clang.arc.attachedcall bundles are now required to have an operand.
5522 // If they don't, it's okay to drop them entirely: when there is an operand,
5523 // the "attachedcall" is meaningful and required, but without an operand,
5524 // it's just a marker NOP. Dropping it merely prevents an optimization.
5525 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5526 return OBD.getTag() == "clang.arc.attachedcall" &&
5527 OBD.inputs().empty();
5528 });
5529}
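// A rough illustration (not part of the original source; the callee operand
// shown is an assumption): an operand-less bundle such as
//
//   %r = call ptr @f() [ "clang.arc.attachedcall"() ]
//
// is dropped, while the meaningful form
//
//   %r = call ptr @f() [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
//
// is left untouched.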
unsigned Intr
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:88
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:72
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:99
static MDType * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:52
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:56
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
uint64_t High
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
raw_pwrite_stream & OS
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:371
Type * getElementType() const
Definition: DerivedTypes.h:384
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:823
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ FAdd
*p = old + v
Definition: Instructions.h:733
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:744
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:740
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
bool isFloatingPointOperation() const
Definition: Instructions.h:864
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1546
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1323
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1401
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1501
unsigned arg_size() const
Definition: InstrTypes.h:1408
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1504
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1650
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2281
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2227
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2267
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1357
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1500
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Records a position in IR for a source label (DILabel).
Base class for non-instruction debug metadata records that have positions within IR.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Type * getReturnType() const
Definition: DerivedTypes.h:124
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:172
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:249
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:469
size_t arg_size() const
Definition: Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
Argument * getArg(unsigned i) const
Definition: Function.h:884
LinkageTypes getLinkage() const
Definition: GlobalValue.h:546
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:458
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1577
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2492
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:508
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2543
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1631
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1062
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2114
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2480
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:536
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1824
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1550
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2190
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Definition: IRBuilder.cpp:1193
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2536
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:579
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2053
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:473
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2101
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:518
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1738
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2297
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1766
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2261
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2147
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Definition: IRBuilder.h:1807
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1433
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2041
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2514
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1492
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:599
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:468
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2569
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1871
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2216
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1843
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1473
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2130
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1536
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2281
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2371
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1604
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1747
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:546
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1378
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition: IRBuilder.h:2686
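The Create* methods above are the vocabulary of every upgrade routine: position a builder at the old call, synthesize replacement IR, and hand the result back. A hedged sketch of that shape — emitMaskedLoadUpgrade and its operand layout (pointer, i8 mask, one mask bit per lane) are invented for illustration, not taken from this file:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: rewrite an old-style masked load whose i8 mask has exactly one
// bit per vector lane. Assumes CI has operands (ptr, i8 mask) and returns
// an 8-element fixed vector.
Value *emitMaskedLoadUpgrade(CallBase *CI) {
  IRBuilder<> Builder(CI); // new instructions go right before CI
  Value *Ptr = CI->getArgOperand(0);
  Value *RawMask = CI->getArgOperand(1);
  auto *VecTy = cast<FixedVectorType>(CI->getType());
  // Reinterpret the integer mask as one i1 per lane.
  Value *Mask = Builder.CreateBitCast(
      RawMask,
      FixedVectorType::get(Builder.getInt1Ty(), VecTy->getNumElements()));
  Value *Passthru = Constant::getNullValue(VecTy);
  return Builder.CreateMaskedLoad(VecTy, Ptr, Align(1), Mask, Passthru);
}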
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:66
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
Definition: Instructions.h:174
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
LLVMContext & getContext() const
Definition: Metadata.h:1233
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
A single uniqued string.
Definition: Metadata.h:720
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1499
Metadata wrapper in the Value hierarchy.
Definition: Metadata.h:176
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
bool IsNewDbgInfoFormat
Is this Module using intrinsics to record the position of debugging information, or non-intrinsic records?
Definition: Module.h:217
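The merge behaviors above decide what happens when two linked modules carry the same flag. One concrete shape, matching the way module-flag upgrades relax "PIC Level" to Min, though the snippet itself is only a sketch:

#include "llvm/IR/Module.h"
using namespace llvm;

// Attach a module flag whose linked-merge rule is "take the minimum".
// Module::Error would instead reject any mismatch at link time.
void setPICLevel(Module &M, uint32_t Level) {
  M.addModuleFlag(Module::Min, "PIC Level", Level);
}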
A tuple of MDNodes.
Definition: Metadata.h:1730
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1396
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1388
unsigned getNumOperands() const
Definition: Metadata.cpp:1384
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1189
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1204
StringRef getTag() const
Definition: InstrTypes.h:1212
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
Definition: DerivedTypes.h:662
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1852
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
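Regex::match carries the heavy lifting for the large x86 name families, where one pattern with capture groups replaces dozens of string compares. The pattern below is illustrative, not one of the file's actual expressions:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Regex.h"
using namespace llvm;

// Match names like "avx512.mask.psll.d.512" and pull out the element kind.
bool matchMaskedShift(StringRef Name, StringRef &ElemKind) {
  Regex R("^avx512\\.mask\\.psll\\.([dq])(\\.[0-9]+)?$");
  SmallVector<StringRef, 3> Matches;
  if (!R.match(Name, &Matches))
    return false;
  ElemKind = Matches[1]; // Matches[0] is the whole match
  return true;
}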
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:594
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
static constexpr size_t npos
Definition: StringRef.h:52
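starts_with and drop_front are how intrinsic names get peeled apart, prefix by prefix, before dispatch. A tiny sketch with an invented helper name:

#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Strip a target prefix so later matching can work on the remainder,
// e.g. "x86.sse2.psll.d" -> "sse2.psll.d". Assumes "llvm." is already gone.
StringRef stripX86Prefix(StringRef Name) {
  if (Name.starts_with("x86."))
    return Name.drop_front(4);
  return Name; // not an x86 intrinsic name; leave unchanged
}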
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
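StringSwitch gives the name-to-ID mapping a table-like surface without an actual table. The cases below are invented; only the idiom is real:

#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Map an already-stripped name suffix to a replacement intrinsic ID,
// falling back to not_intrinsic when no case applies.
Intrinsic::ID mapOldSuffix(StringRef Suffix) {
  return StringSwitch<Intrinsic::ID>(Suffix)
      .Case("ctlz.i32", Intrinsic::ctlz)
      .Case("cttz.i32", Intrinsic::cttz)
      .StartsWith("memcpy", Intrinsic::memcpy)
      .Default(Intrinsic::not_intrinsic);
}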
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:342
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:258
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP types.
Definition: Type.h:212
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Fill the given array of IITDescriptors with the IIT table descriptor for the specified intrinsic.
Definition: Function.cpp:1382
std::optional< Function * > remangleIntrinsicFunction(Function *F)
Definition: Function.cpp:1863
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1096
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
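getDeclaration is the other half of every rename: once the old function is moved aside, the replacement declaration is created (or found) in the module. A sketch for an overloaded intrinsic; llvm.umax here stands in for whichever ID an upgrade targets:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Overloaded intrinsics take their concrete types via the Tys array;
// non-overloaded intrinsics omit it entirely.
Function *getUMaxDecl(Module *M, Type *Ty) {
  return Intrinsic::getDeclaration(M, Intrinsic::umax, {Ty});
}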
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
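Paired as documented, these two hooks form the driver loop a bitcode or IR reader runs over every function; the sketch below shows that pairing rather than any single caller's exact code:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Probe each function; when it needs upgrading, rewrite its calls.
// make_early_inc_range tolerates the erasure that upgrading performs.
void upgradeModuleIntrinsics(Module &M) {
  for (Function &F : make_early_inc_range(M)) {
    Function *NewFn = nullptr;
    if (UpgradeIntrinsicFunction(&F, NewFn))
      UpgradeCallsToIntrinsic(&F);
  }
}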
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:656
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces: the constant expression is replaced by a pair ptrtoint+inttoptr.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
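The datalayout upgrade is purely string-to-string, so applying it is a round trip through the module's accessors. A sketch, assuming nothing beyond the standard Module getters and setDataLayout:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Recompute the datalayout string for the module's triple and store it back.
void applyDataLayoutUpgrade(Module &M) {
  std::string DL =
      UpgradeDataLayoutString(M.getDataLayoutStr(), M.getTargetTriple());
  M.setDataLayout(DL);
}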
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:591
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
@ Dynamic
Denotes mode unknown at compile time.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition: STLExtras.h:2082
bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is outdated, drop the debug info.
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7162
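verifyModule's BrokenDebugInfo out-parameter is what lets a debug-info upgrade drop only the debug info instead of rejecting the whole module. A sketch of that policy, not the actual body of UpgradeDebugInfo:

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Returns false only for real IR errors; broken debug info is stripped.
bool verifyDroppingBadDebugInfo(Module &M) {
  bool BrokenDebugInfo = false;
  if (verifyModule(M, &errs(), &BrokenDebugInfo))
    return false; // the IR itself is invalid
  if (BrokenDebugInfo)
    StripDebugInfo(M); // salvage the module without its debug info
  return true;
}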
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117