LLVM 19.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/StringRef.h"
20#include "llvm/IR/Constants.h"
21#include "llvm/IR/DebugInfo.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/InstVisitor.h"
27#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/IntrinsicsNVPTX.h"
33#include "llvm/IR/IntrinsicsRISCV.h"
34#include "llvm/IR/IntrinsicsWebAssembly.h"
35#include "llvm/IR/IntrinsicsX86.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
39#include "llvm/IR/Verifier.h"
42#include "llvm/Support/Regex.h"
44#include <cstring>
45
46using namespace llvm;
47
48static cl::opt<bool>
49 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50 cl::desc("Disable autoupgrade of debug info"));
51
52static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53
54// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55// changed their type from v4f32 to v2i64.
57 Function *&NewFn) {
58 // Check whether this is an old version of the function, which received
59 // v4f32 arguments.
60 Type *Arg0Type = F->getFunctionType()->getParamType(0);
61 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62 return false;
63
64 // Yes, it's old, replace it with new version.
65 rename(F);
66 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67 return true;
68}
69
70// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71// arguments have changed their type from i32 to i8.
73 Function *&NewFn) {
74 // Check that the last argument is an i32.
75 Type *LastArgType = F->getFunctionType()->getParamType(
76 F->getFunctionType()->getNumParams() - 1);
77 if (!LastArgType->isIntegerTy(32))
78 return false;
79
80 // Move this function aside and map down.
81 rename(F);
82 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83 return true;
84}
85
86// Upgrade the declaration of fp compare intrinsics that change return type
87// from scalar to vXi1 mask.
89 Function *&NewFn) {
90 // Check if the return type is a vector.
91 if (F->getReturnType()->isVectorTy())
92 return false;
93
94 rename(F);
95 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96 return true;
97}
98
100 Function *&NewFn) {
101 if (F->getReturnType()->getScalarType()->isBFloatTy())
102 return false;
103
104 rename(F);
105 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106 return true;
107}
108
110 Function *&NewFn) {
111 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112 return false;
113
114 rename(F);
115 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116 return true;
117}
118
120 // All of the intrinsics matches below should be marked with which llvm
121 // version started autoupgrading them. At some point in the future we would
122 // like to use this information to remove upgrade code for some older
123 // intrinsics. It is currently undecided how we will determine that future
124 // point.
125 if (Name.consume_front("avx."))
126 return (Name.starts_with("blend.p") || // Added in 3.7
127 Name == "cvt.ps2.pd.256" || // Added in 3.9
128 Name == "cvtdq2.pd.256" || // Added in 3.9
129 Name == "cvtdq2.ps.256" || // Added in 7.0
130 Name.starts_with("movnt.") || // Added in 3.2
131 Name.starts_with("sqrt.p") || // Added in 7.0
132 Name.starts_with("storeu.") || // Added in 3.9
133 Name.starts_with("vbroadcast.s") || // Added in 3.5
134 Name.starts_with("vbroadcastf128") || // Added in 4.0
135 Name.starts_with("vextractf128.") || // Added in 3.7
136 Name.starts_with("vinsertf128.") || // Added in 3.7
137 Name.starts_with("vperm2f128.") || // Added in 6.0
138 Name.starts_with("vpermil.")); // Added in 3.1
139
140 if (Name.consume_front("avx2."))
141 return (Name == "movntdqa" || // Added in 5.0
142 Name.starts_with("pabs.") || // Added in 6.0
143 Name.starts_with("padds.") || // Added in 8.0
144 Name.starts_with("paddus.") || // Added in 8.0
145 Name.starts_with("pblendd.") || // Added in 3.7
146 Name == "pblendw" || // Added in 3.7
147 Name.starts_with("pbroadcast") || // Added in 3.8
148 Name.starts_with("pcmpeq.") || // Added in 3.1
149 Name.starts_with("pcmpgt.") || // Added in 3.1
150 Name.starts_with("pmax") || // Added in 3.9
151 Name.starts_with("pmin") || // Added in 3.9
152 Name.starts_with("pmovsx") || // Added in 3.9
153 Name.starts_with("pmovzx") || // Added in 3.9
154 Name == "pmul.dq" || // Added in 7.0
155 Name == "pmulu.dq" || // Added in 7.0
156 Name.starts_with("psll.dq") || // Added in 3.7
157 Name.starts_with("psrl.dq") || // Added in 3.7
158 Name.starts_with("psubs.") || // Added in 8.0
159 Name.starts_with("psubus.") || // Added in 8.0
160 Name.starts_with("vbroadcast") || // Added in 3.8
161 Name == "vbroadcasti128" || // Added in 3.7
162 Name == "vextracti128" || // Added in 3.7
163 Name == "vinserti128" || // Added in 3.7
164 Name == "vperm2i128"); // Added in 6.0
165
166 if (Name.consume_front("avx512.")) {
167 if (Name.consume_front("mask."))
168 // 'avx512.mask.*'
169 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
170 Name.starts_with("and.") || // Added in 3.9
171 Name.starts_with("andn.") || // Added in 3.9
172 Name.starts_with("broadcast.s") || // Added in 3.9
173 Name.starts_with("broadcastf32x4.") || // Added in 6.0
174 Name.starts_with("broadcastf32x8.") || // Added in 6.0
175 Name.starts_with("broadcastf64x2.") || // Added in 6.0
176 Name.starts_with("broadcastf64x4.") || // Added in 6.0
177 Name.starts_with("broadcasti32x4.") || // Added in 6.0
178 Name.starts_with("broadcasti32x8.") || // Added in 6.0
179 Name.starts_with("broadcasti64x2.") || // Added in 6.0
180 Name.starts_with("broadcasti64x4.") || // Added in 6.0
181 Name.starts_with("cmp.b") || // Added in 5.0
182 Name.starts_with("cmp.d") || // Added in 5.0
183 Name.starts_with("cmp.q") || // Added in 5.0
184 Name.starts_with("cmp.w") || // Added in 5.0
185 Name.starts_with("compress.b") || // Added in 9.0
186 Name.starts_with("compress.d") || // Added in 9.0
187 Name.starts_with("compress.p") || // Added in 9.0
188 Name.starts_with("compress.q") || // Added in 9.0
189 Name.starts_with("compress.store.") || // Added in 7.0
190 Name.starts_with("compress.w") || // Added in 9.0
191 Name.starts_with("conflict.") || // Added in 9.0
192 Name.starts_with("cvtdq2pd.") || // Added in 4.0
193 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
194 Name == "cvtpd2dq.256" || // Added in 7.0
195 Name == "cvtpd2ps.256" || // Added in 7.0
196 Name == "cvtps2pd.128" || // Added in 7.0
197 Name == "cvtps2pd.256" || // Added in 7.0
198 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
199 Name == "cvtqq2ps.256" || // Added in 9.0
200 Name == "cvtqq2ps.512" || // Added in 9.0
201 Name == "cvttpd2dq.256" || // Added in 7.0
202 Name == "cvttps2dq.128" || // Added in 7.0
203 Name == "cvttps2dq.256" || // Added in 7.0
204 Name.starts_with("cvtudq2pd.") || // Added in 4.0
205 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
206 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
207 Name == "cvtuqq2ps.256" || // Added in 9.0
208 Name == "cvtuqq2ps.512" || // Added in 9.0
209 Name.starts_with("dbpsadbw.") || // Added in 7.0
210 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
211 Name.starts_with("expand.b") || // Added in 9.0
212 Name.starts_with("expand.d") || // Added in 9.0
213 Name.starts_with("expand.load.") || // Added in 7.0
214 Name.starts_with("expand.p") || // Added in 9.0
215 Name.starts_with("expand.q") || // Added in 9.0
216 Name.starts_with("expand.w") || // Added in 9.0
217 Name.starts_with("fpclass.p") || // Added in 7.0
218 Name.starts_with("insert") || // Added in 4.0
219 Name.starts_with("load.") || // Added in 3.9
220 Name.starts_with("loadu.") || // Added in 3.9
221 Name.starts_with("lzcnt.") || // Added in 5.0
222 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
223 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
224 Name.starts_with("movddup") || // Added in 3.9
225 Name.starts_with("move.s") || // Added in 4.0
226 Name.starts_with("movshdup") || // Added in 3.9
227 Name.starts_with("movsldup") || // Added in 3.9
228 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("or.") || // Added in 3.9
230 Name.starts_with("pabs.") || // Added in 6.0
231 Name.starts_with("packssdw.") || // Added in 5.0
232 Name.starts_with("packsswb.") || // Added in 5.0
233 Name.starts_with("packusdw.") || // Added in 5.0
234 Name.starts_with("packuswb.") || // Added in 5.0
235 Name.starts_with("padd.") || // Added in 4.0
236 Name.starts_with("padds.") || // Added in 8.0
237 Name.starts_with("paddus.") || // Added in 8.0
238 Name.starts_with("palignr.") || // Added in 3.9
239 Name.starts_with("pand.") || // Added in 3.9
240 Name.starts_with("pandn.") || // Added in 3.9
241 Name.starts_with("pavg") || // Added in 6.0
242 Name.starts_with("pbroadcast") || // Added in 6.0
243 Name.starts_with("pcmpeq.") || // Added in 3.9
244 Name.starts_with("pcmpgt.") || // Added in 3.9
245 Name.starts_with("perm.df.") || // Added in 3.9
246 Name.starts_with("perm.di.") || // Added in 3.9
247 Name.starts_with("permvar.") || // Added in 7.0
248 Name.starts_with("pmaddubs.w.") || // Added in 7.0
249 Name.starts_with("pmaddw.d.") || // Added in 7.0
250 Name.starts_with("pmax") || // Added in 4.0
251 Name.starts_with("pmin") || // Added in 4.0
252 Name == "pmov.qd.256" || // Added in 9.0
253 Name == "pmov.qd.512" || // Added in 9.0
254 Name == "pmov.wb.256" || // Added in 9.0
255 Name == "pmov.wb.512" || // Added in 9.0
256 Name.starts_with("pmovsx") || // Added in 4.0
257 Name.starts_with("pmovzx") || // Added in 4.0
258 Name.starts_with("pmul.dq.") || // Added in 4.0
259 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
260 Name.starts_with("pmulh.w.") || // Added in 7.0
261 Name.starts_with("pmulhu.w.") || // Added in 7.0
262 Name.starts_with("pmull.") || // Added in 4.0
263 Name.starts_with("pmultishift.qb.") || // Added in 8.0
264 Name.starts_with("pmulu.dq.") || // Added in 4.0
265 Name.starts_with("por.") || // Added in 3.9
266 Name.starts_with("prol.") || // Added in 8.0
267 Name.starts_with("prolv.") || // Added in 8.0
268 Name.starts_with("pror.") || // Added in 8.0
269 Name.starts_with("prorv.") || // Added in 8.0
270 Name.starts_with("pshuf.b.") || // Added in 4.0
271 Name.starts_with("pshuf.d.") || // Added in 3.9
272 Name.starts_with("pshufh.w.") || // Added in 3.9
273 Name.starts_with("pshufl.w.") || // Added in 3.9
274 Name.starts_with("psll.d") || // Added in 4.0
275 Name.starts_with("psll.q") || // Added in 4.0
276 Name.starts_with("psll.w") || // Added in 4.0
277 Name.starts_with("pslli") || // Added in 4.0
278 Name.starts_with("psllv") || // Added in 4.0
279 Name.starts_with("psra.d") || // Added in 4.0
280 Name.starts_with("psra.q") || // Added in 4.0
281 Name.starts_with("psra.w") || // Added in 4.0
282 Name.starts_with("psrai") || // Added in 4.0
283 Name.starts_with("psrav") || // Added in 4.0
284 Name.starts_with("psrl.d") || // Added in 4.0
285 Name.starts_with("psrl.q") || // Added in 4.0
286 Name.starts_with("psrl.w") || // Added in 4.0
287 Name.starts_with("psrli") || // Added in 4.0
288 Name.starts_with("psrlv") || // Added in 4.0
289 Name.starts_with("psub.") || // Added in 4.0
290 Name.starts_with("psubs.") || // Added in 8.0
291 Name.starts_with("psubus.") || // Added in 8.0
292 Name.starts_with("pternlog.") || // Added in 7.0
293 Name.starts_with("punpckh") || // Added in 3.9
294 Name.starts_with("punpckl") || // Added in 3.9
295 Name.starts_with("pxor.") || // Added in 3.9
296 Name.starts_with("shuf.f") || // Added in 6.0
297 Name.starts_with("shuf.i") || // Added in 6.0
298 Name.starts_with("shuf.p") || // Added in 4.0
299 Name.starts_with("sqrt.p") || // Added in 7.0
300 Name.starts_with("store.b.") || // Added in 3.9
301 Name.starts_with("store.d.") || // Added in 3.9
302 Name.starts_with("store.p") || // Added in 3.9
303 Name.starts_with("store.q.") || // Added in 3.9
304 Name.starts_with("store.w.") || // Added in 3.9
305 Name == "store.ss" || // Added in 7.0
306 Name.starts_with("storeu.") || // Added in 3.9
307 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
308 Name.starts_with("ucmp.") || // Added in 5.0
309 Name.starts_with("unpckh.") || // Added in 3.9
310 Name.starts_with("unpckl.") || // Added in 3.9
311 Name.starts_with("valign.") || // Added in 4.0
312 Name == "vcvtph2ps.128" || // Added in 11.0
313 Name == "vcvtph2ps.256" || // Added in 11.0
314 Name.starts_with("vextract") || // Added in 4.0
315 Name.starts_with("vfmadd.") || // Added in 7.0
316 Name.starts_with("vfmaddsub.") || // Added in 7.0
317 Name.starts_with("vfnmadd.") || // Added in 7.0
318 Name.starts_with("vfnmsub.") || // Added in 7.0
319 Name.starts_with("vpdpbusd.") || // Added in 7.0
320 Name.starts_with("vpdpbusds.") || // Added in 7.0
321 Name.starts_with("vpdpwssd.") || // Added in 7.0
322 Name.starts_with("vpdpwssds.") || // Added in 7.0
323 Name.starts_with("vpermi2var.") || // Added in 7.0
324 Name.starts_with("vpermil.p") || // Added in 3.9
325 Name.starts_with("vpermilvar.") || // Added in 4.0
326 Name.starts_with("vpermt2var.") || // Added in 7.0
327 Name.starts_with("vpmadd52") || // Added in 7.0
328 Name.starts_with("vpshld.") || // Added in 7.0
329 Name.starts_with("vpshldv.") || // Added in 8.0
330 Name.starts_with("vpshrd.") || // Added in 7.0
331 Name.starts_with("vpshrdv.") || // Added in 8.0
332 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
333 Name.starts_with("xor.")); // Added in 3.9
334
335 if (Name.consume_front("mask3."))
336 // 'avx512.mask3.*'
337 return (Name.starts_with("vfmadd.") || // Added in 7.0
338 Name.starts_with("vfmaddsub.") || // Added in 7.0
339 Name.starts_with("vfmsub.") || // Added in 7.0
340 Name.starts_with("vfmsubadd.") || // Added in 7.0
341 Name.starts_with("vfnmsub.")); // Added in 7.0
342
343 if (Name.consume_front("maskz."))
344 // 'avx512.maskz.*'
345 return (Name.starts_with("pternlog.") || // Added in 7.0
346 Name.starts_with("vfmadd.") || // Added in 7.0
347 Name.starts_with("vfmaddsub.") || // Added in 7.0
348 Name.starts_with("vpdpbusd.") || // Added in 7.0
349 Name.starts_with("vpdpbusds.") || // Added in 7.0
350 Name.starts_with("vpdpwssd.") || // Added in 7.0
351 Name.starts_with("vpdpwssds.") || // Added in 7.0
352 Name.starts_with("vpermt2var.") || // Added in 7.0
353 Name.starts_with("vpmadd52") || // Added in 7.0
354 Name.starts_with("vpshldv.") || // Added in 8.0
355 Name.starts_with("vpshrdv.")); // Added in 8.0
356
357 // 'avx512.*'
358 return (Name == "movntdqa" || // Added in 5.0
359 Name == "pmul.dq.512" || // Added in 7.0
360 Name == "pmulu.dq.512" || // Added in 7.0
361 Name.starts_with("broadcastm") || // Added in 6.0
362 Name.starts_with("cmp.p") || // Added in 12.0
363 Name.starts_with("cvtb2mask.") || // Added in 7.0
364 Name.starts_with("cvtd2mask.") || // Added in 7.0
365 Name.starts_with("cvtmask2") || // Added in 5.0
366 Name.starts_with("cvtq2mask.") || // Added in 7.0
367 Name == "cvtusi2sd" || // Added in 7.0
368 Name.starts_with("cvtw2mask.") || // Added in 7.0
369 Name == "kand.w" || // Added in 7.0
370 Name == "kandn.w" || // Added in 7.0
371 Name == "knot.w" || // Added in 7.0
372 Name == "kor.w" || // Added in 7.0
373 Name == "kortestc.w" || // Added in 7.0
374 Name == "kortestz.w" || // Added in 7.0
375 Name.starts_with("kunpck") || // added in 6.0
376 Name == "kxnor.w" || // Added in 7.0
377 Name == "kxor.w" || // Added in 7.0
378 Name.starts_with("padds.") || // Added in 8.0
379 Name.starts_with("pbroadcast") || // Added in 3.9
380 Name.starts_with("prol") || // Added in 8.0
381 Name.starts_with("pror") || // Added in 8.0
382 Name.starts_with("psll.dq") || // Added in 3.9
383 Name.starts_with("psrl.dq") || // Added in 3.9
384 Name.starts_with("psubs.") || // Added in 8.0
385 Name.starts_with("ptestm") || // Added in 6.0
386 Name.starts_with("ptestnm") || // Added in 6.0
387 Name.starts_with("storent.") || // Added in 3.9
388 Name.starts_with("vbroadcast.s") || // Added in 7.0
389 Name.starts_with("vpshld.") || // Added in 8.0
390 Name.starts_with("vpshrd.")); // Added in 8.0
391 }
392
393 if (Name.consume_front("fma."))
394 return (Name.starts_with("vfmadd.") || // Added in 7.0
395 Name.starts_with("vfmsub.") || // Added in 7.0
396 Name.starts_with("vfmsubadd.") || // Added in 7.0
397 Name.starts_with("vfnmadd.") || // Added in 7.0
398 Name.starts_with("vfnmsub.")); // Added in 7.0
399
400 if (Name.consume_front("fma4."))
401 return Name.starts_with("vfmadd.s"); // Added in 7.0
402
403 if (Name.consume_front("sse."))
404 return (Name == "add.ss" || // Added in 4.0
405 Name == "cvtsi2ss" || // Added in 7.0
406 Name == "cvtsi642ss" || // Added in 7.0
407 Name == "div.ss" || // Added in 4.0
408 Name == "mul.ss" || // Added in 4.0
409 Name.starts_with("sqrt.p") || // Added in 7.0
410 Name == "sqrt.ss" || // Added in 7.0
411 Name.starts_with("storeu.") || // Added in 3.9
412 Name == "sub.ss"); // Added in 4.0
413
414 if (Name.consume_front("sse2."))
415 return (Name == "add.sd" || // Added in 4.0
416 Name == "cvtdq2pd" || // Added in 3.9
417 Name == "cvtdq2ps" || // Added in 7.0
418 Name == "cvtps2pd" || // Added in 3.9
419 Name == "cvtsi2sd" || // Added in 7.0
420 Name == "cvtsi642sd" || // Added in 7.0
421 Name == "cvtss2sd" || // Added in 7.0
422 Name == "div.sd" || // Added in 4.0
423 Name == "mul.sd" || // Added in 4.0
424 Name.starts_with("padds.") || // Added in 8.0
425 Name.starts_with("paddus.") || // Added in 8.0
426 Name.starts_with("pcmpeq.") || // Added in 3.1
427 Name.starts_with("pcmpgt.") || // Added in 3.1
428 Name == "pmaxs.w" || // Added in 3.9
429 Name == "pmaxu.b" || // Added in 3.9
430 Name == "pmins.w" || // Added in 3.9
431 Name == "pminu.b" || // Added in 3.9
432 Name == "pmulu.dq" || // Added in 7.0
433 Name.starts_with("pshuf") || // Added in 3.9
434 Name.starts_with("psll.dq") || // Added in 3.7
435 Name.starts_with("psrl.dq") || // Added in 3.7
436 Name.starts_with("psubs.") || // Added in 8.0
437 Name.starts_with("psubus.") || // Added in 8.0
438 Name.starts_with("sqrt.p") || // Added in 7.0
439 Name == "sqrt.sd" || // Added in 7.0
440 Name == "storel.dq" || // Added in 3.9
441 Name.starts_with("storeu.") || // Added in 3.9
442 Name == "sub.sd"); // Added in 4.0
443
444 if (Name.consume_front("sse41."))
445 return (Name.starts_with("blendp") || // Added in 3.7
446 Name == "movntdqa" || // Added in 5.0
447 Name == "pblendw" || // Added in 3.7
448 Name == "pmaxsb" || // Added in 3.9
449 Name == "pmaxsd" || // Added in 3.9
450 Name == "pmaxud" || // Added in 3.9
451 Name == "pmaxuw" || // Added in 3.9
452 Name == "pminsb" || // Added in 3.9
453 Name == "pminsd" || // Added in 3.9
454 Name == "pminud" || // Added in 3.9
455 Name == "pminuw" || // Added in 3.9
456 Name.starts_with("pmovsx") || // Added in 3.8
457 Name.starts_with("pmovzx") || // Added in 3.9
458 Name == "pmuldq"); // Added in 7.0
459
460 if (Name.consume_front("sse42."))
461 return Name == "crc32.64.8"; // Added in 3.4
462
463 if (Name.consume_front("sse4a."))
464 return Name.starts_with("movnt."); // Added in 3.9
465
466 if (Name.consume_front("ssse3."))
467 return (Name == "pabs.b.128" || // Added in 6.0
468 Name == "pabs.d.128" || // Added in 6.0
469 Name == "pabs.w.128"); // Added in 6.0
470
471 if (Name.consume_front("xop."))
472 return (Name == "vpcmov" || // Added in 3.8
473 Name == "vpcmov.256" || // Added in 5.0
474 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
475 Name.starts_with("vprot")); // Added in 8.0
476
477 return (Name == "addcarry.u32" || // Added in 8.0
478 Name == "addcarry.u64" || // Added in 8.0
479 Name == "addcarryx.u32" || // Added in 8.0
480 Name == "addcarryx.u64" || // Added in 8.0
481 Name == "subborrow.u32" || // Added in 8.0
482 Name == "subborrow.u64" || // Added in 8.0
483 Name.starts_with("vcvtph2ps.")); // Added in 11.0
484}
485
487 Function *&NewFn) {
488 // Only handle intrinsics that start with "x86.".
489 if (!Name.consume_front("x86."))
490 return false;
491
493 NewFn = nullptr;
494 return true;
495 }
496
497 if (Name == "rdtscp") { // Added in 8.0
498 // If this intrinsic has 0 operands, it's the new version.
499 if (F->getFunctionType()->getNumParams() == 0)
500 return false;
501
502 rename(F);
503 NewFn = Intrinsic::getDeclaration(F->getParent(),
504 Intrinsic::x86_rdtscp);
505 return true;
506 }
507
509
510 // SSE4.1 ptest functions may have an old signature.
511 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
513 .Case("c", Intrinsic::x86_sse41_ptestc)
514 .Case("z", Intrinsic::x86_sse41_ptestz)
515 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
518 return upgradePTESTIntrinsic(F, ID, NewFn);
519
520 return false;
521 }
522
523 // Several blend and other instructions with masks used the wrong number of
524 // bits.
525
526 // Added in 3.6
528 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
536 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537
538 if (Name.consume_front("avx512.mask.cmp.")) {
539 // Added in 7.0
541 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
549 return upgradeX86MaskedFPCompare(F, ID, NewFn);
550 return false; // No other 'x86.avx523.mask.cmp.*'.
551 }
552
553 if (Name.consume_front("avx512bf16.")) {
554 // Added in 9.0
556 .Case("cvtne2ps2bf16.128",
557 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558 .Case("cvtne2ps2bf16.256",
559 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560 .Case("cvtne2ps2bf16.512",
561 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562 .Case("mask.cvtneps2bf16.128",
563 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564 .Case("cvtneps2bf16.256",
565 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566 .Case("cvtneps2bf16.512",
567 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
570 return upgradeX86BF16Intrinsic(F, ID, NewFn);
571
572 // Added in 9.0
574 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
579 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
580 return false; // No other 'x86.avx512bf16.*'.
581 }
582
583 if (Name.consume_front("xop.")) {
585 if (Name.starts_with("vpermil2")) { // Added in 3.9
586 // Upgrade any XOP PERMIL2 index operand still using a float/double
587 // vector.
588 auto Idx = F->getFunctionType()->getParamType(2);
589 if (Idx->isFPOrFPVectorTy()) {
590 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591 unsigned EltSize = Idx->getScalarSizeInBits();
592 if (EltSize == 64 && IdxSize == 128)
593 ID = Intrinsic::x86_xop_vpermil2pd;
594 else if (EltSize == 32 && IdxSize == 128)
595 ID = Intrinsic::x86_xop_vpermil2ps;
596 else if (EltSize == 64 && IdxSize == 256)
597 ID = Intrinsic::x86_xop_vpermil2pd_256;
598 else
599 ID = Intrinsic::x86_xop_vpermil2ps_256;
600 }
601 } else if (F->arg_size() == 2)
602 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
604 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
607
609 rename(F);
610 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611 return true;
612 }
613 return false; // No other 'x86.xop.*'
614 }
615
616 if (Name == "seh.recoverfp") {
617 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618 return true;
619 }
620
621 return false;
622}
623
624// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
628 Function *&NewFn) {
629 if (Name.starts_with("rbit")) {
630 // '(arm|aarch64).rbit'.
631 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632 F->arg_begin()->getType());
633 return true;
634 }
635
636 if (Name == "thread.pointer") {
637 // '(arm|aarch64).thread.pointer'.
638 NewFn =
639 Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640 return true;
641 }
642
643 bool Neon = Name.consume_front("neon.");
644 if (Neon) {
645 // '(arm|aarch64).neon.*'.
646 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647 // v16i8 respectively.
648 if (Name.consume_front("bfdot.")) {
649 // (arm|aarch64).neon.bfdot.*'.
652 .Cases("v2f32.v8i8", "v4f32.v16i8",
653 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
654 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
657 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
658 assert((OperandWidth == 64 || OperandWidth == 128) &&
659 "Unexpected operand width");
660 LLVMContext &Ctx = F->getParent()->getContext();
661 std::array<Type *, 2> Tys{
662 {F->getReturnType(),
663 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
664 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
665 return true;
666 }
667 return false; // No other '(arm|aarch64).neon.bfdot.*'.
668 }
669
670 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
671 // anymore and accept v8bf16 instead of v16i8.
672 if (Name.consume_front("bfm")) {
673 // (arm|aarch64).neon.bfm*'.
674 if (Name.consume_back(".v4f32.v16i8")) {
675 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
678 .Case("mla",
679 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
680 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
681 .Case("lalb",
682 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
683 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
684 .Case("lalt",
685 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
686 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
689 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
690 return true;
691 }
692 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
693 }
694 return false; // No other '(arm|aarch64).neon.bfm*.
695 }
696 // Continue on to Aarch64 Neon or Arm Neon.
697 }
698 // Continue on to Arm or Aarch64.
699
700 if (IsArm) {
701 // 'arm.*'.
702 if (Neon) {
703 // 'arm.neon.*'.
705 .StartsWith("vclz.", Intrinsic::ctlz)
706 .StartsWith("vcnt.", Intrinsic::ctpop)
707 .StartsWith("vqadds.", Intrinsic::sadd_sat)
708 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
709 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
710 .StartsWith("vqsubu.", Intrinsic::usub_sat)
713 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
714 F->arg_begin()->getType());
715 return true;
716 }
717
718 if (Name.consume_front("vst")) {
719 // 'arm.neon.vst*'.
720 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
722 if (vstRegex.match(Name, &Groups)) {
723 static const Intrinsic::ID StoreInts[] = {
724 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
725 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
726
727 static const Intrinsic::ID StoreLaneInts[] = {
728 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
729 Intrinsic::arm_neon_vst4lane};
730
731 auto fArgs = F->getFunctionType()->params();
732 Type *Tys[] = {fArgs[0], fArgs[1]};
733 if (Groups[1].size() == 1)
734 NewFn = Intrinsic::getDeclaration(F->getParent(),
735 StoreInts[fArgs.size() - 3], Tys);
736 else
738 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
739 return true;
740 }
741 return false; // No other 'arm.neon.vst*'.
742 }
743
744 return false; // No other 'arm.neon.*'.
745 }
746
747 if (Name.consume_front("mve.")) {
748 // 'arm.mve.*'.
749 if (Name == "vctp64") {
750 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
751 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
752 // the function and deal with it below in UpgradeIntrinsicCall.
753 rename(F);
754 return true;
755 }
756 return false; // Not 'arm.mve.vctp64'.
757 }
758
759 // These too are changed to accept a v2i1 instead of the old v4i1.
760 if (Name.consume_back(".v4i1")) {
761 // 'arm.mve.*.v4i1'.
762 if (Name.consume_back(".predicated.v2i64.v4i32"))
763 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
764 return Name == "mull.int" || Name == "vqdmull";
765
766 if (Name.consume_back(".v2i64")) {
767 // 'arm.mve.*.v2i64.v4i1'
768 bool IsGather = Name.consume_front("vldr.gather.");
769 if (IsGather || Name.consume_front("vstr.scatter.")) {
770 if (Name.consume_front("base.")) {
771 // Optional 'wb.' prefix.
772 Name.consume_front("wb.");
773 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
774 // predicated.v2i64.v2i64.v4i1'.
775 return Name == "predicated.v2i64";
776 }
777
778 if (Name.consume_front("offset.predicated."))
779 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
780 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
781
782 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
783 return false;
784 }
785
786 return false; // No other 'arm.mve.*.v2i64.v4i1'.
787 }
788 return false; // No other 'arm.mve.*.v4i1'.
789 }
790 return false; // No other 'arm.mve.*'.
791 }
792
793 if (Name.consume_front("cde.vcx")) {
794 // 'arm.cde.vcx*'.
795 if (Name.consume_back(".predicated.v2i64.v4i1"))
796 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
797 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
798 Name == "3q" || Name == "3qa";
799
800 return false; // No other 'arm.cde.vcx*'.
801 }
802 } else {
803 // 'aarch64.*'.
804 if (Neon) {
805 // 'aarch64.neon.*'.
807 .StartsWith("frintn", Intrinsic::roundeven)
808 .StartsWith("rbit", Intrinsic::bitreverse)
811 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
812 F->arg_begin()->getType());
813 return true;
814 }
815
816 if (Name.starts_with("addp")) {
817 // 'aarch64.neon.addp*'.
818 if (F->arg_size() != 2)
819 return false; // Invalid IR.
820 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
821 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
822 NewFn = Intrinsic::getDeclaration(F->getParent(),
823 Intrinsic::aarch64_neon_faddp, Ty);
824 return true;
825 }
826 }
827 return false; // No other 'aarch64.neon.*'.
828 }
829 if (Name.consume_front("sve.")) {
830 // 'aarch64.sve.*'.
831 if (Name.consume_front("bf")) {
832 if (Name.consume_back(".lane")) {
833 // 'aarch64.sve.bf*.lane'.
836 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
837 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
838 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
841 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
842 return true;
843 }
844 return false; // No other 'aarch64.sve.bf*.lane'.
845 }
846 return false; // No other 'aarch64.sve.bf*'.
847 }
848
849 if (Name.consume_front("addqv")) {
850 // 'aarch64.sve.addqv'.
851 if (!F->getReturnType()->isFPOrFPVectorTy())
852 return false;
853
854 auto Args = F->getFunctionType()->params();
855 Type *Tys[] = {F->getReturnType(), Args[1]};
856 NewFn = Intrinsic::getDeclaration(F->getParent(),
857 Intrinsic::aarch64_sve_faddqv, Tys);
858 return true;
859 }
860
861 if (Name.consume_front("ld")) {
862 // 'aarch64.sve.ld*'.
863 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
864 if (LdRegex.match(Name)) {
865 Type *ScalarTy =
866 cast<VectorType>(F->getReturnType())->getElementType();
867 ElementCount EC =
868 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
869 Type *Ty = VectorType::get(ScalarTy, EC);
870 static const Intrinsic::ID LoadIDs[] = {
871 Intrinsic::aarch64_sve_ld2_sret,
872 Intrinsic::aarch64_sve_ld3_sret,
873 Intrinsic::aarch64_sve_ld4_sret,
874 };
875 NewFn = Intrinsic::getDeclaration(F->getParent(),
876 LoadIDs[Name[0] - '2'], Ty);
877 return true;
878 }
879 return false; // No other 'aarch64.sve.ld*'.
880 }
881
882 if (Name.consume_front("tuple.")) {
883 // 'aarch64.sve.tuple.*'.
884 if (Name.starts_with("get")) {
885 // 'aarch64.sve.tuple.get*'.
886 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
887 NewFn = Intrinsic::getDeclaration(F->getParent(),
888 Intrinsic::vector_extract, Tys);
889 return true;
890 }
891
892 if (Name.starts_with("set")) {
893 // 'aarch64.sve.tuple.set*'.
894 auto Args = F->getFunctionType()->params();
895 Type *Tys[] = {Args[0], Args[2], Args[1]};
896 NewFn = Intrinsic::getDeclaration(F->getParent(),
897 Intrinsic::vector_insert, Tys);
898 return true;
899 }
900
901 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
902 if (CreateTupleRegex.match(Name)) {
903 // 'aarch64.sve.tuple.create*'.
904 auto Args = F->getFunctionType()->params();
905 Type *Tys[] = {F->getReturnType(), Args[1]};
906 NewFn = Intrinsic::getDeclaration(F->getParent(),
907 Intrinsic::vector_insert, Tys);
908 return true;
909 }
910 return false; // No other 'aarch64.sve.tuple.*'.
911 }
912 return false; // No other 'aarch64.sve.*'.
913 }
914 }
915 return false; // No other 'arm.*', 'aarch64.*'.
916}
917
919 if (Name.consume_front("abs."))
921 .Case("bf16", Intrinsic::nvvm_abs_bf16)
922 .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
924
925 if (Name.consume_front("fma.rn."))
927 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
928 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
929 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
930 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
931 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
932 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
933 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
934 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
935 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
936 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
937 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
938 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
940
941 if (Name.consume_front("fmax."))
943 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
944 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
945 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
946 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
947 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
948 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
949 .Case("ftz.nan.xorsign.abs.bf16",
950 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
951 .Case("ftz.nan.xorsign.abs.bf16x2",
952 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
953 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
954 .Case("ftz.xorsign.abs.bf16x2",
955 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
956 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
957 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
958 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
959 .Case("nan.xorsign.abs.bf16x2",
960 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
961 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
962 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
964
965 if (Name.consume_front("fmin."))
967 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
968 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
969 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
970 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
971 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
972 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
973 .Case("ftz.nan.xorsign.abs.bf16",
974 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
975 .Case("ftz.nan.xorsign.abs.bf16x2",
976 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
977 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
978 .Case("ftz.xorsign.abs.bf16x2",
979 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
980 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
981 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
982 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
983 .Case("nan.xorsign.abs.bf16x2",
984 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
985 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
986 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
988
989 if (Name.consume_front("neg."))
991 .Case("bf16", Intrinsic::nvvm_neg_bf16)
992 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
994
996}
997
999 bool CanUpgradeDebugIntrinsicsToRecords) {
1000 assert(F && "Illegal to upgrade a non-existent Function.");
1001
1002 StringRef Name = F->getName();
1003
1004 // Quickly eliminate it, if it's not a candidate.
1005 if (!Name.consume_front("llvm.") || Name.empty())
1006 return false;
1007
1008 switch (Name[0]) {
1009 default: break;
1010 case 'a': {
1011 bool IsArm = Name.consume_front("arm.");
1012 if (IsArm || Name.consume_front("aarch64.")) {
1013 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1014 return true;
1015 break;
1016 }
1017
1018 if (Name.consume_front("amdgcn.")) {
1019 if (Name == "alignbit") {
1020 // Target specific intrinsic became redundant
1021 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1022 {F->getReturnType()});
1023 return true;
1024 }
1025
1026 if (Name.consume_front("atomic.")) {
1027 if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028 // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029 // there's no new declaration.
1030 NewFn = nullptr;
1031 return true;
1032 }
1033 break; // No other 'amdgcn.atomic.*'
1034 }
1035
1036 if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1037 Name.starts_with("ds.fmax")) {
1038 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1039 // declaration.
1040 NewFn = nullptr;
1041 return true;
1042 }
1043
1044 if (Name.starts_with("ldexp.")) {
1045 // Target specific intrinsic became redundant
1047 F->getParent(), Intrinsic::ldexp,
1048 {F->getReturnType(), F->getArg(1)->getType()});
1049 return true;
1050 }
1051 break; // No other 'amdgcn.*'
1052 }
1053
1054 break;
1055 }
1056 case 'c': {
1057 if (F->arg_size() == 1) {
1059 .StartsWith("ctlz.", Intrinsic::ctlz)
1060 .StartsWith("cttz.", Intrinsic::cttz)
1063 rename(F);
1064 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1065 F->arg_begin()->getType());
1066 return true;
1067 }
1068 }
1069
1070 if (F->arg_size() == 2 && Name == "coro.end") {
1071 rename(F);
1072 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1073 return true;
1074 }
1075
1076 break;
1077 }
1078 case 'd':
1079 if (Name.consume_front("dbg.")) {
1080 // Mark debug intrinsics for upgrade to new debug format.
1081 if (CanUpgradeDebugIntrinsicsToRecords &&
1082 F->getParent()->IsNewDbgInfoFormat) {
1083 if (Name == "addr" || Name == "value" || Name == "assign" ||
1084 Name == "declare" || Name == "label") {
1085 // There's no function to replace these with.
1086 NewFn = nullptr;
1087 // But we do want these to get upgraded.
1088 return true;
1089 }
1090 }
1091 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1092 // converted to DbgVariableRecords later.
1093 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1094 rename(F);
1095 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1096 return true;
1097 }
1098 break; // No other 'dbg.*'.
1099 }
1100 break;
1101 case 'e':
1102 if (Name.consume_front("experimental.vector.")) {
1105 .StartsWith("extract.", Intrinsic::vector_extract)
1106 .StartsWith("insert.", Intrinsic::vector_insert)
1107 .StartsWith("splice.", Intrinsic::vector_splice)
1108 .StartsWith("reverse.", Intrinsic::vector_reverse)
1109 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1110 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1113 const auto *FT = F->getFunctionType();
1115 if (ID == Intrinsic::vector_extract ||
1116 ID == Intrinsic::vector_interleave2)
1117 // Extracting overloads the return type.
1118 Tys.push_back(FT->getReturnType());
1119 if (ID != Intrinsic::vector_interleave2)
1120 Tys.push_back(FT->getParamType(0));
1121 if (ID == Intrinsic::vector_insert)
1122 // Inserting overloads the inserted type.
1123 Tys.push_back(FT->getParamType(1));
1124 rename(F);
1125 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1126 return true;
1127 }
1128
1129 if (Name.consume_front("reduce.")) {
1131 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1132 if (R.match(Name, &Groups))
1134 .Case("add", Intrinsic::vector_reduce_add)
1135 .Case("mul", Intrinsic::vector_reduce_mul)
1136 .Case("and", Intrinsic::vector_reduce_and)
1137 .Case("or", Intrinsic::vector_reduce_or)
1138 .Case("xor", Intrinsic::vector_reduce_xor)
1139 .Case("smax", Intrinsic::vector_reduce_smax)
1140 .Case("smin", Intrinsic::vector_reduce_smin)
1141 .Case("umax", Intrinsic::vector_reduce_umax)
1142 .Case("umin", Intrinsic::vector_reduce_umin)
1143 .Case("fmax", Intrinsic::vector_reduce_fmax)
1144 .Case("fmin", Intrinsic::vector_reduce_fmin)
1146
1147 bool V2 = false;
1149 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1150 Groups.clear();
1151 V2 = true;
1152 if (R2.match(Name, &Groups))
1154 .Case("fadd", Intrinsic::vector_reduce_fadd)
1155 .Case("fmul", Intrinsic::vector_reduce_fmul)
1157 }
1159 rename(F);
1160 auto Args = F->getFunctionType()->params();
1161 NewFn =
1162 Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1163 return true;
1164 }
1165 break; // No other 'expermental.vector.reduce.*'.
1166 }
1167 break; // No other 'experimental.vector.*'.
1168 }
1169 break; // No other 'e*'.
1170 case 'f':
1171 if (Name.starts_with("flt.rounds")) {
1172 rename(F);
1173 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1174 return true;
1175 }
1176 break;
1177 case 'i':
1178 if (Name.starts_with("invariant.group.barrier")) {
1179 // Rename invariant.group.barrier to launder.invariant.group
1180 auto Args = F->getFunctionType()->params();
1181 Type* ObjectPtr[1] = {Args[0]};
1182 rename(F);
1183 NewFn = Intrinsic::getDeclaration(F->getParent(),
1184 Intrinsic::launder_invariant_group, ObjectPtr);
1185 return true;
1186 }
1187 break;
1188 case 'm': {
1189 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1190 // alignment parameter to embedding the alignment as an attribute of
1191 // the pointer args.
1192 if (unsigned ID = StringSwitch<unsigned>(Name)
1193 .StartsWith("memcpy.", Intrinsic::memcpy)
1194 .StartsWith("memmove.", Intrinsic::memmove)
1195 .Default(0)) {
1196 if (F->arg_size() == 5) {
1197 rename(F);
1198 // Get the types of dest, src, and len
1199 ArrayRef<Type *> ParamTypes =
1200 F->getFunctionType()->params().slice(0, 3);
1201 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1202 return true;
1203 }
1204 }
1205 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1206 rename(F);
1207 // Get the types of dest, and len
1208 const auto *FT = F->getFunctionType();
1209 Type *ParamTypes[2] = {
1210 FT->getParamType(0), // Dest
1211 FT->getParamType(2) // len
1212 };
1213 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1214 ParamTypes);
1215 return true;
1216 }
1217 break;
1218 }
1219 case 'n': {
1220 if (Name.consume_front("nvvm.")) {
1221 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1222 if (F->arg_size() == 1) {
1223 Intrinsic::ID IID =
1225 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1226 .Case("clz.i", Intrinsic::ctlz)
1227 .Case("popc.i", Intrinsic::ctpop)
1229 if (IID != Intrinsic::not_intrinsic) {
1230 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1231 {F->getReturnType()});
1232 return true;
1233 }
1234 }
1235
1236 // Check for nvvm intrinsics that need a return type adjustment.
1237 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1239 if (IID != Intrinsic::not_intrinsic) {
1240 NewFn = nullptr;
1241 return true;
1242 }
1243 }
1244
1245 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1246 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1247 //
1248 // TODO: We could add lohi.i2d.
1249 bool Expand = false;
1250 if (Name.consume_front("abs."))
1251 // nvvm.abs.{i,ii}
1252 Expand = Name == "i" || Name == "ll";
1253 else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1254 Expand = true;
1255 else if (Name.consume_front("max.") || Name.consume_front("min."))
1256 // nvvm.{min,max}.{i,ii,ui,ull}
1257 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1258 Name == "ui" || Name == "ull";
1259 else if (Name.consume_front("atomic.load.add."))
1260 // nvvm.atomic.load.add.{f32.p,f64.p}
1261 Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1262 else
1263 Expand = false;
1264
1265 if (Expand) {
1266 NewFn = nullptr;
1267 return true;
1268 }
1269 break; // No other 'nvvm.*'.
1270 }
1271 break;
1272 }
1273 case 'o':
1274 // We only need to change the name to match the mangling including the
1275 // address space.
1276 if (Name.starts_with("objectsize.")) {
1277 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1278 if (F->arg_size() == 2 || F->arg_size() == 3 ||
1279 F->getName() !=
1280 Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1281 rename(F);
1282 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1283 Tys);
1284 return true;
1285 }
1286 }
1287 break;
1288
1289 case 'p':
1290 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1291 rename(F);
1293 F->getParent(), Intrinsic::ptr_annotation,
1294 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1295 return true;
1296 }
1297 break;
1298
1299 case 'r': {
1300 if (Name.consume_front("riscv.")) {
1303 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1304 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1305 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1306 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1309 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1310 rename(F);
1311 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1312 return true;
1313 }
1314 break; // No other applicable upgrades.
1315 }
1316
1318 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1319 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1322 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1323 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1324 rename(F);
1325 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1326 return true;
1327 }
1328 break; // No other applicable upgrades.
1329 }
1330
1332 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1333 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1334 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1335 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1336 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1337 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1340 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1341 rename(F);
1342 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1343 return true;
1344 }
1345 break; // No other applicable upgrades.
1346 }
1347 break; // No other 'riscv.*' intrinsics
1348 }
1349 } break;
1350
1351 case 's':
1352 if (Name == "stackprotectorcheck") {
1353 NewFn = nullptr;
1354 return true;
1355 }
1356 break;
1357
1358 case 'v': {
1359 if (Name == "var.annotation" && F->arg_size() == 4) {
1360 rename(F);
1362 F->getParent(), Intrinsic::var_annotation,
1363 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1364 return true;
1365 }
1366 break;
1367 }
1368
1369 case 'w':
1370 if (Name.consume_front("wasm.")) {
1373 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1374 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1375 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1378 rename(F);
1379 NewFn =
1380 Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1381 return true;
1382 }
1383
1384 if (Name.consume_front("dot.i8x16.i7x16.")) {
1386 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1387 .Case("add.signed",
1388 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1391 rename(F);
1392 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1393 return true;
1394 }
1395 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1396 }
1397 break; // No other 'wasm.*'.
1398 }
1399 break;
1400
1401 case 'x':
1402 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1403 return true;
1404 }
1405
1406 auto *ST = dyn_cast<StructType>(F->getReturnType());
1407 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1408 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1409 // Replace return type with literal non-packed struct. Only do this for
1410 // intrinsics declared to return a struct, not for intrinsics with
1411 // overloaded return type, in which case the exact struct type will be
1412 // mangled into the name.
1415 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1416 auto *FT = F->getFunctionType();
1417 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1418 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1419 std::string Name = F->getName().str();
1420 rename(F);
1421 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1422 Name, F->getParent());
1423
1424 // The new function may also need remangling.
1425 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1426 NewFn = *Result;
1427 return true;
1428 }
1429 }
1430
1431 // Remangle our intrinsic since we upgrade the mangling
1433 if (Result != std::nullopt) {
1434 NewFn = *Result;
1435 return true;
1436 }
1437
1438 // This may not belong here. This function is effectively being overloaded
1439 // to both detect an intrinsic which needs upgrading, and to provide the
1440 // upgraded form of the intrinsic. We should perhaps have two separate
1441 // functions for this.
1442 return false;
1443}
1444
1446 bool CanUpgradeDebugIntrinsicsToRecords) {
1447 NewFn = nullptr;
1448 bool Upgraded =
1449 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1450 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1451
1452 // Upgrade intrinsic attributes. This does not change the function.
1453 if (NewFn)
1454 F = NewFn;
1455 if (Intrinsic::ID id = F->getIntrinsicID())
1456 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1457 return Upgraded;
1458}
1459
1461 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1462 GV->getName() == "llvm.global_dtors")) ||
1463 !GV->hasInitializer())
1464 return nullptr;
1465 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1466 if (!ATy)
1467 return nullptr;
1468 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1469 if (!STy || STy->getNumElements() != 2)
1470 return nullptr;
1471
1472 LLVMContext &C = GV->getContext();
1473 IRBuilder<> IRB(C);
1474 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1475 IRB.getPtrTy());
1476 Constant *Init = GV->getInitializer();
1477 unsigned N = Init->getNumOperands();
1478 std::vector<Constant *> NewCtors(N);
1479 for (unsigned i = 0; i != N; ++i) {
1480 auto Ctor = cast<Constant>(Init->getOperand(i));
1481 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1482 Ctor->getAggregateElement(1),
1484 }
1485 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1486
1487 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1488 NewInit, GV->getName());
1489}
1490
1491// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1492// to byte shuffles.
1494 unsigned Shift) {
1495 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1496 unsigned NumElts = ResultTy->getNumElements() * 8;
1497
1498 // Bitcast from a 64-bit element type to a byte element type.
1499 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1500 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1501
1502 // We'll be shuffling in zeroes.
1503 Value *Res = Constant::getNullValue(VecTy);
1504
1505 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1506 // we'll just return the zero vector.
1507 if (Shift < 16) {
1508 int Idxs[64];
1509 // 256/512-bit version is split into 2/4 16-byte lanes.
1510 for (unsigned l = 0; l != NumElts; l += 16)
1511 for (unsigned i = 0; i != 16; ++i) {
1512 unsigned Idx = NumElts + i - Shift;
1513 if (Idx < NumElts)
1514 Idx -= NumElts - 16; // end of lane, switch operand.
1515 Idxs[l + i] = Idx + l;
1516 }
1517
1518 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1519 }
1520
1521 // Bitcast back to a 64-bit element type.
1522 return Builder.CreateBitCast(Res, ResultTy, "cast");
1523}
1524
1525// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1526// to byte shuffles.
1528 unsigned Shift) {
1529 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1530 unsigned NumElts = ResultTy->getNumElements() * 8;
1531
1532 // Bitcast from a 64-bit element type to a byte element type.
1533 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1534 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1535
1536 // We'll be shuffling in zeroes.
1537 Value *Res = Constant::getNullValue(VecTy);
1538
1539 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1540 // we'll just return the zero vector.
1541 if (Shift < 16) {
1542 int Idxs[64];
1543 // 256/512-bit version is split into 2/4 16-byte lanes.
1544 for (unsigned l = 0; l != NumElts; l += 16)
1545 for (unsigned i = 0; i != 16; ++i) {
1546 unsigned Idx = i + Shift;
1547 if (Idx >= 16)
1548 Idx += NumElts - 16; // end of lane, switch operand.
1549 Idxs[l + i] = Idx + l;
1550 }
1551
1552 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1553 }
1554
1555 // Bitcast back to a 64-bit element type.
1556 return Builder.CreateBitCast(Res, ResultTy, "cast");
1557}
1558
1559static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1560 unsigned NumElts) {
1561 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1563 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1564 Mask = Builder.CreateBitCast(Mask, MaskTy);
1565
1566 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1567 // i8 and we need to extract down to the right number of elements.
1568 if (NumElts <= 4) {
1569 int Indices[4];
1570 for (unsigned i = 0; i != NumElts; ++i)
1571 Indices[i] = i;
1572 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1573 "extract");
1574 }
1575
1576 return Mask;
1577}
1578
1579static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1580 Value *Op1) {
1581 // If the mask is all ones just emit the first operation.
1582 if (const auto *C = dyn_cast<Constant>(Mask))
1583 if (C->isAllOnesValue())
1584 return Op0;
1585
1586 Mask = getX86MaskVec(Builder, Mask,
1587 cast<FixedVectorType>(Op0->getType())->getNumElements());
1588 return Builder.CreateSelect(Mask, Op0, Op1);
1589}
1590
1591static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1592 Value *Op1) {
1593 // If the mask is all ones just emit the first operation.
1594 if (const auto *C = dyn_cast<Constant>(Mask))
1595 if (C->isAllOnesValue())
1596 return Op0;
1597
1598 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1599 Mask->getType()->getIntegerBitWidth());
1600 Mask = Builder.CreateBitCast(Mask, MaskTy);
1601 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1602 return Builder.CreateSelect(Mask, Op0, Op1);
1603}
1604
1605// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1606// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1607// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1609 Value *Op1, Value *Shift,
1610 Value *Passthru, Value *Mask,
1611 bool IsVALIGN) {
1612 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1613
1614 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1615 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1616 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1617 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1618
1619 // Mask the immediate for VALIGN.
1620 if (IsVALIGN)
1621 ShiftVal &= (NumElts - 1);
1622
1623 // If palignr is shifting the pair of vectors more than the size of two
1624 // lanes, emit zero.
1625 if (ShiftVal >= 32)
1627
1628 // If palignr is shifting the pair of input vectors more than one lane,
1629 // but less than two lanes, convert to shifting in zeroes.
1630 if (ShiftVal > 16) {
1631 ShiftVal -= 16;
1632 Op1 = Op0;
1634 }
1635
1636 int Indices[64];
1637 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1638 for (unsigned l = 0; l < NumElts; l += 16) {
1639 for (unsigned i = 0; i != 16; ++i) {
1640 unsigned Idx = ShiftVal + i;
1641 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1642 Idx += NumElts - 16; // End of lane, switch operand.
1643 Indices[l + i] = Idx + l;
1644 }
1645 }
1646
1647 Value *Align = Builder.CreateShuffleVector(
1648 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1649
1650 return emitX86Select(Builder, Mask, Align, Passthru);
1651}
1652
1654 bool ZeroMask, bool IndexForm) {
1655 Type *Ty = CI.getType();
1656 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1657 unsigned EltWidth = Ty->getScalarSizeInBits();
1658 bool IsFloat = Ty->isFPOrFPVectorTy();
1659 Intrinsic::ID IID;
1660 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1661 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1662 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1663 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1664 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1665 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1666 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1667 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1668 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1669 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1670 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1671 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1672 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1673 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1674 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1675 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1676 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1677 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1678 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1679 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1680 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1681 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1682 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1683 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1684 else if (VecWidth == 128 && EltWidth == 16)
1685 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1686 else if (VecWidth == 256 && EltWidth == 16)
1687 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1688 else if (VecWidth == 512 && EltWidth == 16)
1689 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1690 else if (VecWidth == 128 && EltWidth == 8)
1691 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1692 else if (VecWidth == 256 && EltWidth == 8)
1693 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1694 else if (VecWidth == 512 && EltWidth == 8)
1695 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1696 else
1697 llvm_unreachable("Unexpected intrinsic");
1698
1699 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1700 CI.getArgOperand(2) };
1701
1702 // If this isn't index form we need to swap operand 0 and 1.
1703 if (!IndexForm)
1704 std::swap(Args[0], Args[1]);
1705
1706 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1707 Args);
1708 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1709 : Builder.CreateBitCast(CI.getArgOperand(1),
1710 Ty);
1711 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1712}
1713
1715 Intrinsic::ID IID) {
1716 Type *Ty = CI.getType();
1717 Value *Op0 = CI.getOperand(0);
1718 Value *Op1 = CI.getOperand(1);
1719 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1720 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1721
1722 if (CI.arg_size() == 4) { // For masked intrinsics.
1723 Value *VecSrc = CI.getOperand(2);
1724 Value *Mask = CI.getOperand(3);
1725 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1726 }
1727 return Res;
1728}
1729
1731 bool IsRotateRight) {
1732 Type *Ty = CI.getType();
1733 Value *Src = CI.getArgOperand(0);
1734 Value *Amt = CI.getArgOperand(1);
1735
1736 // Amount may be scalar immediate, in which case create a splat vector.
1737 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1738 // we only care about the lowest log2 bits anyway.
1739 if (Amt->getType() != Ty) {
1740 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1741 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1742 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1743 }
1744
1745 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1746 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1747 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1748
1749 if (CI.arg_size() == 4) { // For masked intrinsics.
1750 Value *VecSrc = CI.getOperand(2);
1751 Value *Mask = CI.getOperand(3);
1752 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1753 }
1754 return Res;
1755}
1756
1757static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1758 bool IsSigned) {
1759 Type *Ty = CI.getType();
1760 Value *LHS = CI.getArgOperand(0);
1761 Value *RHS = CI.getArgOperand(1);
1762
1763 CmpInst::Predicate Pred;
1764 switch (Imm) {
1765 case 0x0:
1766 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1767 break;
1768 case 0x1:
1769 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1770 break;
1771 case 0x2:
1772 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1773 break;
1774 case 0x3:
1775 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1776 break;
1777 case 0x4:
1778 Pred = ICmpInst::ICMP_EQ;
1779 break;
1780 case 0x5:
1781 Pred = ICmpInst::ICMP_NE;
1782 break;
1783 case 0x6:
1784 return Constant::getNullValue(Ty); // FALSE
1785 case 0x7:
1786 return Constant::getAllOnesValue(Ty); // TRUE
1787 default:
1788 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1789 }
1790
1791 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1792 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1793 return Ext;
1794}
1795
1797 bool IsShiftRight, bool ZeroMask) {
1798 Type *Ty = CI.getType();
1799 Value *Op0 = CI.getArgOperand(0);
1800 Value *Op1 = CI.getArgOperand(1);
1801 Value *Amt = CI.getArgOperand(2);
1802
1803 if (IsShiftRight)
1804 std::swap(Op0, Op1);
1805
1806 // Amount may be scalar immediate, in which case create a splat vector.
1807 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1808 // we only care about the lowest log2 bits anyway.
1809 if (Amt->getType() != Ty) {
1810 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1811 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1812 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1813 }
1814
1815 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1816 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1817 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1818
1819 unsigned NumArgs = CI.arg_size();
1820 if (NumArgs >= 4) { // For masked intrinsics.
1821 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1822 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1823 CI.getArgOperand(0);
1824 Value *Mask = CI.getOperand(NumArgs - 1);
1825 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1826 }
1827 return Res;
1828}
1829
1831 Value *Mask, bool Aligned) {
1832 // Cast the pointer to the right type.
1833 Ptr = Builder.CreateBitCast(Ptr,
1834 llvm::PointerType::getUnqual(Data->getType()));
1835 const Align Alignment =
1836 Aligned
1837 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1838 : Align(1);
1839
1840 // If the mask is all ones just emit a regular store.
1841 if (const auto *C = dyn_cast<Constant>(Mask))
1842 if (C->isAllOnesValue())
1843 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1844
1845 // Convert the mask from an integer type to a vector of i1.
1846 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1847 Mask = getX86MaskVec(Builder, Mask, NumElts);
1848 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1849}
1850
1852 Value *Passthru, Value *Mask, bool Aligned) {
1853 Type *ValTy = Passthru->getType();
1854 // Cast the pointer to the right type.
1856 const Align Alignment =
1857 Aligned
1858 ? Align(
1860 8)
1861 : Align(1);
1862
1863 // If the mask is all ones just emit a regular store.
1864 if (const auto *C = dyn_cast<Constant>(Mask))
1865 if (C->isAllOnesValue())
1866 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1867
1868 // Convert the mask from an integer type to a vector of i1.
1869 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1870 Mask = getX86MaskVec(Builder, Mask, NumElts);
1871 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1872}
1873
1874static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1875 Type *Ty = CI.getType();
1876 Value *Op0 = CI.getArgOperand(0);
1877 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1878 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1879 if (CI.arg_size() == 3)
1880 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1881 return Res;
1882}
1883
1884static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1885 Type *Ty = CI.getType();
1886
1887 // Arguments have a vXi32 type so cast to vXi64.
1888 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1889 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1890
1891 if (IsSigned) {
1892 // Shift left then arithmetic shift right.
1893 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1894 LHS = Builder.CreateShl(LHS, ShiftAmt);
1895 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1896 RHS = Builder.CreateShl(RHS, ShiftAmt);
1897 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1898 } else {
1899 // Clear the upper bits.
1900 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1901 LHS = Builder.CreateAnd(LHS, Mask);
1902 RHS = Builder.CreateAnd(RHS, Mask);
1903 }
1904
1905 Value *Res = Builder.CreateMul(LHS, RHS);
1906
1907 if (CI.arg_size() == 4)
1908 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1909
1910 return Res;
1911}
1912
1913// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1915 Value *Mask) {
1916 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1917 if (Mask) {
1918 const auto *C = dyn_cast<Constant>(Mask);
1919 if (!C || !C->isAllOnesValue())
1920 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1921 }
1922
1923 if (NumElts < 8) {
1924 int Indices[8];
1925 for (unsigned i = 0; i != NumElts; ++i)
1926 Indices[i] = i;
1927 for (unsigned i = NumElts; i != 8; ++i)
1928 Indices[i] = NumElts + i % NumElts;
1929 Vec = Builder.CreateShuffleVector(Vec,
1931 Indices);
1932 }
1933 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1934}
1935
1937 unsigned CC, bool Signed) {
1938 Value *Op0 = CI.getArgOperand(0);
1939 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1940
1941 Value *Cmp;
1942 if (CC == 3) {
1944 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1945 } else if (CC == 7) {
1947 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1948 } else {
1950 switch (CC) {
1951 default: llvm_unreachable("Unknown condition code");
1952 case 0: Pred = ICmpInst::ICMP_EQ; break;
1953 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1954 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1955 case 4: Pred = ICmpInst::ICMP_NE; break;
1956 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1957 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1958 }
1959 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1960 }
1961
1962 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1963
1964 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1965}
1966
1967// Replace a masked intrinsic with an older unmasked intrinsic.
1969 Intrinsic::ID IID) {
1970 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1971 Value *Rep = Builder.CreateCall(Intrin,
1972 { CI.getArgOperand(0), CI.getArgOperand(1) });
1973 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1974}
1975
1977 Value* A = CI.getArgOperand(0);
1978 Value* B = CI.getArgOperand(1);
1979 Value* Src = CI.getArgOperand(2);
1980 Value* Mask = CI.getArgOperand(3);
1981
1982 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1983 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1984 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1985 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1986 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1987 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1988}
1989
1991 Value* Op = CI.getArgOperand(0);
1992 Type* ReturnOp = CI.getType();
1993 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1994 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1995 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1996}
1997
1998// Replace intrinsic with unmasked version and a select.
2000 CallBase &CI, Value *&Rep) {
2001 Name = Name.substr(12); // Remove avx512.mask.
2002
2003 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2004 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2005 Intrinsic::ID IID;
2006 if (Name.starts_with("max.p")) {
2007 if (VecWidth == 128 && EltWidth == 32)
2008 IID = Intrinsic::x86_sse_max_ps;
2009 else if (VecWidth == 128 && EltWidth == 64)
2010 IID = Intrinsic::x86_sse2_max_pd;
2011 else if (VecWidth == 256 && EltWidth == 32)
2012 IID = Intrinsic::x86_avx_max_ps_256;
2013 else if (VecWidth == 256 && EltWidth == 64)
2014 IID = Intrinsic::x86_avx_max_pd_256;
2015 else
2016 llvm_unreachable("Unexpected intrinsic");
2017 } else if (Name.starts_with("min.p")) {
2018 if (VecWidth == 128 && EltWidth == 32)
2019 IID = Intrinsic::x86_sse_min_ps;
2020 else if (VecWidth == 128 && EltWidth == 64)
2021 IID = Intrinsic::x86_sse2_min_pd;
2022 else if (VecWidth == 256 && EltWidth == 32)
2023 IID = Intrinsic::x86_avx_min_ps_256;
2024 else if (VecWidth == 256 && EltWidth == 64)
2025 IID = Intrinsic::x86_avx_min_pd_256;
2026 else
2027 llvm_unreachable("Unexpected intrinsic");
2028 } else if (Name.starts_with("pshuf.b.")) {
2029 if (VecWidth == 128)
2030 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2031 else if (VecWidth == 256)
2032 IID = Intrinsic::x86_avx2_pshuf_b;
2033 else if (VecWidth == 512)
2034 IID = Intrinsic::x86_avx512_pshuf_b_512;
2035 else
2036 llvm_unreachable("Unexpected intrinsic");
2037 } else if (Name.starts_with("pmul.hr.sw.")) {
2038 if (VecWidth == 128)
2039 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2040 else if (VecWidth == 256)
2041 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2042 else if (VecWidth == 512)
2043 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2044 else
2045 llvm_unreachable("Unexpected intrinsic");
2046 } else if (Name.starts_with("pmulh.w.")) {
2047 if (VecWidth == 128)
2048 IID = Intrinsic::x86_sse2_pmulh_w;
2049 else if (VecWidth == 256)
2050 IID = Intrinsic::x86_avx2_pmulh_w;
2051 else if (VecWidth == 512)
2052 IID = Intrinsic::x86_avx512_pmulh_w_512;
2053 else
2054 llvm_unreachable("Unexpected intrinsic");
2055 } else if (Name.starts_with("pmulhu.w.")) {
2056 if (VecWidth == 128)
2057 IID = Intrinsic::x86_sse2_pmulhu_w;
2058 else if (VecWidth == 256)
2059 IID = Intrinsic::x86_avx2_pmulhu_w;
2060 else if (VecWidth == 512)
2061 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2062 else
2063 llvm_unreachable("Unexpected intrinsic");
2064 } else if (Name.starts_with("pmaddw.d.")) {
2065 if (VecWidth == 128)
2066 IID = Intrinsic::x86_sse2_pmadd_wd;
2067 else if (VecWidth == 256)
2068 IID = Intrinsic::x86_avx2_pmadd_wd;
2069 else if (VecWidth == 512)
2070 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("pmaddubs.w.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("packsswb.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_sse2_packsswb_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_packsswb;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_packsswb_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("packssdw.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_packssdw_128;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_packssdw;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_packssdw_512;
2098 else
2099 llvm_unreachable("Unexpected intrinsic");
2100 } else if (Name.starts_with("packuswb.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_packuswb_128;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_packuswb;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_packuswb_512;
2107 else
2108 llvm_unreachable("Unexpected intrinsic");
2109 } else if (Name.starts_with("packusdw.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse41_packusdw;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_packusdw;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_packusdw_512;
2116 else
2117 llvm_unreachable("Unexpected intrinsic");
2118 } else if (Name.starts_with("vpermilvar.")) {
2119 if (VecWidth == 128 && EltWidth == 32)
2120 IID = Intrinsic::x86_avx_vpermilvar_ps;
2121 else if (VecWidth == 128 && EltWidth == 64)
2122 IID = Intrinsic::x86_avx_vpermilvar_pd;
2123 else if (VecWidth == 256 && EltWidth == 32)
2124 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2125 else if (VecWidth == 256 && EltWidth == 64)
2126 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2127 else if (VecWidth == 512 && EltWidth == 32)
2128 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2129 else if (VecWidth == 512 && EltWidth == 64)
2130 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2131 else
2132 llvm_unreachable("Unexpected intrinsic");
2133 } else if (Name == "cvtpd2dq.256") {
2134 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2135 } else if (Name == "cvtpd2ps.256") {
2136 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2137 } else if (Name == "cvttpd2dq.256") {
2138 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2139 } else if (Name == "cvttps2dq.128") {
2140 IID = Intrinsic::x86_sse2_cvttps2dq;
2141 } else if (Name == "cvttps2dq.256") {
2142 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2143 } else if (Name.starts_with("permvar.")) {
2144 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2145 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146 IID = Intrinsic::x86_avx2_permps;
2147 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148 IID = Intrinsic::x86_avx2_permd;
2149 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150 IID = Intrinsic::x86_avx512_permvar_df_256;
2151 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152 IID = Intrinsic::x86_avx512_permvar_di_256;
2153 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154 IID = Intrinsic::x86_avx512_permvar_sf_512;
2155 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156 IID = Intrinsic::x86_avx512_permvar_si_512;
2157 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158 IID = Intrinsic::x86_avx512_permvar_df_512;
2159 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160 IID = Intrinsic::x86_avx512_permvar_di_512;
2161 else if (VecWidth == 128 && EltWidth == 16)
2162 IID = Intrinsic::x86_avx512_permvar_hi_128;
2163 else if (VecWidth == 256 && EltWidth == 16)
2164 IID = Intrinsic::x86_avx512_permvar_hi_256;
2165 else if (VecWidth == 512 && EltWidth == 16)
2166 IID = Intrinsic::x86_avx512_permvar_hi_512;
2167 else if (VecWidth == 128 && EltWidth == 8)
2168 IID = Intrinsic::x86_avx512_permvar_qi_128;
2169 else if (VecWidth == 256 && EltWidth == 8)
2170 IID = Intrinsic::x86_avx512_permvar_qi_256;
2171 else if (VecWidth == 512 && EltWidth == 8)
2172 IID = Intrinsic::x86_avx512_permvar_qi_512;
2173 else
2174 llvm_unreachable("Unexpected intrinsic");
2175 } else if (Name.starts_with("dbpsadbw.")) {
2176 if (VecWidth == 128)
2177 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2178 else if (VecWidth == 256)
2179 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2180 else if (VecWidth == 512)
2181 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2182 else
2183 llvm_unreachable("Unexpected intrinsic");
2184 } else if (Name.starts_with("pmultishift.qb.")) {
2185 if (VecWidth == 128)
2186 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2187 else if (VecWidth == 256)
2188 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2189 else if (VecWidth == 512)
2190 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2191 else
2192 llvm_unreachable("Unexpected intrinsic");
2193 } else if (Name.starts_with("conflict.")) {
2194 if (Name[9] == 'd' && VecWidth == 128)
2195 IID = Intrinsic::x86_avx512_conflict_d_128;
2196 else if (Name[9] == 'd' && VecWidth == 256)
2197 IID = Intrinsic::x86_avx512_conflict_d_256;
2198 else if (Name[9] == 'd' && VecWidth == 512)
2199 IID = Intrinsic::x86_avx512_conflict_d_512;
2200 else if (Name[9] == 'q' && VecWidth == 128)
2201 IID = Intrinsic::x86_avx512_conflict_q_128;
2202 else if (Name[9] == 'q' && VecWidth == 256)
2203 IID = Intrinsic::x86_avx512_conflict_q_256;
2204 else if (Name[9] == 'q' && VecWidth == 512)
2205 IID = Intrinsic::x86_avx512_conflict_q_512;
2206 else
2207 llvm_unreachable("Unexpected intrinsic");
2208 } else if (Name.starts_with("pavg.")) {
2209 if (Name[5] == 'b' && VecWidth == 128)
2210 IID = Intrinsic::x86_sse2_pavg_b;
2211 else if (Name[5] == 'b' && VecWidth == 256)
2212 IID = Intrinsic::x86_avx2_pavg_b;
2213 else if (Name[5] == 'b' && VecWidth == 512)
2214 IID = Intrinsic::x86_avx512_pavg_b_512;
2215 else if (Name[5] == 'w' && VecWidth == 128)
2216 IID = Intrinsic::x86_sse2_pavg_w;
2217 else if (Name[5] == 'w' && VecWidth == 256)
2218 IID = Intrinsic::x86_avx2_pavg_w;
2219 else if (Name[5] == 'w' && VecWidth == 512)
2220 IID = Intrinsic::x86_avx512_pavg_w_512;
2221 else
2222 llvm_unreachable("Unexpected intrinsic");
2223 } else
2224 return false;
2225
2226 SmallVector<Value *, 4> Args(CI.args());
2227 Args.pop_back();
2228 Args.pop_back();
2229 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2230 Args);
2231 unsigned NumArgs = CI.arg_size();
2232 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2233 CI.getArgOperand(NumArgs - 2));
2234 return true;
2235}
2236
2237/// Upgrade comment in call to inline asm that represents an objc retain release
2238/// marker.
2239void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2240 size_t Pos;
2241 if (AsmStr->find("mov\tfp") == 0 &&
2242 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2243 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2244 AsmStr->replace(Pos, 1, ";");
2245 }
2246}
2247
2249 IRBuilder<> &Builder) {
2250 if (Name == "mve.vctp64.old") {
2251 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2252 // correct type.
2253 Value *VCTP = Builder.CreateCall(
2254 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2255 CI->getArgOperand(0), CI->getName());
2256 Value *C1 = Builder.CreateCall(
2258 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2259 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2260 VCTP);
2261 return Builder.CreateCall(
2263 F->getParent(), Intrinsic::arm_mve_pred_i2v,
2264 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2265 C1);
2266 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2267 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2268 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2269 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2270 Name ==
2271 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2272 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2273 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2274 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2275 Name ==
2276 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2277 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2278 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2279 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2280 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2281 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2282 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2283 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2284 std::vector<Type *> Tys;
2285 unsigned ID = CI->getIntrinsicID();
2286 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
2287 switch (ID) {
2288 case Intrinsic::arm_mve_mull_int_predicated:
2289 case Intrinsic::arm_mve_vqdmull_predicated:
2290 case Intrinsic::arm_mve_vldr_gather_base_predicated:
2291 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
2292 break;
2293 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2294 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2295 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2296 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
2297 V2I1Ty};
2298 break;
2299 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2300 Tys = {CI->getType(), CI->getOperand(0)->getType(),
2301 CI->getOperand(1)->getType(), V2I1Ty};
2302 break;
2303 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2304 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2305 CI->getOperand(2)->getType(), V2I1Ty};
2306 break;
2307 case Intrinsic::arm_cde_vcx1q_predicated:
2308 case Intrinsic::arm_cde_vcx1qa_predicated:
2309 case Intrinsic::arm_cde_vcx2q_predicated:
2310 case Intrinsic::arm_cde_vcx2qa_predicated:
2311 case Intrinsic::arm_cde_vcx3q_predicated:
2312 case Intrinsic::arm_cde_vcx3qa_predicated:
2313 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2314 break;
2315 default:
2316 llvm_unreachable("Unhandled Intrinsic!");
2317 }
2318
2319 std::vector<Value *> Ops;
2320 for (Value *Op : CI->args()) {
2321 Type *Ty = Op->getType();
2322 if (Ty->getScalarSizeInBits() == 1) {
2323 Value *C1 = Builder.CreateCall(
2325 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2326 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2327 Op);
2328 Op = Builder.CreateCall(
2329 Intrinsic::getDeclaration(F->getParent(),
2330 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2331 C1);
2332 }
2333 Ops.push_back(Op);
2334 }
2335
2336 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2337 return Builder.CreateCall(Fn, Ops, CI->getName());
2338 }
2339 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2340}
2341
2342// These are expected to have the arguments:
2343// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
2344//
2345// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
2346//
2348 Function *F, IRBuilder<> &Builder) {
2349 AtomicRMWInst::BinOp RMWOp =
2351 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
2352 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
2353 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
2354 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
2355 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
2356
2357 unsigned NumOperands = CI->getNumOperands();
2358 if (NumOperands < 3) // Malformed bitcode.
2359 return nullptr;
2360
2361 Value *Ptr = CI->getArgOperand(0);
2362 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
2363 if (!PtrTy) // Malformed.
2364 return nullptr;
2365
2366 Value *Val = CI->getArgOperand(1);
2367 if (Val->getType() != CI->getType()) // Malformed.
2368 return nullptr;
2369
2370 ConstantInt *OrderArg = nullptr;
2371 bool IsVolatile = false;
2372
2373 // These should have 5 arguments (plus the callee). A separate version of the
2374 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
2375 if (NumOperands > 3)
2376 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2377
2378 // Ignore scope argument at 3
2379
2380 if (NumOperands > 5) {
2381 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2382 IsVolatile = !VolatileArg || !VolatileArg->isZero();
2383 }
2384
2385 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2386 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2387 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2388 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
2389 Order = AtomicOrdering::SequentiallyConsistent;
2390
2391 LLVMContext &Ctx = F->getContext();
2392
2393 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
2394 Type *RetTy = CI->getType();
2395 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
2396 if (VT->getElementType()->isIntegerTy(16)) {
2397 VectorType *AsBF16 =
2398 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
2399 Val = Builder.CreateBitCast(Val, AsBF16);
2400 }
2401 }
2402
2403 // The scope argument never really worked correctly. Use agent as the most
2404 // conservative option which should still always produce the instruction.
2405 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
2406 AtomicRMWInst *RMW =
2407 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2408
2409 if (PtrTy->getAddressSpace() != 3) {
2410 RMW->setMetadata("amdgpu.no.fine.grained.memory",
2411 MDNode::get(F->getContext(), {}));
2412 }
2413
2414 if (IsVolatile)
2415 RMW->setVolatile(true);
2416
2417 return Builder.CreateBitCast(RMW, RetTy);
2418}
2419
2420/// Helper to unwrap intrinsic call MetadataAsValue operands.
2421template <typename MDType>
2422static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
2423 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
2424 return dyn_cast<MDType>(MAV->getMetadata());
2425 return nullptr;
2426}
2427
2428/// Convert debug intrinsic calls to non-instruction debug records.
2429/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
2430/// \p CI - The debug intrinsic call.
2432 DbgRecord *DR = nullptr;
2433 if (Name == "label") {
2434 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
2435 } else if (Name == "assign") {
2436 DR = new DbgVariableRecord(
2437 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
2438 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
2439 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
2440 CI->getDebugLoc());
2441 } else if (Name == "declare") {
2442 DR = new DbgVariableRecord(
2443 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
2444 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
2445 DbgVariableRecord::LocationType::Declare);
2446 } else if (Name == "addr") {
2447 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
2448 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
2449 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
2450 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
2451 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
2452 CI->getDebugLoc());
2453 } else if (Name == "value") {
2454 // An old version of dbg.value had an extra offset argument.
2455 unsigned VarOp = 1;
2456 unsigned ExprOp = 2;
2457 if (CI->arg_size() == 4) {
2458 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
2459 // Nonzero offset dbg.values get dropped without a replacement.
2460 if (!Offset || !Offset->isZeroValue())
2461 return;
2462 VarOp = 2;
2463 ExprOp = 3;
2464 }
2465 DR = new DbgVariableRecord(
2466 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
2467 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
2468 }
2469 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
2470 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
2471}
2472
2473/// Upgrade a call to an old intrinsic. All argument and return casting must be
2474/// provided to seamlessly integrate with existing context.
2476 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2477 // checks the callee's function type matches. It's likely we need to handle
2478 // type changes here.
2479 Function *F = dyn_cast<Function>(CI->getCalledOperand());
2480 if (!F)
2481 return;
2482
2483 LLVMContext &C = CI->getContext();
2484 IRBuilder<> Builder(C);
2485 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2486
2487 if (!NewFn) {
2488 bool FallthroughToDefaultUpgrade = false;
2489 // Get the Function's name.
2490 StringRef Name = F->getName();
2491
2492 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2493 Name = Name.substr(5);
2494
2495 bool IsX86 = Name.consume_front("x86.");
2496 bool IsNVVM = Name.consume_front("nvvm.");
2497 bool IsARM = Name.consume_front("arm.");
2498 bool IsAMDGCN = Name.consume_front("amdgcn.");
2499 bool IsDbg = Name.consume_front("dbg.");
2500
2501 if (IsX86 && Name.starts_with("sse4a.movnt.")) {
2503 Elts.push_back(
2504 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2505 MDNode *Node = MDNode::get(C, Elts);
2506
2507 Value *Arg0 = CI->getArgOperand(0);
2508 Value *Arg1 = CI->getArgOperand(1);
2509
2510 // Nontemporal (unaligned) store of the 0'th element of the float/double
2511 // vector.
2512 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2513 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2514 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2515 Value *Extract =
2516 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2517
2518 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2519 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2520
2521 // Remove intrinsic.
2522 CI->eraseFromParent();
2523 return;
2524 }
2525
2526 if (IsX86 && (Name.starts_with("avx.movnt.") ||
2527 Name.starts_with("avx512.storent."))) {
2529 Elts.push_back(
2530 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2531 MDNode *Node = MDNode::get(C, Elts);
2532
2533 Value *Arg0 = CI->getArgOperand(0);
2534 Value *Arg1 = CI->getArgOperand(1);
2535
2536 // Convert the type of the pointer to a pointer to the stored type.
2537 Value *BC = Builder.CreateBitCast(Arg0,
2538 PointerType::getUnqual(Arg1->getType()),
2539 "cast");
2540 StoreInst *SI = Builder.CreateAlignedStore(
2541 Arg1, BC,
2543 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2544
2545 // Remove intrinsic.
2546 CI->eraseFromParent();
2547 return;
2548 }
2549
2550 if (IsX86 && Name == "sse2.storel.dq") {
2551 Value *Arg0 = CI->getArgOperand(0);
2552 Value *Arg1 = CI->getArgOperand(1);
2553
2554 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2555 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2556 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2557 Value *BC = Builder.CreateBitCast(Arg0,
2558 PointerType::getUnqual(Elt->getType()),
2559 "cast");
2560 Builder.CreateAlignedStore(Elt, BC, Align(1));
2561
2562 // Remove intrinsic.
2563 CI->eraseFromParent();
2564 return;
2565 }
2566
2567 if (IsX86 && (Name.starts_with("sse.storeu.") ||
2568 Name.starts_with("sse2.storeu.") ||
2569 Name.starts_with("avx.storeu."))) {
2570 Value *Arg0 = CI->getArgOperand(0);
2571 Value *Arg1 = CI->getArgOperand(1);
2572
2573 Arg0 = Builder.CreateBitCast(Arg0,
2574 PointerType::getUnqual(Arg1->getType()),
2575 "cast");
2576 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2577
2578 // Remove intrinsic.
2579 CI->eraseFromParent();
2580 return;
2581 }
2582
2583 if (IsX86 && Name == "avx512.mask.store.ss") {
2584 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2585 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2586 Mask, false);
2587
2588 // Remove intrinsic.
2589 CI->eraseFromParent();
2590 return;
2591 }
2592
2593 if (IsX86 && Name.starts_with("avx512.mask.store")) {
2594 // "avx512.mask.storeu." or "avx512.mask.store."
2595 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2596 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2597 CI->getArgOperand(2), Aligned);
2598
2599 // Remove intrinsic.
2600 CI->eraseFromParent();
2601 return;
2602 }
2603
2604 Value *Rep = nullptr;
2605 // Upgrade packed integer vector compare intrinsics to compare instructions.
2606 if (IsX86 && (Name.starts_with("sse2.pcmp") ||
2607 Name.starts_with("avx2.pcmp"))) {
2608 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2609 bool CmpEq = Name[9] == 'e';
2610 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2611 CI->getArgOperand(0), CI->getArgOperand(1));
2612 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2613 } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
2614 Type *ExtTy = Type::getInt32Ty(C);
2615 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2616 ExtTy = Type::getInt64Ty(C);
2617 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2618 ExtTy->getPrimitiveSizeInBits();
2619 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2620 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2621 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2622 Name == "sse2.sqrt.sd")) {
2623 Value *Vec = CI->getArgOperand(0);
2624 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2625 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2626 Intrinsic::sqrt, Elt0->getType());
2627 Elt0 = Builder.CreateCall(Intr, Elt0);
2628 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2629 } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
2630 Name.starts_with("sse2.sqrt.p") ||
2631 Name.starts_with("sse.sqrt.p"))) {
2632 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2633 Intrinsic::sqrt,
2634 CI->getType()),
2635 {CI->getArgOperand(0)});
2636 } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
2637 if (CI->arg_size() == 4 &&
2638 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2639 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2640 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2641 : Intrinsic::x86_avx512_sqrt_pd_512;
2642
2643 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2645 IID), Args);
2646 } else {
2647 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2648 Intrinsic::sqrt,
2649 CI->getType()),
2650 {CI->getArgOperand(0)});
2651 }
2652 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2653 CI->getArgOperand(1));
2654 } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
2655 Name.starts_with("avx512.ptestnm"))) {
2656 Value *Op0 = CI->getArgOperand(0);
2657 Value *Op1 = CI->getArgOperand(1);
2658 Value *Mask = CI->getArgOperand(2);
2659 Rep = Builder.CreateAnd(Op0, Op1);
2660 llvm::Type *Ty = Op0->getType();
2662 ICmpInst::Predicate Pred =
2663 Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2664 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2665 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2666 } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
2667 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2668 ->getNumElements();
2669 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2670 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2671 CI->getArgOperand(1));
2672 } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
2673 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2674 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2675 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2676 int Indices[64];
2677 for (unsigned i = 0; i != NumElts; ++i)
2678 Indices[i] = i;
2679
2680 // First extract half of each vector. This gives better codegen than
2681 // doing it in a single shuffle.
2682 LHS =
2683 Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2684 RHS =
2685 Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2686 // Concat the vectors.
2687 // NOTE: Operands have to be swapped to match intrinsic definition.
2688 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2689 Rep = Builder.CreateBitCast(Rep, CI->getType());
2690 } else if (IsX86 && Name == "avx512.kand.w") {
2691 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2692 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2693 Rep = Builder.CreateAnd(LHS, RHS);
2694 Rep = Builder.CreateBitCast(Rep, CI->getType());
2695 } else if (IsX86 && Name == "avx512.kandn.w") {
2696 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2697 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2698 LHS = Builder.CreateNot(LHS);
2699 Rep = Builder.CreateAnd(LHS, RHS);
2700 Rep = Builder.CreateBitCast(Rep, CI->getType());
2701 } else if (IsX86 && Name == "avx512.kor.w") {
2702 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2703 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2704 Rep = Builder.CreateOr(LHS, RHS);
2705 Rep = Builder.CreateBitCast(Rep, CI->getType());
2706 } else if (IsX86 && Name == "avx512.kxor.w") {
2707 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2708 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2709 Rep = Builder.CreateXor(LHS, RHS);
2710 Rep = Builder.CreateBitCast(Rep, CI->getType());
2711 } else if (IsX86 && Name == "avx512.kxnor.w") {
2712 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2713 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2714 LHS = Builder.CreateNot(LHS);
2715 Rep = Builder.CreateXor(LHS, RHS);
2716 Rep = Builder.CreateBitCast(Rep, CI->getType());
2717 } else if (IsX86 && Name == "avx512.knot.w") {
2718 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2719 Rep = Builder.CreateNot(Rep);
2720 Rep = Builder.CreateBitCast(Rep, CI->getType());
2721 } else if (IsX86 &&
2722 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2723 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2724 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2725 Rep = Builder.CreateOr(LHS, RHS);
2726 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2727 Value *C;
2728 if (Name[14] == 'c')
2729 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2730 else
2731 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2732 Rep = Builder.CreateICmpEQ(Rep, C);
2733 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2734 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2735 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2736 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2737 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2738 Type *I32Ty = Type::getInt32Ty(C);
2739 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2740 ConstantInt::get(I32Ty, 0));
2741 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2742 ConstantInt::get(I32Ty, 0));
2743 Value *EltOp;
2744 if (Name.contains(".add."))
2745 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2746 else if (Name.contains(".sub."))
2747 EltOp = Builder.CreateFSub(Elt0, Elt1);
2748 else if (Name.contains(".mul."))
2749 EltOp = Builder.CreateFMul(Elt0, Elt1);
2750 else
2751 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2752 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2753 ConstantInt::get(I32Ty, 0));
2754 } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
2755 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2756 bool CmpEq = Name[16] == 'e';
2757 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2758 } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2759 Type *OpTy = CI->getArgOperand(0)->getType();
2760 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2761 Intrinsic::ID IID;
2762 switch (VecWidth) {
2763 default: llvm_unreachable("Unexpected intrinsic");
2764 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2765 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2766 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2767 }
2768
2769 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2770 { CI->getOperand(0), CI->getArgOperand(1) });
2771 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2772 } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
2773 Type *OpTy = CI->getArgOperand(0)->getType();
2774 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2775 unsigned EltWidth = OpTy->getScalarSizeInBits();
2776 Intrinsic::ID IID;
2777 if (VecWidth == 128 && EltWidth == 32)
2778 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2779 else if (VecWidth == 256 && EltWidth == 32)
2780 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2781 else if (VecWidth == 512 && EltWidth == 32)
2782 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2783 else if (VecWidth == 128 && EltWidth == 64)
2784 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2785 else if (VecWidth == 256 && EltWidth == 64)
2786 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2787 else if (VecWidth == 512 && EltWidth == 64)
2788 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2789 else
2790 llvm_unreachable("Unexpected intrinsic");
2791
2792 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2793 { CI->getOperand(0), CI->getArgOperand(1) });
2794 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2795 } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
2796 SmallVector<Value *, 4> Args(CI->args());
2797 Type *OpTy = Args[0]->getType();
2798 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2799 unsigned EltWidth = OpTy->getScalarSizeInBits();
2800 Intrinsic::ID IID;
2801 if (VecWidth == 128 && EltWidth == 32)
2802 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2803 else if (VecWidth == 256 && EltWidth == 32)
2804 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2805 else if (VecWidth == 512 && EltWidth == 32)
2806 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2807 else if (VecWidth == 128 && EltWidth == 64)
2808 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2809 else if (VecWidth == 256 && EltWidth == 64)
2810 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2811 else if (VecWidth == 512 && EltWidth == 64)
2812 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2813 else
2814 llvm_unreachable("Unexpected intrinsic");
2815
2817 if (VecWidth == 512)
2818 std::swap(Mask, Args.back());
2819 Args.push_back(Mask);
2820
2821 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2822 Args);
2823 } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
2824 // Integer compare intrinsics.
2825 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2826 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2827 } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
2828 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2829 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2830 } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
2831 Name.starts_with("avx512.cvtw2mask.") ||
2832 Name.starts_with("avx512.cvtd2mask.") ||
2833 Name.starts_with("avx512.cvtq2mask."))) {
2834 Value *Op = CI->getArgOperand(0);
2835 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2836 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2837 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2838 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2839 Name == "ssse3.pabs.w.128" ||
2840 Name == "ssse3.pabs.d.128" ||
2841 Name.starts_with("avx2.pabs") ||
2842 Name.starts_with("avx512.mask.pabs"))) {
2843 Rep = upgradeAbs(Builder, *CI);
2844 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2845 Name == "sse2.pmaxs.w" ||
2846 Name == "sse41.pmaxsd" ||
2847 Name.starts_with("avx2.pmaxs") ||
2848 Name.starts_with("avx512.mask.pmaxs"))) {
2849 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2850 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2851 Name == "sse41.pmaxuw" ||
2852 Name == "sse41.pmaxud" ||
2853 Name.starts_with("avx2.pmaxu") ||
2854 Name.starts_with("avx512.mask.pmaxu"))) {
2855 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2856 } else if (IsX86 && (Name == "sse41.pminsb" ||
2857 Name == "sse2.pmins.w" ||
2858 Name == "sse41.pminsd" ||
2859 Name.starts_with("avx2.pmins") ||
2860 Name.starts_with("avx512.mask.pmins"))) {
2861 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2862 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2863 Name == "sse41.pminuw" ||
2864 Name == "sse41.pminud" ||
2865 Name.starts_with("avx2.pminu") ||
2866 Name.starts_with("avx512.mask.pminu"))) {
2867 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2868 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2869 Name == "avx2.pmulu.dq" ||
2870 Name == "avx512.pmulu.dq.512" ||
2871 Name.starts_with("avx512.mask.pmulu.dq."))) {
2872 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2873 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2874 Name == "avx2.pmul.dq" ||
2875 Name == "avx512.pmul.dq.512" ||
2876 Name.starts_with("avx512.mask.pmul.dq."))) {
2877 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2878 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2879 Name == "sse2.cvtsi2sd" ||
2880 Name == "sse.cvtsi642ss" ||
2881 Name == "sse2.cvtsi642sd")) {
2882 Rep = Builder.CreateSIToFP(
2883 CI->getArgOperand(1),
2884 cast<VectorType>(CI->getType())->getElementType());
2885 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2886 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2887 Rep = Builder.CreateUIToFP(
2888 CI->getArgOperand(1),
2889 cast<VectorType>(CI->getType())->getElementType());
2890 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2891 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2892 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2893 Rep = Builder.CreateFPExt(
2894 Rep, cast<VectorType>(CI->getType())->getElementType());
2895 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2896 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2897 Name == "sse2.cvtdq2ps" ||
2898 Name == "avx.cvtdq2.pd.256" ||
2899 Name == "avx.cvtdq2.ps.256" ||
2900 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2901 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2902 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2903 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2904 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2905 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2906 Name == "avx512.mask.cvtqq2ps.256" ||
2907 Name == "avx512.mask.cvtqq2ps.512" ||
2908 Name == "avx512.mask.cvtuqq2ps.256" ||
2909 Name == "avx512.mask.cvtuqq2ps.512" ||
2910 Name == "sse2.cvtps2pd" ||
2911 Name == "avx.cvt.ps2.pd.256" ||
2912 Name == "avx512.mask.cvtps2pd.128" ||
2913 Name == "avx512.mask.cvtps2pd.256")) {
2914 auto *DstTy = cast<FixedVectorType>(CI->getType());
2915 Rep = CI->getArgOperand(0);
2916 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2917
2918 unsigned NumDstElts = DstTy->getNumElements();
2919 if (NumDstElts < SrcTy->getNumElements()) {
2920 assert(NumDstElts == 2 && "Unexpected vector size");
2921 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2922 }
2923
2924 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2925 bool IsUnsigned = Name.contains("cvtu");
2926 if (IsPS2PD)
2927 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2928 else if (CI->arg_size() == 4 &&
2929 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2930 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2931 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2932 : Intrinsic::x86_avx512_sitofp_round;
2934 { DstTy, SrcTy });
2935 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2936 } else {
2937 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2938 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2939 }
2940
2941 if (CI->arg_size() >= 3)
2942 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2943 CI->getArgOperand(1));
2944 } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2945 Name.starts_with("vcvtph2ps."))) {
2946 auto *DstTy = cast<FixedVectorType>(CI->getType());
2947 Rep = CI->getArgOperand(0);
2948 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2949 unsigned NumDstElts = DstTy->getNumElements();
2950 if (NumDstElts != SrcTy->getNumElements()) {
2951 assert(NumDstElts == 4 && "Unexpected vector size");
2952 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2953 }
2954 Rep = Builder.CreateBitCast(
2955 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2956 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2957 if (CI->arg_size() >= 3)
2958 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2959 CI->getArgOperand(1));
2960 } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
2961 // "avx512.mask.loadu." or "avx512.mask.load."
2962 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2963 Rep =
2964 upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2965 CI->getArgOperand(2), Aligned);
2966 } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
2967 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2968 Type *PtrTy = ResultTy->getElementType();
2969
2970 // Cast the pointer to element type.
2971 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2973
2974 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2975 ResultTy->getNumElements());
2976
2977 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2978 Intrinsic::masked_expandload,
2979 ResultTy);
2980 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2981 } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
2982 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2983 Type *PtrTy = ResultTy->getElementType();
2984
2985 // Cast the pointer to element type.
2986 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2988
2989 Value *MaskVec =
2990 getX86MaskVec(Builder, CI->getArgOperand(2),
2991 cast<FixedVectorType>(ResultTy)->getNumElements());
2992
2993 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2994 Intrinsic::masked_compressstore,
2995 ResultTy);
2996 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2997 } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
2998 Name.starts_with("avx512.mask.expand."))) {
2999 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3000
3001 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3002 ResultTy->getNumElements());
3003
3004 bool IsCompress = Name[12] == 'c';
3005 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3006 : Intrinsic::x86_avx512_mask_expand;
3007 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
3008 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
3009 MaskVec });
3010 } else if (IsX86 && Name.starts_with("xop.vpcom")) {
3011 bool IsSigned;
3012 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3013 Name.ends_with("uq"))
3014 IsSigned = false;
3015 else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
3016 Name.ends_with("q"))
3017 IsSigned = true;
3018 else
3019 llvm_unreachable("Unknown suffix");
3020
3021 unsigned Imm;
3022 if (CI->arg_size() == 3) {
3023 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3024 } else {
3025 Name = Name.substr(9); // strip off "xop.vpcom"
3026 if (Name.starts_with("lt"))
3027 Imm = 0;
3028 else if (Name.starts_with("le"))
3029 Imm = 1;
3030 else if (Name.starts_with("gt"))
3031 Imm = 2;
3032 else if (Name.starts_with("ge"))
3033 Imm = 3;
3034 else if (Name.starts_with("eq"))
3035 Imm = 4;
3036 else if (Name.starts_with("ne"))
3037 Imm = 5;
3038 else if (Name.starts_with("false"))
3039 Imm = 6;
3040 else if (Name.starts_with("true"))
3041 Imm = 7;
3042 else
3043 llvm_unreachable("Unknown condition");
3044 }
3045
3046 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3047 } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
3048 Value *Sel = CI->getArgOperand(2);
3049 Value *NotSel = Builder.CreateNot(Sel);
3050 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3051 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3052 Rep = Builder.CreateOr(Sel0, Sel1);
3053 } else if (IsX86 && (Name.starts_with("xop.vprot") ||
3054 Name.starts_with("avx512.prol") ||
3055 Name.starts_with("avx512.mask.prol"))) {
3056 Rep = upgradeX86Rotate(Builder, *CI, false);
3057 } else if (IsX86 && (Name.starts_with("avx512.pror") ||
3058 Name.starts_with("avx512.mask.pror"))) {
3059 Rep = upgradeX86Rotate(Builder, *CI, true);
3060 } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
3061 Name.starts_with("avx512.mask.vpshld") ||
3062 Name.starts_with("avx512.maskz.vpshld"))) {
3063 bool ZeroMask = Name[11] == 'z';
3064 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3065 } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
3066 Name.starts_with("avx512.mask.vpshrd") ||
3067 Name.starts_with("avx512.maskz.vpshrd"))) {
3068 bool ZeroMask = Name[11] == 'z';
3069 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3070 } else if (IsX86 && Name == "sse42.crc32.64.8") {
3071 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
3072 Intrinsic::x86_sse42_crc32_32_8);
3073 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3074 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
3075 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3076 } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
3077 Name.starts_with("avx512.vbroadcast.s"))) {
3078 // Replace broadcasts with a series of insertelements.
3079 auto *VecTy = cast<FixedVectorType>(CI->getType());
3080 Type *EltTy = VecTy->getElementType();
3081 unsigned EltNum = VecTy->getNumElements();
3082 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3083 Type *I32Ty = Type::getInt32Ty(C);
3084 Rep = PoisonValue::get(VecTy);
3085 for (unsigned I = 0; I < EltNum; ++I)
3086 Rep = Builder.CreateInsertElement(Rep, Load,
3087 ConstantInt::get(I32Ty, I));
3088 } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
3089 Name.starts_with("sse41.pmovzx") ||
3090 Name.starts_with("avx2.pmovsx") ||
3091 Name.starts_with("avx2.pmovzx") ||
3092 Name.starts_with("avx512.mask.pmovsx") ||
3093 Name.starts_with("avx512.mask.pmovzx"))) {
3094 auto *DstTy = cast<FixedVectorType>(CI->getType());
3095 unsigned NumDstElts = DstTy->getNumElements();
3096
3097 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3098 SmallVector<int, 8> ShuffleMask(NumDstElts);
3099 for (unsigned i = 0; i != NumDstElts; ++i)
3100 ShuffleMask[i] = i;
3101
3102 Value *SV =
3103 Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3104
3105 bool DoSext = Name.contains("pmovsx");
3106 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
3107 : Builder.CreateZExt(SV, DstTy);
3108 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3109 if (CI->arg_size() == 3)
3110 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3111 CI->getArgOperand(1));
3112 } else if (Name == "avx512.mask.pmov.qd.256" ||
3113 Name == "avx512.mask.pmov.qd.512" ||
3114 Name == "avx512.mask.pmov.wb.256" ||
3115 Name == "avx512.mask.pmov.wb.512") {
3116 Type *Ty = CI->getArgOperand(1)->getType();
3117 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3118 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3119 CI->getArgOperand(1));
3120 } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
3121 Name == "avx2.vbroadcasti128")) {
3122 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3123 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3124 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3125 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3126 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
3127 PointerType::getUnqual(VT));
3128 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
3129 if (NumSrcElts == 2)
3130 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3131 else
3132 Rep = Builder.CreateShuffleVector(
3133 Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3134 } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
3135 Name.starts_with("avx512.mask.shuf.f"))) {
3136 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3137 Type *VT = CI->getType();
3138 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3139 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3140 unsigned ControlBitsMask = NumLanes - 1;
3141 unsigned NumControlBits = NumLanes / 2;
3142 SmallVector<int, 8> ShuffleMask(0);
3143
3144 for (unsigned l = 0; l != NumLanes; ++l) {
3145 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3146 // We actually need the other source.
3147 if (l >= NumLanes / 2)
3148 LaneMask += NumLanes;
3149 for (unsigned i = 0; i != NumElementsInLane; ++i)
3150 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3151 }
3152 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3153 CI->getArgOperand(1), ShuffleMask);
3154 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3155 CI->getArgOperand(3));
3156 }else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
3157 Name.starts_with("avx512.mask.broadcasti"))) {
3158 unsigned NumSrcElts =
3159 cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3160 ->getNumElements();
3161 unsigned NumDstElts =
3162 cast<FixedVectorType>(CI->getType())->getNumElements();
3163
3164 SmallVector<int, 8> ShuffleMask(NumDstElts);
3165 for (unsigned i = 0; i != NumDstElts; ++i)
3166 ShuffleMask[i] = i % NumSrcElts;
3167
3168 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3169 CI->getArgOperand(0),
3170 ShuffleMask);
3171 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3172 CI->getArgOperand(1));
3173 } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
3174 Name.starts_with("avx2.vbroadcast") ||
3175 Name.starts_with("avx512.pbroadcast") ||
3176 Name.starts_with("avx512.mask.broadcast.s"))) {
3177 // Replace vp?broadcasts with a vector shuffle.
3178 Value *Op = CI->getArgOperand(0);
3179 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3180 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3183 Rep = Builder.CreateShuffleVector(Op, M);
3184
3185 if (CI->arg_size() == 3)
3186 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3187 CI->getArgOperand(1));
3188 } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
3189 Name.starts_with("avx2.padds.") ||
3190 Name.starts_with("avx512.padds.") ||
3191 Name.starts_with("avx512.mask.padds."))) {
3192 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3193 } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
3194 Name.starts_with("avx2.psubs.") ||
3195 Name.starts_with("avx512.psubs.") ||
3196 Name.starts_with("avx512.mask.psubs."))) {
3197 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3198 } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
3199 Name.starts_with("avx2.paddus.") ||
3200 Name.starts_with("avx512.mask.paddus."))) {
3201 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3202 } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
3203 Name.starts_with("avx2.psubus.") ||
3204 Name.starts_with("avx512.mask.psubus."))) {
3205 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3206 } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
3208 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3209 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3210 false);
3211 } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
3213 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3214 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3215 true);
3216 } else if (IsX86 && (Name == "sse2.psll.dq" ||
3217 Name == "avx2.psll.dq")) {
3218 // 128/256-bit shift left specified in bits.
3219 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3220 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3221 Shift / 8); // Shift is in bits.
3222 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3223 Name == "avx2.psrl.dq")) {
3224 // 128/256-bit shift right specified in bits.
3225 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3226 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3227 Shift / 8); // Shift is in bits.
3228 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3229 Name == "avx2.psll.dq.bs" ||
3230 Name == "avx512.psll.dq.512")) {
3231 // 128/256/512-bit shift left specified in bytes.
3232 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3233 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3234 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3235 Name == "avx2.psrl.dq.bs" ||
3236 Name == "avx512.psrl.dq.512")) {
3237 // 128/256/512-bit shift right specified in bytes.
3238 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3239 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3240 } else if (IsX86 && (Name == "sse41.pblendw" ||
3241 Name.starts_with("sse41.blendp") ||
3242 Name.starts_with("avx.blend.p") ||
3243 Name == "avx2.pblendw" ||
3244 Name.starts_with("avx2.pblendd."))) {
3245 Value *Op0 = CI->getArgOperand(0);
3246 Value *Op1 = CI->getArgOperand(1);
3247 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3248 auto *VecTy = cast<FixedVectorType>(CI->getType());
3249 unsigned NumElts = VecTy->getNumElements();
3250
3251 SmallVector<int, 16> Idxs(NumElts);
3252 for (unsigned i = 0; i != NumElts; ++i)
3253 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3254
3255 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3256 } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
3257 Name == "avx2.vinserti128" ||
3258 Name.starts_with("avx512.mask.insert"))) {
3259 Value *Op0 = CI->getArgOperand(0);
3260 Value *Op1 = CI->getArgOperand(1);
3261 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3262 unsigned DstNumElts =
3263 cast<FixedVectorType>(CI->getType())->getNumElements();
3264 unsigned SrcNumElts =
3265 cast<FixedVectorType>(Op1->getType())->getNumElements();
3266 unsigned Scale = DstNumElts / SrcNumElts;
3267
3268 // Mask off the high bits of the immediate value; hardware ignores those.
3269 Imm = Imm % Scale;
3270
3271 // Extend the second operand into a vector the size of the destination.
3272 SmallVector<int, 8> Idxs(DstNumElts);
3273 for (unsigned i = 0; i != SrcNumElts; ++i)
3274 Idxs[i] = i;
3275 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3276 Idxs[i] = SrcNumElts;
3277 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3278
3279 // Insert the second operand into the first operand.
3280
3281 // Note that there is no guarantee that instruction lowering will actually
3282 // produce a vinsertf128 instruction for the created shuffles. In
3283 // particular, the 0 immediate case involves no lane changes, so it can
3284 // be handled as a blend.
3285
3286 // Example of shuffle mask for 32-bit elements:
3287 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3288 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3289
3290 // First fill with identify mask.
3291 for (unsigned i = 0; i != DstNumElts; ++i)
3292 Idxs[i] = i;
3293 // Then replace the elements where we need to insert.
3294 for (unsigned i = 0; i != SrcNumElts; ++i)
3295 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3296 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3297
3298 // If the intrinsic has a mask operand, handle that.
3299 if (CI->arg_size() == 5)
3300 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3301 CI->getArgOperand(3));
3302 } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
3303 Name == "avx2.vextracti128" ||
3304 Name.starts_with("avx512.mask.vextract"))) {
3305 Value *Op0 = CI->getArgOperand(0);
3306 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3307 unsigned DstNumElts =
3308 cast<FixedVectorType>(CI->getType())->getNumElements();
3309 unsigned SrcNumElts =
3310 cast<FixedVectorType>(Op0->getType())->getNumElements();
3311 unsigned Scale = SrcNumElts / DstNumElts;
3312
3313 // Mask off the high bits of the immediate value; hardware ignores those.
3314 Imm = Imm % Scale;
3315
3316 // Get indexes for the subvector of the input vector.
3317 SmallVector<int, 8> Idxs(DstNumElts);
3318 for (unsigned i = 0; i != DstNumElts; ++i) {
3319 Idxs[i] = i + (Imm * DstNumElts);
3320 }
3321 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3322
3323 // If the intrinsic has a mask operand, handle that.
3324 if (CI->arg_size() == 4)
3325 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3326 CI->getArgOperand(2));
3327 } else if (!IsX86 && Name == "stackprotectorcheck") {
3328 Rep = nullptr;
3329 } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
3330 Name.starts_with("avx512.mask.perm.di."))) {
3331 Value *Op0 = CI->getArgOperand(0);
3332 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3333 auto *VecTy = cast<FixedVectorType>(CI->getType());
3334 unsigned NumElts = VecTy->getNumElements();
3335
3336 SmallVector<int, 8> Idxs(NumElts);
3337 for (unsigned i = 0; i != NumElts; ++i)
3338 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3339
3340 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3341
3342 if (CI->arg_size() == 4)
3343 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3344 CI->getArgOperand(2));
3345 } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
3346 Name == "avx2.vperm2i128")) {
3347 // The immediate permute control byte looks like this:
3348 // [1:0] - select 128 bits from sources for low half of destination
3349 // [2] - ignore
3350 // [3] - zero low half of destination
3351 // [5:4] - select 128 bits from sources for high half of destination
3352 // [6] - ignore
3353 // [7] - zero high half of destination
3354
3355 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3356
3357 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3358 unsigned HalfSize = NumElts / 2;
3359 SmallVector<int, 8> ShuffleMask(NumElts);
3360
3361 // Determine which operand(s) are actually in use for this instruction.
3362 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3363 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3364
3365 // If needed, replace operands based on zero mask.
3366 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3367 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3368
3369 // Permute low half of result.
3370 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3371 for (unsigned i = 0; i < HalfSize; ++i)
3372 ShuffleMask[i] = StartIndex + i;
3373
3374 // Permute high half of result.
3375 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3376 for (unsigned i = 0; i < HalfSize; ++i)
3377 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3378
3379 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3380
3381 } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
3382 Name == "sse2.pshuf.d" ||
3383 Name.starts_with("avx512.mask.vpermil.p") ||
3384 Name.starts_with("avx512.mask.pshuf.d."))) {
3385 Value *Op0 = CI->getArgOperand(0);
3386 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3387 auto *VecTy = cast<FixedVectorType>(CI->getType());
3388 unsigned NumElts = VecTy->getNumElements();
3389 // Calculate the size of each index in the immediate.
3390 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3391 unsigned IdxMask = ((1 << IdxSize) - 1);
3392
3393 SmallVector<int, 8> Idxs(NumElts);
3394 // Lookup the bits for this element, wrapping around the immediate every
3395 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3396 // to offset by the first index of each group.
3397 for (unsigned i = 0; i != NumElts; ++i)
3398 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3399
3400 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3401
3402 if (CI->arg_size() == 4)
3403 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3404 CI->getArgOperand(2));
3405 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3406 Name.starts_with("avx512.mask.pshufl.w."))) {
3407 Value *Op0 = CI->getArgOperand(0);
3408 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3409 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3410
3411 SmallVector<int, 16> Idxs(NumElts);
3412 for (unsigned l = 0; l != NumElts; l += 8) {
3413 for (unsigned i = 0; i != 4; ++i)
3414 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3415 for (unsigned i = 4; i != 8; ++i)
3416 Idxs[i + l] = i + l;
3417 }
3418
3419 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3420
3421 if (CI->arg_size() == 4)
3422 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3423 CI->getArgOperand(2));
3424 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3425 Name.starts_with("avx512.mask.pshufh.w."))) {
3426 Value *Op0 = CI->getArgOperand(0);
3427 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3428 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3429
3430 SmallVector<int, 16> Idxs(NumElts);
3431 for (unsigned l = 0; l != NumElts; l += 8) {
3432 for (unsigned i = 0; i != 4; ++i)
3433 Idxs[i + l] = i + l;
3434 for (unsigned i = 0; i != 4; ++i)
3435 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3436 }
3437
3438 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3439
3440 if (CI->arg_size() == 4)
3441 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3442 CI->getArgOperand(2));
3443 } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
3444 Value *Op0 = CI->getArgOperand(0);
3445 Value *Op1 = CI->getArgOperand(1);
3446 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3447 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3448
3449 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3450 unsigned HalfLaneElts = NumLaneElts / 2;
3451
3452 SmallVector<int, 16> Idxs(NumElts);
3453 for (unsigned i = 0; i != NumElts; ++i) {
3454 // Base index is the starting element of the lane.
3455 Idxs[i] = i - (i % NumLaneElts);
3456 // If we are half way through the lane switch to the other source.
3457 if ((i % NumLaneElts) >= HalfLaneElts)
3458 Idxs[i] += NumElts;
3459 // Now select the specific element. By adding HalfLaneElts bits from
3460 // the immediate. Wrapping around the immediate every 8-bits.
3461 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3462 }
3463
3464 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3465
3466 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3467 CI->getArgOperand(3));
3468 } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
3469 Name.starts_with("avx512.mask.movshdup") ||
3470 Name.starts_with("avx512.mask.movsldup"))) {
3471 Value *Op0 = CI->getArgOperand(0);
3472 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3473 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3474
3475 unsigned Offset = 0;
3476 if (Name.starts_with("avx512.mask.movshdup."))
3477 Offset = 1;
3478
3479 SmallVector<int, 16> Idxs(NumElts);
3480 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3481 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3482 Idxs[i + l + 0] = i + l + Offset;
3483 Idxs[i + l + 1] = i + l + Offset;
3484 }
3485
3486 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3487
3488 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3489 CI->getArgOperand(1));
3490 } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
3491 Name.starts_with("avx512.mask.unpckl."))) {
3492 Value *Op0 = CI->getArgOperand(0);
3493 Value *Op1 = CI->getArgOperand(1);
3494 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3495 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3496
3497 SmallVector<int, 64> Idxs(NumElts);
3498 for (int l = 0; l != NumElts; l += NumLaneElts)
3499 for (int i = 0; i != NumLaneElts; ++i)
3500 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3501
3502 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3503
3504 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3505 CI->getArgOperand(2));
3506 } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
3507 Name.starts_with("avx512.mask.unpckh."))) {
3508 Value *Op0 = CI->getArgOperand(0);
3509 Value *Op1 = CI->getArgOperand(1);
3510 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3511 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3512
3513 SmallVector<int, 64> Idxs(NumElts);
3514 for (int l = 0; l != NumElts; l += NumLaneElts)
3515 for (int i = 0; i != NumLaneElts; ++i)
3516 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3517
3518 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3519
3520 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3521 CI->getArgOperand(2));
3522 } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
3523 Name.starts_with("avx512.mask.pand."))) {
3524 VectorType *FTy = cast<VectorType>(CI->getType());
3525 VectorType *ITy = VectorType::getInteger(FTy);
3526 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3527 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3528 Rep = Builder.CreateBitCast(Rep, FTy);
3529 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3530 CI->getArgOperand(2));
3531 } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
3532 Name.starts_with("avx512.mask.pandn."))) {
3533 VectorType *FTy = cast<VectorType>(CI->getType());
3534 VectorType *ITy = VectorType::getInteger(FTy);
3535 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3536 Rep = Builder.CreateAnd(Rep,
3537 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3538 Rep = Builder.CreateBitCast(Rep, FTy);
3539 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3540 CI->getArgOperand(2));
3541 } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
3542 Name.starts_with("avx512.mask.por."))) {
3543 VectorType *FTy = cast<VectorType>(CI->getType());
3544 VectorType *ITy = VectorType::getInteger(FTy);
3545 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3546 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3547 Rep = Builder.CreateBitCast(Rep, FTy);
3548 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3549 CI->getArgOperand(2));
3550 } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
3551 Name.starts_with("avx512.mask.pxor."))) {
3552 VectorType *FTy = cast<VectorType>(CI->getType());
3553 VectorType *ITy = VectorType::getInteger(FTy);
3554 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3555 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3556 Rep = Builder.CreateBitCast(Rep, FTy);
3557 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3558 CI->getArgOperand(2));
3559 } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
3560 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3561 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3562 CI->getArgOperand(2));
3563 } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
3564 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3565 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3566 CI->getArgOperand(2));
3567 } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
3568 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3569 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3570 CI->getArgOperand(2));
3571 } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
3572 if (Name.ends_with(".512")) {
3573 Intrinsic::ID IID;
3574 if (Name[17] == 's')
3575 IID = Intrinsic::x86_avx512_add_ps_512;
3576 else
3577 IID = Intrinsic::x86_avx512_add_pd_512;
3578
3579 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3580 { CI->getArgOperand(0), CI->getArgOperand(1),
3581 CI->getArgOperand(4) });
3582 } else {
3583 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3584 }
3585 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3586 CI->getArgOperand(2));
3587 } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
3588 if (Name.ends_with(".512")) {
3589 Intrinsic::ID IID;
3590 if (Name[17] == 's')
3591 IID = Intrinsic::x86_avx512_div_ps_512;
3592 else
3593 IID = Intrinsic::x86_avx512_div_pd_512;
3594
3595 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3596 { CI->getArgOperand(0), CI->getArgOperand(1),
3597 CI->getArgOperand(4) });
3598 } else {
3599 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3600 }
3601 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3602 CI->getArgOperand(2));
3603 } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
3604 if (Name.ends_with(".512")) {
3605 Intrinsic::ID IID;
3606 if (Name[17] == 's')
3607 IID = Intrinsic::x86_avx512_mul_ps_512;
3608 else
3609 IID = Intrinsic::x86_avx512_mul_pd_512;
3610
3611 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3612 { CI->getArgOperand(0), CI->getArgOperand(1),
3613 CI->getArgOperand(4) });
3614 } else {
3615 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3616 }
3617 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3618 CI->getArgOperand(2));
3619 } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
3620 if (Name.ends_with(".512")) {
3621 Intrinsic::ID IID;
3622 if (Name[17] == 's')
3623 IID = Intrinsic::x86_avx512_sub_ps_512;
3624 else
3625 IID = Intrinsic::x86_avx512_sub_pd_512;
3626
3627 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3628 { CI->getArgOperand(0), CI->getArgOperand(1),
3629 CI->getArgOperand(4) });
3630 } else {
3631 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3632 }
3633 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3634 CI->getArgOperand(2));
3635 } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
3636 Name.starts_with("avx512.mask.min.p")) &&
3637 Name.drop_front(18) == ".512") {
3638 bool IsDouble = Name[17] == 'd';
3639 bool IsMin = Name[13] == 'i';
3640 static const Intrinsic::ID MinMaxTbl[2][2] = {
3641 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3642 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3643 };
3644 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3645
3646 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3647 { CI->getArgOperand(0), CI->getArgOperand(1),
3648 CI->getArgOperand(4) });
3649 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3650 CI->getArgOperand(2));
3651 } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
3652 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3653 Intrinsic::ctlz,
3654 CI->getType()),
3655 { CI->getArgOperand(0), Builder.getInt1(false) });
3656 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3657 CI->getArgOperand(1));
3658 } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
3659 bool IsImmediate = Name[16] == 'i' ||
3660 (Name.size() > 18 && Name[18] == 'i');
3661 bool IsVariable = Name[16] == 'v';
3662 char Size = Name[16] == '.' ? Name[17] :
3663 Name[17] == '.' ? Name[18] :
3664 Name[18] == '.' ? Name[19] :
3665 Name[20];
3666
3667 Intrinsic::ID IID;
3668 if (IsVariable && Name[17] != '.') {
3669 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3670 IID = Intrinsic::x86_avx2_psllv_q;
3671 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3672 IID = Intrinsic::x86_avx2_psllv_q_256;
3673 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3674 IID = Intrinsic::x86_avx2_psllv_d;
3675 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3676 IID = Intrinsic::x86_avx2_psllv_d_256;
3677 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3678 IID = Intrinsic::x86_avx512_psllv_w_128;
3679 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3680 IID = Intrinsic::x86_avx512_psllv_w_256;
3681 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3682 IID = Intrinsic::x86_avx512_psllv_w_512;
3683 else
3684 llvm_unreachable("Unexpected size");
3685 } else if (Name.ends_with(".128")) {
3686 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3687 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3688 : Intrinsic::x86_sse2_psll_d;
3689 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3690 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3691 : Intrinsic::x86_sse2_psll_q;
3692 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3693 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3694 : Intrinsic::x86_sse2_psll_w;
3695 else
3696 llvm_unreachable("Unexpected size");
3697 } else if (Name.ends_with(".256")) {
3698 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3699 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3700 : Intrinsic::x86_avx2_psll_d;
3701 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3702 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3703 : Intrinsic::x86_avx2_psll_q;
3704 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3705 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3706 : Intrinsic::x86_avx2_psll_w;
3707 else
3708 llvm_unreachable("Unexpected size");
3709 } else {
3710 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3711 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3712 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3713 Intrinsic::x86_avx512_psll_d_512;
3714 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3715 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3716 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3717 Intrinsic::x86_avx512_psll_q_512;
3718 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3719 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3720 : Intrinsic::x86_avx512_psll_w_512;
3721 else
3722 llvm_unreachable("Unexpected size");
3723 }
3724
3725 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3726 } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
3727 bool IsImmediate = Name[16] == 'i' ||
3728 (Name.size() > 18 && Name[18] == 'i');
3729 bool IsVariable = Name[16] == 'v';
3730 char Size = Name[16] == '.' ? Name[17] :
3731 Name[17] == '.' ? Name[18] :
3732 Name[18] == '.' ? Name[19] :
3733 Name[20];
3734
3735 Intrinsic::ID IID;
3736 if (IsVariable && Name[17] != '.') {
3737 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3738 IID = Intrinsic::x86_avx2_psrlv_q;
3739 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3740 IID = Intrinsic::x86_avx2_psrlv_q_256;
3741 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3742 IID = Intrinsic::x86_avx2_psrlv_d;
3743 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3744 IID = Intrinsic::x86_avx2_psrlv_d_256;
3745 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3746 IID = Intrinsic::x86_avx512_psrlv_w_128;
3747 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3748 IID = Intrinsic::x86_avx512_psrlv_w_256;
3749 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3750 IID = Intrinsic::x86_avx512_psrlv_w_512;
3751 else
3752 llvm_unreachable("Unexpected size");
3753 } else if (Name.ends_with(".128")) {
3754 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3755 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3756 : Intrinsic::x86_sse2_psrl_d;
3757 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3758 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3759 : Intrinsic::x86_sse2_psrl_q;
3760 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3761 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3762 : Intrinsic::x86_sse2_psrl_w;
3763 else
3764 llvm_unreachable("Unexpected size");
3765 } else if (Name.ends_with(".256")) {
3766 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3767 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3768 : Intrinsic::x86_avx2_psrl_d;
3769 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3770 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3771 : Intrinsic::x86_avx2_psrl_q;
3772 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3773 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3774 : Intrinsic::x86_avx2_psrl_w;
3775 else
3776 llvm_unreachable("Unexpected size");
3777 } else {
3778 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3779 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3780 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3781 Intrinsic::x86_avx512_psrl_d_512;
3782 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3783 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3784 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3785 Intrinsic::x86_avx512_psrl_q_512;
3786 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3787 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3788 : Intrinsic::x86_avx512_psrl_w_512;
3789 else
3790 llvm_unreachable("Unexpected size");
3791 }
3792
3793 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3794 } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
3795 bool IsImmediate = Name[16] == 'i' ||
3796 (Name.size() > 18 && Name[18] == 'i');
3797 bool IsVariable = Name[16] == 'v';
3798 char Size = Name[16] == '.' ? Name[17] :
3799 Name[17] == '.' ? Name[18] :
3800 Name[18] == '.' ? Name[19] :
3801 Name[20];
3802
3803 Intrinsic::ID IID;
3804 if (IsVariable && Name[17] != '.') {
3805 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3806 IID = Intrinsic::x86_avx2_psrav_d;
3807 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3808 IID = Intrinsic::x86_avx2_psrav_d_256;
3809 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3810 IID = Intrinsic::x86_avx512_psrav_w_128;
3811 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3812 IID = Intrinsic::x86_avx512_psrav_w_256;
3813 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3814 IID = Intrinsic::x86_avx512_psrav_w_512;
3815 else
3816 llvm_unreachable("Unexpected size");
3817 } else if (Name.ends_with(".128")) {
3818 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3819 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3820 : Intrinsic::x86_sse2_psra_d;
3821 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3822 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3823 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3824 Intrinsic::x86_avx512_psra_q_128;
3825 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3826 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3827 : Intrinsic::x86_sse2_psra_w;
3828 else
3829 llvm_unreachable("Unexpected size");
3830 } else if (Name.ends_with(".256")) {
3831 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3832 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3833 : Intrinsic::x86_avx2_psra_d;
3834 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3835 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3836 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3837 Intrinsic::x86_avx512_psra_q_256;
3838 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3839 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3840 : Intrinsic::x86_avx2_psra_w;
3841 else
3842 llvm_unreachable("Unexpected size");
3843 } else {
3844 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3845 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3846 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3847 Intrinsic::x86_avx512_psra_d_512;
3848 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3849 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3850 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3851 Intrinsic::x86_avx512_psra_q_512;
3852 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3853 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3854 : Intrinsic::x86_avx512_psra_w_512;
3855 else
3856 llvm_unreachable("Unexpected size");
3857 }
3858
3859 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3860 } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
3861 Rep = upgradeMaskedMove(Builder, *CI);
3862 } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
3863 Rep = upgradeMaskToInt(Builder, *CI);
3864 } else if (IsX86 && Name.ends_with(".movntdqa")) {
3865 MDNode *Node = MDNode::get(
3866 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3867
3868 Value *Ptr = CI->getArgOperand(0);
3869
3870 // Convert the type of the pointer to a pointer to the stored type.
3871 Value *BC = Builder.CreateBitCast(
3872 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3873 LoadInst *LI = Builder.CreateAlignedLoad(
3874 CI->getType(), BC,
3876 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3877 Rep = LI;
3878 } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
3879 Name.starts_with("fma.vfmsub.") ||
3880 Name.starts_with("fma.vfnmadd.") ||
3881 Name.starts_with("fma.vfnmsub."))) {
3882 bool NegMul = Name[6] == 'n';
3883 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3884 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3885
3886 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3887 CI->getArgOperand(2) };
3888
3889 if (IsScalar) {
3890 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3891 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3892 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3893 }
3894
3895 if (NegMul && !IsScalar)
3896 Ops[0] = Builder.CreateFNeg(Ops[0]);
3897 if (NegMul && IsScalar)
3898 Ops[1] = Builder.CreateFNeg(Ops[1]);
3899 if (NegAcc)
3900 Ops[2] = Builder.CreateFNeg(Ops[2]);
3901
3903 Intrinsic::fma,
3904 Ops[0]->getType()),
3905 Ops);
3906
3907 if (IsScalar)
3908 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3909 (uint64_t)0);
3910 } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
3911 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3912 CI->getArgOperand(2) };
3913
3914 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3915 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3916 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3917
3919 Intrinsic::fma,
3920 Ops[0]->getType()),
3921 Ops);
3922
3924 Rep, (uint64_t)0);
3925 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
3926 Name.starts_with("avx512.maskz.vfmadd.s") ||
3927 Name.starts_with("avx512.mask3.vfmadd.s") ||
3928 Name.starts_with("avx512.mask3.vfmsub.s") ||
3929 Name.starts_with("avx512.mask3.vfnmsub.s"))) {
3930 bool IsMask3 = Name[11] == '3';
3931 bool IsMaskZ = Name[11] == 'z';
3932 // Drop the "avx512.mask." to make it easier.
3933 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3934 bool NegMul = Name[2] == 'n';
3935 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3936
3937 Value *A = CI->getArgOperand(0);
3938 Value *B = CI->getArgOperand(1);
3939 Value *C = CI->getArgOperand(2);
3940
3941 if (NegMul && (IsMask3 || IsMaskZ))
3942 A = Builder.CreateFNeg(A);
3943 if (NegMul && !(IsMask3 || IsMaskZ))
3944 B = Builder.CreateFNeg(B);
3945 if (NegAcc)
3946 C = Builder.CreateFNeg(C);
3947
3948 A = Builder.CreateExtractElement(A, (uint64_t)0);
3949 B = Builder.CreateExtractElement(B, (uint64_t)0);
3950 C = Builder.CreateExtractElement(C, (uint64_t)0);
3951
3952 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3953 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3954 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3955
3956 Intrinsic::ID IID;
3957 if (Name.back() == 'd')
3958 IID = Intrinsic::x86_avx512_vfmadd_f64;
3959 else
3960 IID = Intrinsic::x86_avx512_vfmadd_f32;
3961 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3962 Rep = Builder.CreateCall(FMA, Ops);
3963 } else {
3965 Intrinsic::fma,
3966 A->getType());
3967 Rep = Builder.CreateCall(FMA, { A, B, C });
3968 }
3969
3970 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3971 IsMask3 ? C : A;
3972
3973 // For Mask3 with NegAcc, we need to create a new extractelement that
3974 // avoids the negation above.
3975 if (NegAcc && IsMask3)
3976 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3977 (uint64_t)0);
3978
3979 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3980 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3981 Rep, (uint64_t)0);
3982 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
3983 Name.starts_with("avx512.mask.vfnmadd.p") ||
3984 Name.starts_with("avx512.mask.vfnmsub.p") ||
3985 Name.starts_with("avx512.mask3.vfmadd.p") ||
3986 Name.starts_with("avx512.mask3.vfmsub.p") ||
3987 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3988 Name.starts_with("avx512.maskz.vfmadd.p"))) {
3989 bool IsMask3 = Name[11] == '3';
3990 bool IsMaskZ = Name[11] == 'z';
3991 // Drop the "avx512.mask." to make it easier.
3992 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3993 bool NegMul = Name[2] == 'n';
3994 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3995
3996 Value *A = CI->getArgOperand(0);
3997 Value *B = CI->getArgOperand(1);
3998 Value *C = CI->getArgOperand(2);
3999
4000 if (NegMul && (IsMask3 || IsMaskZ))
4001 A = Builder.CreateFNeg(A);
4002 if (NegMul && !(IsMask3 || IsMaskZ))
4003 B = Builder.CreateFNeg(B);
4004 if (NegAcc)
4005 C = Builder.CreateFNeg(C);
4006
4007 if (CI->arg_size() == 5 &&
4008 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4009 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4010 Intrinsic::ID IID;
4011 // Check the character before ".512" in string.
4012 if (Name[Name.size()-5] == 's')
4013 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4014 else
4015 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4016
4017 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
4018 { A, B, C, CI->getArgOperand(4) });
4019 } else {
4021 Intrinsic::fma,
4022 A->getType());
4023 Rep = Builder.CreateCall(FMA, { A, B, C });
4024 }
4025
4026 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
4027 IsMask3 ? CI->getArgOperand(2) :
4028 CI->getArgOperand(0);
4029
4030 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4031 } else if (IsX86 && Name.starts_with("fma.vfmsubadd.p")) {
4032 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4033 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4034 Intrinsic::ID IID;
4035 if (VecWidth == 128 && EltWidth == 32)
4036 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4037 else if (VecWidth == 256 && EltWidth == 32)
4038 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4039 else if (VecWidth == 128 && EltWidth == 64)
4040 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4041 else if (VecWidth == 256 && EltWidth == 64)
4042 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4043 else
4044 llvm_unreachable("Unexpected intrinsic");
4045
4046 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4047 CI->getArgOperand(2) };
4048 Ops[2] = Builder.CreateFNeg(Ops[2]);
4049 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
4050 Ops);
4051 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4052 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4053 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4054 Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
4055 bool IsMask3 = Name[11] == '3';
4056 bool IsMaskZ = Name[11] == 'z';
4057 // Drop the "avx512.mask." to make it easier.
4058 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4059 bool IsSubAdd = Name[3] == 's';
4060 if (CI->arg_size() == 5) {
4061 Intrinsic::ID IID;
4062 // Check the character before ".512" in string.
4063 if (Name[Name.size()-5] == 's')
4064 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4065 else
4066 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4067
4068 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4069 CI->getArgOperand(2), CI->getArgOperand(4) };
4070 if (IsSubAdd)
4071 Ops[2] = Builder.CreateFNeg(Ops[2]);
4072
4073 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
4074 Ops);
4075 } else {
4076 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4077
4078 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4079 CI->getArgOperand(2) };
4080
4081 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
4082 Ops[0]->getType());
4083 Value *Odd = Builder.CreateCall(FMA, Ops);
4084 Ops[2] = Builder.CreateFNeg(Ops[2]);
4085 Value *Even = Builder.CreateCall(FMA, Ops);
4086
4087 if (IsSubAdd)
4088 std::swap(Even, Odd);
4089
4090 SmallVector<int, 32> Idxs(NumElts);
4091 for (int i = 0; i != NumElts; ++i)
4092 Idxs[i] = i + (i % 2) * NumElts;
4093
4094 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4095 }
4096
4097 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
4098 IsMask3 ? CI->getArgOperand(2) :
4099 CI->getArgOperand(0);
4100
4101 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4102 } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
4103 Name.starts_with("avx512.maskz.pternlog."))) {
4104 bool ZeroMask = Name[11] == 'z';
4105 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4106 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4107 Intrinsic::ID IID;
4108 if (VecWidth == 128 && EltWidth == 32)
4109 IID = Intrinsic::x86_avx512_pternlog_d_128;
4110 else if (VecWidth == 256 && EltWidth == 32)
4111 IID = Intrinsic::x86_avx512_pternlog_d_256;
4112 else if (VecWidth == 512 && EltWidth == 32)
4113 IID = Intrinsic::x86_avx512_pternlog_d_512;
4114 else if (VecWidth == 128 && EltWidth == 64)
4115 IID = Intrinsic::x86_avx512_pternlog_q_128;
4116 else if (VecWidth == 256 && EltWidth == 64)
4117 IID = Intrinsic::x86_avx512_pternlog_q_256;
4118 else if (VecWidth == 512 && EltWidth == 64)
4119 IID = Intrinsic::x86_avx512_pternlog_q_512;
4120 else
4121 llvm_unreachable("Unexpected intrinsic");
4122
4123 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4124 CI->getArgOperand(2), CI->getArgOperand(3) };
4125 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4126 Args);
4127 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4128 : CI->getArgOperand(0);
4129 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4130 } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
4131 Name.starts_with("avx512.maskz.vpmadd52"))) {
4132 bool ZeroMask = Name[11] == 'z';
4133 bool High = Name[20] == 'h' || Name[21] == 'h';
4134 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4135 Intrinsic::ID IID;
4136 if (VecWidth == 128 && !High)
4137 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4138 else if (VecWidth == 256 && !High)
4139 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4140 else if (VecWidth == 512 && !High)
4141 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4142 else if (VecWidth == 128 && High)
4143 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4144 else if (VecWidth == 256 && High)
4145 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4146 else if (VecWidth == 512 && High)
4147 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4148 else
4149 llvm_unreachable("Unexpected intrinsic");
4150
4151 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4152 CI->getArgOperand(2) };
4153 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4154 Args);
4155 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4156 : CI->getArgOperand(0);
4157 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4158 } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
4159 Name.starts_with("avx512.mask.vpermt2var.") ||
4160 Name.starts_with("avx512.maskz.vpermt2var."))) {
4161 bool ZeroMask = Name[11] == 'z';
4162 bool IndexForm = Name[17] == 'i';
4163 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4164 } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
4165 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4166 Name.starts_with("avx512.mask.vpdpbusds.") ||
4167 Name.starts_with("avx512.maskz.vpdpbusds."))) {
4168 bool ZeroMask = Name[11] == 'z';
4169 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4170 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4171 Intrinsic::ID IID;
4172 if (VecWidth == 128 && !IsSaturating)
4173 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4174 else if (VecWidth == 256 && !IsSaturating)
4175 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4176 else if (VecWidth == 512 && !IsSaturating)
4177 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4178 else if (VecWidth == 128 && IsSaturating)
4179 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4180 else if (VecWidth == 256 && IsSaturating)
4181 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4182 else if (VecWidth == 512 && IsSaturating)
4183 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4184 else
4185 llvm_unreachable("Unexpected intrinsic");
4186
4187 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4188 CI->getArgOperand(2) };
4189 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4190 Args);
4191 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4192 : CI->getArgOperand(0);
4193 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4194 } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
4195 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4196 Name.starts_with("avx512.mask.vpdpwssds.") ||
4197 Name.starts_with("avx512.maskz.vpdpwssds."))) {
4198 bool ZeroMask = Name[11] == 'z';
4199 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4200 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4201 Intrinsic::ID IID;
4202 if (VecWidth == 128 && !IsSaturating)
4203 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4204 else if (VecWidth == 256 && !IsSaturating)
4205 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4206 else if (VecWidth == 512 && !IsSaturating)
4207 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4208 else if (VecWidth == 128 && IsSaturating)
4209 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4210 else if (VecWidth == 256 && IsSaturating)
4211 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4212 else if (VecWidth == 512 && IsSaturating)
4213 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4214 else
4215 llvm_unreachable("Unexpected intrinsic");
4216
4217 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4218 CI->getArgOperand(2) };
4219 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4220 Args);
4221 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4222 : CI->getArgOperand(0);
4223 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4224 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4225 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4226 Name == "subborrow.u32" || Name == "subborrow.u64")) {
4227 Intrinsic::ID IID;
4228 if (Name[0] == 'a' && Name.back() == '2')
4229 IID = Intrinsic::x86_addcarry_32;
4230 else if (Name[0] == 'a' && Name.back() == '4')
4231 IID = Intrinsic::x86_addcarry_64;
4232 else if (Name[0] == 's' && Name.back() == '2')
4233 IID = Intrinsic::x86_subborrow_32;
4234 else if (Name[0] == 's' && Name.back() == '4')
4235 IID = Intrinsic::x86_subborrow_64;
4236 else
4237 llvm_unreachable("Unexpected intrinsic");
4238
4239 // Make a call with 3 operands.
4240 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4241 CI->getArgOperand(2)};
4242 Value *NewCall = Builder.CreateCall(
4244 Args);
4245
4246 // Extract the second result and store it.
4247 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4248 // Cast the pointer to the right type.
4249 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4250 llvm::PointerType::getUnqual(Data->getType()));
4251 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4252 // Replace the original call result with the first result of the new call.
4253 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4254
4255 CI->replaceAllUsesWith(CF);
4256 Rep = nullptr;
4257 } else if (IsX86 && Name.starts_with("avx512.mask.") &&
4258 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4259 // Rep will be updated by the call in the condition.
4260 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4261 Value *Arg = CI->getArgOperand(0);
4262 Value *Neg = Builder.CreateNeg(Arg, "neg");
4263 Value *Cmp = Builder.CreateICmpSGE(
4264 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4265 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4266 } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4267 Name.starts_with("atomic.load.add.f64.p"))) {
4268 Value *Ptr = CI->getArgOperand(0);
4269 Value *Val = CI->getArgOperand(1);
4270 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4271 AtomicOrdering::SequentiallyConsistent);
4272 } else if (IsNVVM && Name.consume_front("max.") &&
4273 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4274 Name == "ui" || Name == "ull")) {
4275 Value *Arg0 = CI->getArgOperand(0);
4276 Value *Arg1 = CI->getArgOperand(1);
4277 Value *Cmp = Name.starts_with("u")
4278 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4279 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4280 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4281 } else if (IsNVVM && Name.consume_front("min.") &&
4282 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4283 Name == "ui" || Name == "ull")) {
4284 Value *Arg0 = CI->getArgOperand(0);
4285 Value *Arg1 = CI->getArgOperand(1);
4286 Value *Cmp = Name.starts_with("u")
4287 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4288 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4289 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4290 } else if (IsNVVM && Name == "clz.ll") {
4291 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4292 Value *Arg = CI->getArgOperand(0);
4293 Value *Ctlz = Builder.CreateCall(
4294 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4295 {Arg->getType()}),
4296 {Arg, Builder.getFalse()}, "ctlz");
4297 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4298 } else if (IsNVVM && Name == "popc.ll") {
4299 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4300 // i64.
4301 Value *Arg = CI->getArgOperand(0);
4302 Value *Popc = Builder.CreateCall(
4303 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4304 {Arg->getType()}),
4305 Arg, "ctpop");
4306 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4307 } else if (IsNVVM) {
4308 if (Name == "h2f") {
4309 Rep =
4311 F->getParent(), Intrinsic::convert_from_fp16,
4312 {Builder.getFloatTy()}),
4313 CI->getArgOperand(0), "h2f");
4314 } else {
4316 if (IID != Intrinsic::not_intrinsic &&
4317 !F->getReturnType()->getScalarType()->isBFloatTy()) {
4318 rename(F);
4319 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4321 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4322 Value *Arg = CI->getArgOperand(I);
4323 Type *OldType = Arg->getType();
4324 Type *NewType = NewFn->getArg(I)->getType();
4325 Args.push_back((OldType->isIntegerTy() &&
4326 NewType->getScalarType()->isBFloatTy())
4327 ? Builder.CreateBitCast(Arg, NewType)
4328 : Arg);
4329 }
4330 Rep = Builder.CreateCall(NewFn, Args);
4331 if (F->getReturnType()->isIntegerTy())
4332 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4333 }
4334 }
4335 } else if (IsARM) {
4336 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4337 } else if (IsAMDGCN) {
4338 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4339 } else if (IsDbg) {
4340 // We might have decided we don't want the new format after all between
4341 // first requesting the upgrade and now; skip the conversion if that is
4342 // the case, and check here to see if the intrinsic needs to be upgraded
4343 // normally.
4344 if (!CI->getModule()->IsNewDbgInfoFormat) {
4345 bool NeedsUpgrade =
4346 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4347 if (!NeedsUpgrade)
4348 return;
4349 FallthroughToDefaultUpgrade = true;
4350 } else {
4352 }
4353 } else {
4354 llvm_unreachable("Unknown function for CallBase upgrade.");
4355 }
4356
4357 if (!FallthroughToDefaultUpgrade) {
4358 if (Rep)
4359 CI->replaceAllUsesWith(Rep);
4360 CI->eraseFromParent();
4361 return;
4362 }
4363 }
4364
4365 const auto &DefaultCase = [&]() -> void {
4366 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4367 // Handle generic mangling change.
4368 assert(
4369 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4370 "Unknown function for CallBase upgrade and isn't just a name change");
4371 CI->setCalledFunction(NewFn);
4372 return;
4373 }
4374
4375 // This must be an upgrade from a named to a literal struct.
4376 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4377 assert(OldST != NewFn->getReturnType() &&
4378 "Return type must have changed");
4379 assert(OldST->getNumElements() ==
4380 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4381 "Must have same number of elements");
4382
4383 SmallVector<Value *> Args(CI->args());
4384 Value *NewCI = Builder.CreateCall(NewFn, Args);
4385 Value *Res = PoisonValue::get(OldST);
4386 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4387 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4388 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4389 }
4390 CI->replaceAllUsesWith(Res);
4391 CI->eraseFromParent();
4392 return;
4393 }
4394
4395 // We're probably about to produce something invalid. Let the verifier catch
4396 // it instead of dying here.
4397 CI->setCalledOperand(
4399 return;
4400 };
4401 CallInst *NewCall = nullptr;
4402 switch (NewFn->getIntrinsicID()) {
4403 default: {
4404 DefaultCase();
4405 return;
4406 }
4407 case Intrinsic::arm_neon_vst1:
4408 case Intrinsic::arm_neon_vst2:
4409 case Intrinsic::arm_neon_vst3:
4410 case Intrinsic::arm_neon_vst4:
4411 case Intrinsic::arm_neon_vst2lane:
4412 case Intrinsic::arm_neon_vst3lane:
4413 case Intrinsic::arm_neon_vst4lane: {
4414 SmallVector<Value *, 4> Args(CI->args());
4415 NewCall = Builder.CreateCall(NewFn, Args);
4416 break;
4417 }
4418 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4419 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4420 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4421 LLVMContext &Ctx = F->getParent()->getContext();
4422 SmallVector<Value *, 4> Args(CI->args());
4423 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4424 cast<ConstantInt>(Args[3])->getZExtValue());
4425 NewCall = Builder.CreateCall(NewFn, Args);
4426 break;
4427 }
4428 case Intrinsic::aarch64_sve_ld3_sret:
4429 case Intrinsic::aarch64_sve_ld4_sret:
4430 case Intrinsic::aarch64_sve_ld2_sret: {
4431 StringRef Name = F->getName();
4432 Name = Name.substr(5);
4433 unsigned N = StringSwitch<unsigned>(Name)
4434 .StartsWith("aarch64.sve.ld2", 2)
4435 .StartsWith("aarch64.sve.ld3", 3)
4436 .StartsWith("aarch64.sve.ld4", 4)
4437 .Default(0);
4438 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4439 unsigned MinElts = RetTy->getMinNumElements() / N;
4440 SmallVector<Value *, 2> Args(CI->args());
4441 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4443 for (unsigned I = 0; I < N; I++) {
4444 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4445 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4446 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4447 }
4448 NewCall = dyn_cast<CallInst>(Ret);
4449 break;
4450 }
4451
4452 case Intrinsic::coro_end: {
4453 SmallVector<Value *, 3> Args(CI->args());
4454 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4455 NewCall = Builder.CreateCall(NewFn, Args);
4456 break;
4457 }
4458
4459 case Intrinsic::vector_extract: {
4460 StringRef Name = F->getName();
4461 Name = Name.substr(5); // Strip llvm
4462 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4463 DefaultCase();
4464 return;
4465 }
4466 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4467 unsigned MinElts = RetTy->getMinNumElements();
4468 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4469 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4470 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4471 break;
4472 }
4473
4474 case Intrinsic::vector_insert: {
4475 StringRef Name = F->getName();
4476 Name = Name.substr(5);
4477 if (!Name.starts_with("aarch64.sve.tuple")) {
4478 DefaultCase();
4479 return;
4480 }
4481 if (Name.starts_with("aarch64.sve.tuple.set")) {
4482 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4483 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4484 Value *NewIdx =
4485 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4486 NewCall = Builder.CreateCall(
4487 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4488 break;
4489 }
4490 if (Name.starts_with("aarch64.sve.tuple.create")) {
4491 unsigned N = StringSwitch<unsigned>(Name)
4492 .StartsWith("aarch64.sve.tuple.create2", 2)
4493 .StartsWith("aarch64.sve.tuple.create3", 3)
4494 .StartsWith("aarch64.sve.tuple.create4", 4)
4495 .Default(0);
4496 assert(N > 1 && "Create is expected to be between 2-4");
4497 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4499 unsigned MinElts = RetTy->getMinNumElements() / N;
4500 for (unsigned I = 0; I < N; I++) {
4501 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4502 Value *V = CI->getArgOperand(I);
4503 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4504 }
4505 NewCall = dyn_cast<CallInst>(Ret);
4506 }
4507 break;
4508 }
4509
4510 case Intrinsic::arm_neon_bfdot:
4511 case Intrinsic::arm_neon_bfmmla:
4512 case Intrinsic::arm_neon_bfmlalb:
4513 case Intrinsic::arm_neon_bfmlalt:
4514 case Intrinsic::aarch64_neon_bfdot:
4515 case Intrinsic::aarch64_neon_bfmmla:
4516 case Intrinsic::aarch64_neon_bfmlalb:
4517 case Intrinsic::aarch64_neon_bfmlalt: {
4519 assert(CI->arg_size() == 3 &&
4520 "Mismatch between function args and call args");
4521 size_t OperandWidth =
4523 assert((OperandWidth == 64 || OperandWidth == 128) &&
4524 "Unexpected operand width");
4525 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4526 auto Iter = CI->args().begin();
4527 Args.push_back(*Iter++);
4528 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4529 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4530 NewCall = Builder.CreateCall(NewFn, Args);
4531 break;
4532 }
4533
4534 case Intrinsic::bitreverse:
4535 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4536 break;
4537
4538 case Intrinsic::ctlz:
4539 case Intrinsic::cttz:
4540 assert(CI->arg_size() == 1 &&
4541 "Mismatch between function args and call args");
4542 NewCall =
4543 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4544 break;
4545
4546 case Intrinsic::objectsize: {
4547 Value *NullIsUnknownSize =
4548 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4549 Value *Dynamic =
4550 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4551 NewCall = Builder.CreateCall(
4552 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4553 break;
4554 }
4555
4556 case Intrinsic::ctpop:
4557 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4558 break;
4559
4560 case Intrinsic::convert_from_fp16:
4561 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4562 break;
4563
4564 case Intrinsic::dbg_value: {
4565 StringRef Name = F->getName();
4566 Name = Name.substr(5); // Strip llvm.
4567 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4568 if (Name.starts_with("dbg.addr")) {
4569 DIExpression *Expr = cast<DIExpression>(
4570 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4571 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4572 NewCall =
4573 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4574 MetadataAsValue::get(C, Expr)});
4575 break;
4576 }
4577
4578 // Upgrade from the old version that had an extra offset argument.
4579 assert(CI->arg_size() == 4);
4580 // Drop nonzero offsets instead of attempting to upgrade them.
4581 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4582 if (Offset->isZeroValue()) {
4583 NewCall = Builder.CreateCall(
4584 NewFn,
4585 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4586 break;
4587 }
4588 CI->eraseFromParent();
4589 return;
4590 }
4591
4592 case Intrinsic::ptr_annotation:
4593 // Upgrade from versions that lacked the annotation attribute argument.
4594 if (CI->arg_size() != 4) {
4595 DefaultCase();
4596 return;
4597 }
4598
4599 // Create a new call with an added null annotation attribute argument.
4600 NewCall =
4601 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4602 CI->getArgOperand(2), CI->getArgOperand(3),
4603 Constant::getNullValue(Builder.getPtrTy())});
4604 NewCall->takeName(CI);
4605 CI->replaceAllUsesWith(NewCall);
4606 CI->eraseFromParent();
4607 return;
4608
4609 case Intrinsic::var_annotation:
4610 // Upgrade from versions that lacked the annotation attribute argument.
4611 if (CI->arg_size() != 4) {
4612 DefaultCase();
4613 return;
4614 }
4615 // Create a new call with an added null annotation attribute argument.
4616 NewCall =
4617 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4618 CI->getArgOperand(2), CI->getArgOperand(3),
4619 Constant::getNullValue(Builder.getPtrTy())});
4620 NewCall->takeName(CI);
4621 CI->replaceAllUsesWith(NewCall);
4622 CI->eraseFromParent();
4623 return;
4624
4625 case Intrinsic::riscv_aes32dsi:
4626 case Intrinsic::riscv_aes32dsmi:
4627 case Intrinsic::riscv_aes32esi:
4628 case Intrinsic::riscv_aes32esmi:
4629 case Intrinsic::riscv_sm4ks:
4630 case Intrinsic::riscv_sm4ed: {
4631 // The last argument to these intrinsics used to be i8 and changed to i32.
4632 // The type overload for sm4ks and sm4ed was removed.
4633 Value *Arg2 = CI->getArgOperand(2);
4634 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4635 return;
4636
4637 Value *Arg0 = CI->getArgOperand(0);
4638 Value *Arg1 = CI->getArgOperand(1);
4639 if (CI->getType()->isIntegerTy(64)) {
4640 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4641 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4642 }
4643
4644 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4645 cast<ConstantInt>(Arg2)->getZExtValue());
4646
4647 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4648 Value *Res = NewCall;
4649 if (Res->getType() != CI->getType())
4650 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4651 NewCall->takeName(CI);
4652 CI->replaceAllUsesWith(Res);
4653 CI->eraseFromParent();
4654 return;
4655 }
4656 case Intrinsic::riscv_sha256sig0:
4657 case Intrinsic::riscv_sha256sig1:
4658 case Intrinsic::riscv_sha256sum0:
4659 case Intrinsic::riscv_sha256sum1:
4660 case Intrinsic::riscv_sm3p0:
4661 case Intrinsic::riscv_sm3p1: {
4662 // The last argument to these intrinsics used to be i8 and changed to i32.
4663 // The type overload for sm4ks and sm4ed was removed.
4664 if (!CI->getType()->isIntegerTy(64))
4665 return;
4666
4667 Value *Arg =
4668 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4669
4670 NewCall = Builder.CreateCall(NewFn, Arg);
4671 Value *Res =
4672 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4673 NewCall->takeName(CI);
4674 CI->replaceAllUsesWith(Res);
4675 CI->eraseFromParent();
4676 return;
4677 }
4678
4679 case Intrinsic::x86_xop_vfrcz_ss:
4680 case Intrinsic::x86_xop_vfrcz_sd:
4681 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4682 break;
4683
4684 case Intrinsic::x86_xop_vpermil2pd:
4685 case Intrinsic::x86_xop_vpermil2ps:
4686 case Intrinsic::x86_xop_vpermil2pd_256:
4687 case Intrinsic::x86_xop_vpermil2ps_256: {
4688 SmallVector<Value *, 4> Args(CI->args());
4689 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4690 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4691 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4692 NewCall = Builder.CreateCall(NewFn, Args);
4693 break;
4694 }
4695
4696 case Intrinsic::x86_sse41_ptestc:
4697 case Intrinsic::x86_sse41_ptestz:
4698 case Intrinsic::x86_sse41_ptestnzc: {
4699 // The arguments for these intrinsics used to be v4f32, and changed
4700 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4701 // So, the only thing required is a bitcast for both arguments.
4702 // First, check the arguments have the old type.
4703 Value *Arg0 = CI->getArgOperand(0);
4704 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4705 return;
4706
4707 // Old intrinsic, add bitcasts
4708 Value *Arg1 = CI->getArgOperand(1);
4709
4710 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4711
4712 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4713 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4714
4715 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4716 break;
4717 }
4718
4719 case Intrinsic::x86_rdtscp: {
4720 // This used to take 1 arguments. If we have no arguments, it is already
4721 // upgraded.
4722 if (CI->getNumOperands() == 0)
4723 return;
4724
4725 NewCall = Builder.CreateCall(NewFn);
4726 // Extract the second result and store it.
4727 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4728 // Cast the pointer to the right type.
4729 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4730 llvm::PointerType::getUnqual(Data->getType()));
4731 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4732 // Replace the original call result with the first result of the new call.
4733 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4734
4735 NewCall->takeName(CI);
4736 CI->replaceAllUsesWith(TSC);
4737 CI->eraseFromParent();
4738 return;
4739 }
4740
4741 case Intrinsic::x86_sse41_insertps:
4742 case Intrinsic::x86_sse41_dppd:
4743 case Intrinsic::x86_sse41_dpps:
4744 case Intrinsic::x86_sse41_mpsadbw:
4745 case Intrinsic::x86_avx_dp_ps_256:
4746 case Intrinsic::x86_avx2_mpsadbw: {
4747 // Need to truncate the last argument from i32 to i8 -- this argument models
4748 // an inherently 8-bit immediate operand to these x86 instructions.
4749 SmallVector<Value *, 4> Args(CI->args());
4750
4751 // Replace the last argument with a trunc.
4752 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4753 NewCall = Builder.CreateCall(NewFn, Args);
4754 break;
4755 }
4756
4757 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4758 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4759 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4760 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4761 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4762 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4763 SmallVector<Value *, 4> Args(CI->args());
4764 unsigned NumElts =
4765 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4766 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4767
4768 NewCall = Builder.CreateCall(NewFn, Args);
4769 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4770
4771 NewCall->takeName(CI);
4772 CI->replaceAllUsesWith(Res);
4773 CI->eraseFromParent();
4774 return;
4775 }
4776
4777 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4778 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4779 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4780 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4781 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4782 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4783 SmallVector<Value *, 4> Args(CI->args());
4784 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4785 if (NewFn->getIntrinsicID() ==
4786 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4787 Args[1] = Builder.CreateBitCast(
4788 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4789
4790 NewCall = Builder.CreateCall(NewFn, Args);
4791 Value *Res = Builder.CreateBitCast(
4792 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4793
4794 NewCall->takeName(CI);
4795 CI->replaceAllUsesWith(Res);
4796 CI->eraseFromParent();
4797 return;
4798 }
4799 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4800 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4801 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4802 SmallVector<Value *, 4> Args(CI->args());
4803 unsigned NumElts =
4804 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4805 Args[1] = Builder.CreateBitCast(
4806 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4807 Args[2] = Builder.CreateBitCast(
4808 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4809
4810 NewCall = Builder.CreateCall(NewFn, Args);
4811 break;
4812 }
4813
4814 case Intrinsic::thread_pointer: {
4815 NewCall = Builder.CreateCall(NewFn, {});
4816 break;
4817 }
4818
4819 case Intrinsic::memcpy:
4820 case Intrinsic::memmove:
4821 case Intrinsic::memset: {
4822 // We have to make sure that the call signature is what we're expecting.
4823 // We only want to change the old signatures by removing the alignment arg:
4824 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4825 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4826 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4827 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4828 // Note: i8*'s in the above can be any pointer type
4829 if (CI->arg_size() != 5) {
4830 DefaultCase();
4831 return;
4832 }
4833 // Remove alignment argument (3), and add alignment attributes to the
4834 // dest/src pointers.
4835 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4836 CI->getArgOperand(2), CI->getArgOperand(4)};
4837 NewCall = Builder.CreateCall(NewFn, Args);
4838 AttributeList OldAttrs = CI->getAttributes();
4840 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4841 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4842 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4843 NewCall->setAttributes(NewAttrs);
4844 auto *MemCI = cast<MemIntrinsic>(NewCall);
4845 // All mem intrinsics support dest alignment.
4846 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4847 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4848 // Memcpy/Memmove also support source alignment.
4849 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4850 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4851 break;
4852 }
4853 }
4854 assert(NewCall && "Should have either set this variable or returned through "
4855 "the default case");
4856 NewCall->takeName(CI);
4857 CI->replaceAllUsesWith(NewCall);
4858 CI->eraseFromParent();
4859}
4860
4862 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4863
4864 // Check if this function should be upgraded and get the replacement function
4865 // if there is one.
4866 Function *NewFn;
4867 if (UpgradeIntrinsicFunction(F, NewFn)) {
4868 // Replace all users of the old function with the new function or new
4869 // instructions. This is not a range loop because the call is deleted.
4870 for (User *U : make_early_inc_range(F->users()))
4871 if (CallBase *CB = dyn_cast<CallBase>(U))
4872 UpgradeIntrinsicCall(CB, NewFn);
4873
4874 // Remove old function, no longer used, from the module.
4875 F->eraseFromParent();
4876 }
4877}
4878
4880 const unsigned NumOperands = MD.getNumOperands();
4881 if (NumOperands == 0)
4882 return &MD; // Invalid, punt to a verifier error.
4883
4884 // Check if the tag uses struct-path aware TBAA format.
4885 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4886 return &MD;
4887
4888 auto &Context = MD.getContext();
4889 if (NumOperands == 3) {
4890 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4891 MDNode *ScalarType = MDNode::get(Context, Elts);
4892 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4893 Metadata *Elts2[] = {ScalarType, ScalarType,
4896 MD.getOperand(2)};
4897 return MDNode::get(Context, Elts2);
4898 }
4899 // Create a MDNode <MD, MD, offset 0>
4901 Type::getInt64Ty(Context)))};
4902 return MDNode::get(Context, Elts);
4903}
4904
4906 Instruction *&Temp) {
4907 if (Opc != Instruction::BitCast)
4908 return nullptr;
4909
4910 Temp = nullptr;
4911 Type *SrcTy = V->getType();
4912 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4913 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4914 LLVMContext &Context = V->getContext();
4915
4916 // We have no information about target data layout, so we assume that
4917 // the maximum pointer size is 64bit.
4918 Type *MidTy = Type::getInt64Ty(Context);
4919 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4920
4921 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4922 }
4923
4924 return nullptr;
4925}
4926
4928 if (Opc != Instruction::BitCast)
4929 return nullptr;
4930
4931 Type *SrcTy = C->getType();
4932 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4933 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4934 LLVMContext &Context = C->getContext();
4935
4936 // We have no information about target data layout, so we assume that
4937 // the maximum pointer size is 64bit.
4938 Type *MidTy = Type::getInt64Ty(Context);
4939
4941 DestTy);
4942 }
4943
4944 return nullptr;
4945}
4946
4947/// Check the debug info version number, if it is out-dated, drop the debug
4948/// info. Return true if module is modified.
4951 return false;
4952
4953 unsigned Version = getDebugMetadataVersionFromModule(M);
4954 if (Version == DEBUG_METADATA_VERSION) {
4955 bool BrokenDebugInfo = false;
4956 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4957 report_fatal_error("Broken module found, compilation aborted!");
4958 if (!BrokenDebugInfo)
4959 // Everything is ok.
4960 return false;
4961 else {
4962 // Diagnose malformed debug info.
4964 M.getContext().diagnose(Diag);
4965 }
4966 }
4967 bool Modified = StripDebugInfo(M);
4968 if (Modified && Version != DEBUG_METADATA_VERSION) {
4969 // Diagnose a version mismatch.
4970 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4971 M.getContext().diagnose(DiagVersion);
4972 }
4973 return Modified;
4974}
4975
4976/// This checks for objc retain release marker which should be upgraded. It
4977/// returns true if module is modified.
4979 bool Changed = false;
4980 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4981 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4982 if (ModRetainReleaseMarker) {
4983 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4984 if (Op) {
4985 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4986 if (ID) {
4987 SmallVector<StringRef, 4> ValueComp;
4988 ID->getString().split(ValueComp, "#");
4989 if (ValueComp.size() == 2) {
4990 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4991 ID = MDString::get(M.getContext(), NewValue);
4992 }
4993 M.addModuleFlag(Module::Error, MarkerKey, ID);
4994 M.eraseNamedMetadata(ModRetainReleaseMarker);
4995 Changed = true;
4996 }
4997 }
4998 }
4999 return Changed;
5000}
5001
5003 // This lambda converts normal function calls to ARC runtime functions to
5004 // intrinsic calls.
5005 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5006 llvm::Intrinsic::ID IntrinsicFunc) {
5007 Function *Fn = M.getFunction(OldFunc);
5008
5009 if (!Fn)
5010 return;
5011
5012 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
5013
5014 for (User *U : make_early_inc_range(Fn->users())) {
5015 CallInst *CI = dyn_cast<CallInst>(U);
5016 if (!CI || CI->getCalledFunction() != Fn)
5017 continue;
5018
5019 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5020 FunctionType *NewFuncTy = NewFn->getFunctionType();
5022
5023 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5024 // value to the return type of the old function.
5025 if (NewFuncTy->getReturnType() != CI->getType() &&
5026 !CastInst::castIsValid(Instruction::BitCast, CI,
5027 NewFuncTy->getReturnType()))
5028 continue;
5029
5030 bool InvalidCast = false;
5031
5032 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5033 Value *Arg = CI->getArgOperand(I);
5034
5035 // Bitcast argument to the parameter type of the new function if it's
5036 // not a variadic argument.
5037 if (I < NewFuncTy->getNumParams()) {
5038 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5039 // to the parameter type of the new function.
5040 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5041 NewFuncTy->getParamType(I))) {
5042 InvalidCast = true;
5043 break;
5044 }
5045 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5046 }
5047 Args.push_back(Arg);
5048 }
5049
5050 if (InvalidCast)
5051 continue;
5052
5053 // Create a call instruction that calls the new function.
5054 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5055 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5056 NewCall->takeName(CI);
5057
5058 // Bitcast the return value back to the type of the old call.
5059 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5060
5061 if (!CI->use_empty())
5062 CI->replaceAllUsesWith(NewRetVal);
5063 CI->eraseFromParent();
5064 }
5065
5066 if (Fn->use_empty())
5067 Fn->eraseFromParent();
5068 };
5069
5070 // Unconditionally convert a call to "clang.arc.use" to a call to
5071 // "llvm.objc.clang.arc.use".
5072 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5073
5074 // Upgrade the retain release marker. If there is no need to upgrade
5075 // the marker, that means either the module is already new enough to contain
5076 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5078 return;
5079
5080 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5081 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5082 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5083 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5084 {"objc_autoreleaseReturnValue",
5085 llvm::Intrinsic::objc_autoreleaseReturnValue},
5086 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5087 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5088 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5089 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5090 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5091 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5092 {"objc_release", llvm::Intrinsic::objc_release},
5093 {"objc_retain", llvm::Intrinsic::objc_retain},
5094 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5095 {"objc_retainAutoreleaseReturnValue",
5096 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5097 {"objc_retainAutoreleasedReturnValue",
5098 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5099 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5100 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5101 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5102 {"objc_unsafeClaimAutoreleasedReturnValue",
5103 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5104 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5105 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5106 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5107 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5108 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5109 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5110 {"objc_arc_annotation_topdown_bbstart",
5111 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5112 {"objc_arc_annotation_topdown_bbend",
5113 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5114 {"objc_arc_annotation_bottomup_bbstart",
5115 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5116 {"objc_arc_annotation_bottomup_bbend",
5117 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5118
5119 for (auto &I : RuntimeFuncs)
5120 UpgradeToIntrinsic(I.first, I.second);
5121}
5122
5124 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5125 if (!ModFlags)
5126 return false;
5127
5128 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5129 bool HasSwiftVersionFlag = false;
5130 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5131 uint32_t SwiftABIVersion;
5132 auto Int8Ty = Type::getInt8Ty(M.getContext());
5133 auto Int32Ty = Type::getInt32Ty(M.getContext());
5134
5135 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5136 MDNode *Op = ModFlags->getOperand(I);
5137 if (Op->getNumOperands() != 3)
5138 continue;
5139 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5140 if (!ID)
5141 continue;
5142 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5143 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5144 Type::getInt32Ty(M.getContext()), B)),
5145 MDString::get(M.getContext(), ID->getString()),
5146 Op->getOperand(2)};
5147 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5148 Changed = true;
5149 };
5150
5151 if (ID->getString() == "Objective-C Image Info Version")
5152 HasObjCFlag = true;
5153 if (ID->getString() == "Objective-C Class Properties")
5154 HasClassProperties = true;
5155 // Upgrade PIC from Error/Max to Min.
5156 if (ID->getString() == "PIC Level") {
5157 if (auto *Behavior =
5158 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5159 uint64_t V = Behavior->getLimitedValue();
5160 if (V == Module::Error || V == Module::Max)
5161 SetBehavior(Module::Min);
5162 }
5163 }
5164 // Upgrade "PIE Level" from Error to Max.
5165 if (ID->getString() == "PIE Level")
5166 if (auto *Behavior =
5167 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5168 if (Behavior->getLimitedValue() == Module::Error)
5169 SetBehavior(Module::Max);
5170
5171 // Upgrade branch protection and return address signing module flags. The
5172 // module flag behavior for these fields were Error and now they are Min.
5173 if (ID->getString() == "branch-target-enforcement" ||
5174 ID->getString().starts_with("sign-return-address")) {
5175 if (auto *Behavior =
5176 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5177 if (Behavior->getLimitedValue() == Module::Error) {
5178 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5179 Metadata *Ops[3] = {
5180 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5181 Op->getOperand(1), Op->getOperand(2)};
5182 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5183 Changed = true;
5184 }
5185 }
5186 }
5187
5188 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5189 // section name so that llvm-lto will not complain about mismatching
5190 // module flags that is functionally the same.
5191 if (ID->getString() == "Objective-C Image Info Section") {
5192 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5193 SmallVector<StringRef, 4> ValueComp;
5194 Value->getString().split(ValueComp, " ");
5195 if (ValueComp.size() != 1) {
5196 std::string NewValue;
5197 for (auto &S : ValueComp)
5198 NewValue += S.str();
5199 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5200 MDString::get(M.getContext(), NewValue)};
5201 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5202 Changed = true;
5203 }
5204 }
5205 }
5206
5207 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5208 // If the higher bits are set, it adds new module flag for swift info.
5209 if (ID->getString() == "Objective-C Garbage Collection") {
5210 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5211 if (Md) {
5212 assert(Md->getValue() && "Expected non-empty metadata");
5213 auto Type = Md->getValue()->getType();
5214 if (Type == Int8Ty)
5215 continue;
5216 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5217 if ((Val & 0xff) != Val) {
5218 HasSwiftVersionFlag = true;
5219 SwiftABIVersion = (Val & 0xff00) >> 8;
5220 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5221 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5222 }
5223 Metadata *Ops[3] = {
5224 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5225 Op->getOperand(1),
5226 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5227 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5228 Changed = true;
5229 }
5230 }
5231
5232 if (ID->getString() == "amdgpu_code_object_version") {
5233 Metadata *Ops[3] = {
5234 Op->getOperand(0),
5235 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5236 Op->getOperand(2)};
5237 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5238 Changed = true;
5239 }
5240 }
5241
5242 // "Objective-C Class Properties" is recently added for Objective-C. We
5243 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5244 // flag of value 0, so we can correclty downgrade this flag when trying to
5245 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5246 // this module flag.
5247 if (HasObjCFlag && !HasClassProperties) {
5248 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5249 (uint32_t)0);
5250 Changed = true;
5251 }
5252
5253 if (HasSwiftVersionFlag) {
5254 M.addModuleFlag(Module::Error, "Swift ABI Version",
5255 SwiftABIVersion);
5256 M.addModuleFlag(Module::Error, "Swift Major Version",
5257 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5258 M.addModuleFlag(Module::Error, "Swift Minor Version",
5259 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5260 Changed = true;
5261 }
5262
5263 return Changed;
5264}
5265
5267 auto TrimSpaces = [](StringRef Section) -> std::string {
5268 SmallVector<StringRef, 5> Components;
5269 Section.split(Components, ',');
5270
5271 SmallString<32> Buffer;
5272 raw_svector_ostream OS(Buffer);
5273
5274 for (auto Component : Components)
5275 OS << ',' << Component.trim();
5276
5277 return std::string(OS.str().substr(1));
5278 };
5279
5280 for (auto &GV : M.globals()) {
5281 if (!GV.hasSection())
5282 continue;
5283
5284 StringRef Section = GV.getSection();
5285
5286 if (!Section.starts_with("__DATA, __objc_catlist"))
5287 continue;
5288
5289 // __DATA, __objc_catlist, regular, no_dead_strip
5290 // __DATA,__objc_catlist,regular,no_dead_strip
5291 GV.setSection(TrimSpaces(Section));
5292 }
5293}
5294
5295namespace {
5296// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5297// callsites within a function that did not also have the strictfp attribute.
5298// Since 10.0, if strict FP semantics are needed within a function, the
5299// function must have the strictfp attribute and all calls within the function
5300// must also have the strictfp attribute. This latter restriction is
5301// necessary to prevent unwanted libcall simplification when a function is
5302// being cloned (such as for inlining).
5303//
5304// The "dangling" strictfp attribute usage was only used to prevent constant
5305// folding and other libcall simplification. The nobuiltin attribute on the
5306// callsite has the same effect.
5307struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5308 StrictFPUpgradeVisitor() = default;
5309
5310 void visitCallBase(CallBase &Call) {
5311 if (!Call.isStrictFP())
5312 return;
5313 if (isa<ConstrainedFPIntrinsic>(&Call))
5314 return;
5315 // If we get here, the caller doesn't have the strictfp attribute
5316 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5317 Call.removeFnAttr(Attribute::StrictFP);
5318 Call.addFnAttr(Attribute::NoBuiltin);
5319 }
5320};
5321} // namespace
5322
5324 // If a function definition doesn't have the strictfp attribute,
5325 // convert any callsite strictfp attributes to nobuiltin.
5326 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5327 StrictFPUpgradeVisitor SFPV;
5328 SFPV.visit(F);
5329 }
5330
5331 // Remove all incompatibile attributes from function.
5332 F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5333 for (auto &Arg : F.args())
5334 Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5335
5336 // Older versions of LLVM treated an "implicit-section-name" attribute
5337 // similarly to directly setting the section on a Function.
5338 if (Attribute A = F.getFnAttribute("implicit-section-name");
5339 A.isValid() && A.isStringAttribute()) {
5340 F.setSection(A.getValueAsString());
5341 F.removeFnAttr("implicit-section-name");
5342 }
5343}
5344
5345static bool isOldLoopArgument(Metadata *MD) {
5346 auto *T = dyn_cast_or_null<MDTuple>(MD);
5347 if (!T)
5348 return false;
5349 if (T->getNumOperands() < 1)
5350 return false;
5351 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5352 if (!S)
5353 return false;
5354 return S->getString().starts_with("llvm.vectorizer.");
5355}
5356
5358 StringRef OldPrefix = "llvm.vectorizer.";
5359 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5360
5361 if (OldTag == "llvm.vectorizer.unroll")
5362 return MDString::get(C, "llvm.loop.interleave.count");
5363
5364 return MDString::get(
5365 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5366 .str());
5367}
5368
5370 auto *T = dyn_cast_or_null<MDTuple>(MD);
5371 if (!T)
5372 return MD;
5373 if (T->getNumOperands() < 1)
5374 return MD;
5375 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5376 if (!OldTag)
5377 return MD;
5378 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5379 return MD;
5380
5381 // This has an old tag. Upgrade it.
5383 Ops.reserve(T->getNumOperands());
5384 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5385 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5386 Ops.push_back(T->getOperand(I));
5387
5388 return MDTuple::get(T->getContext(), Ops);
5389}
5390
5392 auto *T = dyn_cast<MDTuple>(&N);
5393 if (!T)
5394 return &N;
5395
5396 if (none_of(T->operands(), isOldLoopArgument))
5397 return &N;
5398
5400 Ops.reserve(T->getNumOperands());
5401 for (Metadata *MD : T->operands())
5403
5404 return MDTuple::get(T->getContext(), Ops);
5405}
5406
5408 Triple T(TT);
5409 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5410 // the address space of globals to 1. This does not apply to SPIRV Logical.
5411 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5412 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5413 !DL.contains("-G") && !DL.starts_with("G")) {
5414 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5415 }
5416
5417 if (T.isLoongArch64() || T.isRISCV64()) {
5418 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5419 auto I = DL.find("-n64-");
5420 if (I != StringRef::npos)
5421 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5422 return DL.str();
5423 }
5424
5425 std::string Res = DL.str();
5426 // AMDGCN data layout upgrades.
5427 if (T.isAMDGCN()) {
5428 // Define address spaces for constants.
5429 if (!DL.contains("-G") && !DL.starts_with("G"))
5430 Res.append(Res.empty() ? "G1" : "-G1");
5431
5432 // Add missing non-integral declarations.
5433 // This goes before adding new address spaces to prevent incoherent string
5434 // values.
5435 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5436 Res.append("-ni:7:8:9");
5437 // Update ni:7 to ni:7:8:9.
5438 if (DL.ends_with("ni:7"))
5439 Res.append(":8:9");
5440 if (DL.ends_with("ni:7:8"))
5441 Res.append(":9");
5442
5443 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5444 // resources) An empty data layout has already been upgraded to G1 by now.
5445 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5446 Res.append("-p7:160:256:256:32");
5447 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5448 Res.append("-p8:128:128");
5449 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5450 Res.append("-p9:192:256:256:32");
5451
5452 return Res;
5453 }
5454
5455 // AArch64 data layout upgrades.
5456 if (T.isAArch64()) {
5457 // Add "-Fn32"
5458 if (!DL.empty() && !DL.contains("-Fn32"))
5459 Res.append("-Fn32");
5460 return Res;
5461 }
5462
5463 if (!T.isX86())
5464 return Res;
5465
5466 // If the datalayout matches the expected format, add pointer size address
5467 // spaces to the datalayout.
5468 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5469 if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5471 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5472 if (R.match(Res, &Groups))
5473 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5474 }
5475
5476 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5477 // for i128 operations prior to this being reflected in the data layout, and
5478 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5479 // boundaries, so although this is a breaking change, the upgrade is expected
5480 // to fix more IR than it breaks.
5481 // Intel MCU is an exception and uses 4-byte-alignment.
5482 if (!T.isOSIAMCU()) {
5483 std::string I128 = "-i128:128";
5484 if (StringRef Ref = Res; !Ref.contains(I128)) {
5486 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5487 if (R.match(Res, &Groups))
5488 Res = (Groups[1] + I128 + Groups[3]).str();
5489 }
5490 }
5491
5492 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5493 // Raising the alignment is safe because Clang did not produce f80 values in
5494 // the MSVC environment before this upgrade was added.
5495 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5496 StringRef Ref = Res;
5497 auto I = Ref.find("-f80:32-");
5498 if (I != StringRef::npos)
5499 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5500 }
5501
5502 return Res;
5503}
5504
5506 StringRef FramePointer;
5507 Attribute A = B.getAttribute("no-frame-pointer-elim");
5508 if (A.isValid()) {
5509 // The value can be "true" or "false".
5510 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5511 B.removeAttribute("no-frame-pointer-elim");
5512 }
5513 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5514 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5515 if (FramePointer != "all")
5516 FramePointer = "non-leaf";
5517 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5518 }
5519 if (!FramePointer.empty())
5520 B.addAttribute("frame-pointer", FramePointer);
5521
5522 A = B.getAttribute("null-pointer-is-valid");
5523 if (A.isValid()) {
5524 // The value can be "true" or "false".
5525 bool NullPointerIsValid = A.getValueAsString() == "true";
5526 B.removeAttribute("null-pointer-is-valid");
5527 if (NullPointerIsValid)
5528 B.addAttribute(Attribute::NullPointerIsValid);
5529 }
5530}
5531
5532void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5533 // clang.arc.attachedcall bundles are now required to have an operand.
5534 // If they don't, it's okay to drop them entirely: when there is an operand,
5535 // the "attachedcall" is meaningful and required, but without an operand,
5536 // it's just a marker NOP. Dropping it merely prevents an optimization.
5537 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5538 return OBD.getTag() == "clang.arc.attachedcall" &&
5539 OBD.inputs().empty();
5540 });
5541}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:88
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:72
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:99
static MDType * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:52
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:56
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
uint64_t High
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:77
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:371
Type * getElementType() const
Definition: DerivedTypes.h:384
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:695
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:822
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:707
@ FAdd
*p = old + v
Definition: Instructions.h:732
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:747
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:743
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:739
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:751
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1546
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1323
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1401
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1501
unsigned arg_size() const
Definition: InstrTypes.h:1408
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1504
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1650
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2231
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2177
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2217
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1357
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1500
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Records a position in IR for a source label (DILabel).
Base class for non-instruction debug metadata records that have positions within IR.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Type * getReturnType() const
Definition: DerivedTypes.h:124
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:165
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:242
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:418
size_t arg_size() const
Definition: Function.h:864
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Argument * getArg(unsigned i) const
Definition: Function.h:849
LinkageTypes getLinkage() const
Definition: GlobalValue.h:546
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:459
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1558
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2470
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:509
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2521
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1612
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1043
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2092
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2458
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:537
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1805
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1531
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2168
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1192
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2514
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:578
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2267
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1090
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2031
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:524
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:474
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2079
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:519
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1719
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2275
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1747
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2239
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1342
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2125
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1788
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1414
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2019
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2492
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1473
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:598
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1325
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:469
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2547
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1852
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2005
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1495
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:567
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2251
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2194
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:178
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1824
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1454
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2108
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1517
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2259
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2349
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1585
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1728
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:514
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:547
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1359
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:476
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
Definition: Instructions.h:173
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
LLVMContext & getContext() const
Definition: Metadata.h:1231
A single uniqued string.
Definition: Metadata.h:720
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:600
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
Metadata wrapper in the Value hierarchy.
Definition: Metadata.h:176
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
bool IsNewDbgInfoFormat
Is this Module using intrinsics to record the position of debugging information, or non-intrinsic rec...
Definition: Module.h:219
A tuple of MDNodes.
Definition: Metadata.h:1729
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1389
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1381
unsigned getNumOperands() const
Definition: Metadata.cpp:1377
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1189
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1204
StringRef getTag() const
Definition: InstrTypes.h:1212
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:289
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:602
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
static constexpr size_t npos
Definition: StringRef.h:52
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:342
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
Definition: Function.cpp:1328
std::optional< Function * > remangleIntrinsicFunction(Function *F)
Definition: Function.cpp:1804
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1042
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1484
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
unsigned getDebugMetadataVersionFromModule(const Module &M)
Return Debug Info Metadata Version by checking module flags.
Definition: DebugInfo.cpp:925
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:591
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
@ Dynamic
Denotes mode unknown at compile time.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7106
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117