#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
// ...
                                cl::desc("Disable autoupgrade of debug info"));
  // ...
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  // ...
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  // ...
  if (F->getReturnType()->isVectorTy())
  // ...
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  // ...
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  // ...
  if (F->getReturnType()->getScalarType()->isBFloatTy())
  // ...
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
  // ...
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||
            Name == "cvt.ps2.pd.256" ||
            Name == "cvtdq2.pd.256" ||
            Name == "cvtdq2.ps.256" ||
            Name.starts_with("movnt.") ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu.") ||
            Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vbroadcastf128") ||
            Name.starts_with("vextractf128.") ||
            Name.starts_with("vinsertf128.") ||
            Name.starts_with("vperm2f128.") ||
            Name.starts_with("vpermil."));

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||
            Name.starts_with("pabs.") ||
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("pblendd.") ||
            // ...
            Name.starts_with("pbroadcast") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            Name.starts_with("pmax") ||
            Name.starts_with("pmin") ||
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx") ||
            // ...
            Name == "pmulu.dq" ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("vbroadcast") ||
            Name == "vbroadcasti128" ||
            Name == "vextracti128" ||
            Name == "vinserti128" ||
            Name == "vperm2i128");
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      return (Name.starts_with("add.p") ||
              Name.starts_with("and.") ||
              Name.starts_with("andn.") ||
              Name.starts_with("broadcast.s") ||
              Name.starts_with("broadcastf32x4.") ||
              Name.starts_with("broadcastf32x8.") ||
              Name.starts_with("broadcastf64x2.") ||
              Name.starts_with("broadcastf64x4.") ||
              Name.starts_with("broadcasti32x4.") ||
              Name.starts_with("broadcasti32x8.") ||
              Name.starts_with("broadcasti64x2.") ||
              Name.starts_with("broadcasti64x4.") ||
              Name.starts_with("cmp.b") ||
              Name.starts_with("cmp.d") ||
              Name.starts_with("cmp.q") ||
              Name.starts_with("cmp.w") ||
              Name.starts_with("compress.b") ||
              Name.starts_with("compress.d") ||
              Name.starts_with("compress.p") ||
              Name.starts_with("compress.q") ||
              Name.starts_with("compress.store.") ||
              Name.starts_with("compress.w") ||
              Name.starts_with("conflict.") ||
              Name.starts_with("cvtdq2pd.") ||
              Name.starts_with("cvtdq2ps.") ||
              Name == "cvtpd2dq.256" ||
              Name == "cvtpd2ps.256" ||
              Name == "cvtps2pd.128" ||
              Name == "cvtps2pd.256" ||
              Name.starts_with("cvtqq2pd.") ||
              Name == "cvtqq2ps.256" ||
              Name == "cvtqq2ps.512" ||
              Name == "cvttpd2dq.256" ||
              Name == "cvttps2dq.128" ||
              Name == "cvttps2dq.256" ||
              Name.starts_with("cvtudq2pd.") ||
              Name.starts_with("cvtudq2ps.") ||
              Name.starts_with("cvtuqq2pd.") ||
              Name == "cvtuqq2ps.256" ||
              Name == "cvtuqq2ps.512" ||
              Name.starts_with("dbpsadbw.") ||
              Name.starts_with("div.p") ||
              Name.starts_with("expand.b") ||
              Name.starts_with("expand.d") ||
              Name.starts_with("expand.load.") ||
              Name.starts_with("expand.p") ||
              Name.starts_with("expand.q") ||
              Name.starts_with("expand.w") ||
              Name.starts_with("fpclass.p") ||
              Name.starts_with("insert") ||
              Name.starts_with("load.") ||
              Name.starts_with("loadu.") ||
              Name.starts_with("lzcnt.") ||
              Name.starts_with("max.p") ||
              Name.starts_with("min.p") ||
              Name.starts_with("movddup") ||
              Name.starts_with("move.s") ||
              Name.starts_with("movshdup") ||
              Name.starts_with("movsldup") ||
              Name.starts_with("mul.p") ||
              Name.starts_with("or.") ||
              Name.starts_with("pabs.") ||
              Name.starts_with("packssdw.") ||
              Name.starts_with("packsswb.") ||
              Name.starts_with("packusdw.") ||
              Name.starts_with("packuswb.") ||
              Name.starts_with("padd.") ||
              Name.starts_with("padds.") ||
              Name.starts_with("paddus.") ||
              Name.starts_with("palignr.") ||
              Name.starts_with("pand.") ||
              Name.starts_with("pandn.") ||
              Name.starts_with("pavg") ||
              Name.starts_with("pbroadcast") ||
              Name.starts_with("pcmpeq.") ||
              Name.starts_with("pcmpgt.") ||
              Name.starts_with("perm.df.") ||
              Name.starts_with("perm.di.") ||
              Name.starts_with("permvar.") ||
              Name.starts_with("pmaddubs.w.") ||
              Name.starts_with("pmaddw.d.") ||
              Name.starts_with("pmax") ||
              Name.starts_with("pmin") ||
              Name == "pmov.qd.256" ||
              Name == "pmov.qd.512" ||
              Name == "pmov.wb.256" ||
              Name == "pmov.wb.512" ||
              Name.starts_with("pmovsx") ||
              Name.starts_with("pmovzx") ||
              Name.starts_with("pmul.dq.") ||
              Name.starts_with("pmul.hr.sw.") ||
              Name.starts_with("pmulh.w.") ||
              Name.starts_with("pmulhu.w.") ||
              Name.starts_with("pmull.") ||
              Name.starts_with("pmultishift.qb.") ||
              Name.starts_with("pmulu.dq.") ||
              Name.starts_with("por.") ||
              Name.starts_with("prol.") ||
              Name.starts_with("prolv.") ||
              Name.starts_with("pror.") ||
              Name.starts_with("prorv.") ||
              Name.starts_with("pshuf.b.") ||
              Name.starts_with("pshuf.d.") ||
              Name.starts_with("pshufh.w.") ||
              Name.starts_with("pshufl.w.") ||
              Name.starts_with("psll.d") ||
              Name.starts_with("psll.q") ||
              Name.starts_with("psll.w") ||
              Name.starts_with("pslli") ||
              Name.starts_with("psllv") ||
              Name.starts_with("psra.d") ||
              Name.starts_with("psra.q") ||
              Name.starts_with("psra.w") ||
              Name.starts_with("psrai") ||
              Name.starts_with("psrav") ||
              Name.starts_with("psrl.d") ||
              Name.starts_with("psrl.q") ||
              Name.starts_with("psrl.w") ||
              Name.starts_with("psrli") ||
              Name.starts_with("psrlv") ||
              Name.starts_with("psub.") ||
              Name.starts_with("psubs.") ||
              Name.starts_with("psubus.") ||
              Name.starts_with("pternlog.") ||
              Name.starts_with("punpckh") ||
              Name.starts_with("punpckl") ||
              Name.starts_with("pxor.") ||
              Name.starts_with("shuf.f") ||
              Name.starts_with("shuf.i") ||
              Name.starts_with("shuf.p") ||
              Name.starts_with("sqrt.p") ||
              Name.starts_with("store.b.") ||
              Name.starts_with("store.d.") ||
              Name.starts_with("store.p") ||
              Name.starts_with("store.q.") ||
              Name.starts_with("store.w.") ||
              Name == "store.ss" ||
              Name.starts_with("storeu.") ||
              Name.starts_with("sub.p") ||
              Name.starts_with("ucmp.") ||
              Name.starts_with("unpckh.") ||
              Name.starts_with("unpckl.") ||
              Name.starts_with("valign.") ||
              Name == "vcvtph2ps.128" ||
              Name == "vcvtph2ps.256" ||
              Name.starts_with("vextract") ||
              Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfnmadd.") ||
              Name.starts_with("vfnmsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermi2var.") ||
              Name.starts_with("vpermil.p") ||
              Name.starts_with("vpermilvar.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshld.") ||
              Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrd.") ||
              Name.starts_with("vpshrdv.") ||
              Name.starts_with("vpshufbitqmb.") ||
              Name.starts_with("xor."));
    if (Name.consume_front("mask3."))
      return (Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfmsub.") ||
              Name.starts_with("vfmsubadd.") ||
              Name.starts_with("vfnmsub."));

    if (Name.consume_front("maskz."))
      return (Name.starts_with("pternlog.") ||
              Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrdv."));
    return (Name == "movntdqa" ||
            Name == "pmul.dq.512" ||
            Name == "pmulu.dq.512" ||
            Name.starts_with("broadcastm") ||
            Name.starts_with("cmp.p") ||
            Name.starts_with("cvtb2mask.") ||
            Name.starts_with("cvtd2mask.") ||
            Name.starts_with("cvtmask2") ||
            Name.starts_with("cvtq2mask.") ||
            Name == "cvtusi2sd" ||
            Name.starts_with("cvtw2mask.") ||
            // ...
            Name == "kortestc.w" ||
            Name == "kortestz.w" ||
            Name.starts_with("kunpck") ||
            // ...
            Name.starts_with("padds.") ||
            Name.starts_with("pbroadcast") ||
            Name.starts_with("prol") ||
            Name.starts_with("pror") ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("ptestm") ||
            Name.starts_with("ptestnm") ||
            Name.starts_with("storent.") ||
            Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vpshld.") ||
            Name.starts_with("vpshrd."));
  }
  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||
            Name.starts_with("vfmsub.") ||
            Name.starts_with("vfmsubadd.") ||
            Name.starts_with("vfnmadd.") ||
            Name.starts_with("vfnmsub."));

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s");

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||
            Name == "cvtsi2ss" ||
            Name == "cvtsi642ss" ||
            // ...
            Name.starts_with("sqrt.p") ||
            // ...
            Name.starts_with("storeu.")
            /* ... */);

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||
            Name == "cvtdq2pd" ||
            Name == "cvtdq2ps" ||
            Name == "cvtps2pd" ||
            Name == "cvtsi2sd" ||
            Name == "cvtsi642sd" ||
            Name == "cvtss2sd" ||
            // ...
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            // ...
            Name == "pmulu.dq" ||
            Name.starts_with("pshuf") ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("sqrt.p") ||
            // ...
            Name == "storel.dq" ||
            Name.starts_with("storeu.")
            /* ... */);

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") ||
            Name == "movntdqa" ||
            // ...
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx")
            /* ... */);

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8";

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt.");

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" ||
            Name == "pabs.d.128" ||
            Name == "pabs.w.128");

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||
            Name == "vpcmov.256" ||
            Name.starts_with("vpcom") ||
            Name.starts_with("vprot"));

  return (Name == "addcarry.u32" ||
          Name == "addcarry.u64" ||
          Name == "addcarryx.u32" ||
          Name == "addcarryx.u64" ||
          Name == "subborrow.u32" ||
          Name == "subborrow.u64" ||
          Name.starts_with("vcvtph2ps."));
}
  if (!Name.consume_front("x86."))
    // ...

  if (Name == "rdtscp") {
    if (F->getFunctionType()->getNumParams() == 0)
      // ...
                                             Intrinsic::x86_rdtscp);
    // ...
  }

  if (Name.consume_front("sse41.ptest")) {
    // ...
        .Case("c", Intrinsic::x86_sse41_ptestc)
        .Case("z", Intrinsic::x86_sse41_ptestz)
        .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
    // ...
  }

  // ...
      .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
      .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
      .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
      .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
      .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
      .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
  // ...

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // ...
          .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
          .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
          .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
          .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
          .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
          .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
      // ...
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // ...
          .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
          .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
          .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
          .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
          .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
          .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
      // ...
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // ...
          .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
          .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
          .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
          .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
          .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
          .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
      // ...
    }
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // ...
          .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
          .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
          .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
          .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
          .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
          .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
          .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
          .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
          .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
          .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
          .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
          .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
      // ...
    } else if (Name.consume_front("vpdpw")) {
      // ...
          .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
          .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
          .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
          .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
          .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
          .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
          .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
          .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
          .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
          .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
          .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
          .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
      // ...
    }
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // ...
          .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
          .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
          .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
          .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
          .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
          .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
      // ...
    } else if (Name.consume_front("vpdpw")) {
      // ...
          .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
          .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
          .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
          .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
          .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
          .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
      // ...
    }
  }

  if (Name.consume_front("avx512bf16.")) {
    // ...
        .Case("cvtne2ps2bf16.128",
              Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
        .Case("cvtne2ps2bf16.256",
              Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
        .Case("cvtne2ps2bf16.512",
              Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
        .Case("mask.cvtneps2bf16.128",
              Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
        .Case("cvtneps2bf16.256",
              Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
        .Case("cvtneps2bf16.512",
              Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
    // ...
        .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
        .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
        .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
    // ...
  }

  if (Name.consume_front("xop.")) {
    // ...
    if (Name.starts_with("vpermil2")) {
      // ...
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // ...
          .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
          .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
      // ...
  }
  if (Name == "seh.recoverfp") {
    // ...
                                           Intrinsic::eh_recoverfp);
    // ...
  }

  if (Name.starts_with("rbit")) {
    // ...
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    // ...
  }

  if (Name == "thread.pointer") {
    // ...
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    // ...
  }

  bool Neon = Name.consume_front("neon.");
  // ...
  if (Name.consume_front("bfdot.")) {
    // ...
        .Cases({"v2f32.v8i8", "v4f32.v16i8"},
               // ...
      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      // ...
      std::array<Type *, 2> Tys{
          // ...
  }

  if (Name.consume_front("bfm")) {
    // ...
    if (Name.consume_back(".v4f32.v16i8")) {
      // ...
    }
  }
  // ...
                                           F->arg_begin()->getType());
  // ...
  if (Name.consume_front("vst")) {
    // ...
    static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
    // ...
      static const Intrinsic::ID StoreInts[] = {
          Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
          Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
      static const Intrinsic::ID StoreLaneInts[] = {
          Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
          Intrinsic::arm_neon_vst4lane};
      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      // ...
          F->getParent(), StoreInts[fArgs.size() - 3], Tys);
      // ...
          F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
    // ...
  }
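// --- Illustrative sketch (not part of the original file) ---
// The vst upgrade above is driven by llvm::Regex:
// "^([1234]|[234]lane)\.v[a-z0-9]*$" accepts suffixes such as "1.v8i8" or
// "2lane.v4i32", and fArgs.size() then indexes into StoreInts/StoreLaneInts.
// A minimal standalone match check (helper name invented):
static bool exampleIsVstSuffix(StringRef Suffix) {
  static const Regex R("^([1234]|[234]lane)\\.v[a-z0-9]*$");
  return R.match(Suffix); // true for "2lane.v4i32", false for "5.v4i32"
}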
  if (Name.consume_front("mve.")) {
    if (Name == "vctp64") {
      // ...
    }
    if (Name.starts_with("vrintn.v")) {
      // ...
          F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
    }
    if (Name.consume_back(".v4i1")) {
      // ...
      if (Name.consume_back(".predicated.v2i64.v4i32"))
        // ...
        return Name == "mull.int" || Name == "vqdmull";

      if (Name.consume_back(".v2i64")) {
        bool IsGather = Name.consume_front("vldr.gather.");
        if (IsGather || Name.consume_front("vstr.scatter.")) {
          if (Name.consume_front("base.")) {
            // ...
            Name.consume_front("wb.");
            // ...
            return Name == "predicated.v2i64";
          }
          if (Name.consume_front("offset.predicated."))
            return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                   Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
          // ...
        }
      }
    }
    // ...
  }

  if (Name.consume_front("cde.vcx")) {
    // ...
    if (Name.consume_back(".predicated.v2i64.v4i1"))
      // ...
      return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
             Name == "3q" || Name == "3qa";
    // ...
  }
  // ...
                                           F->arg_begin()->getType());
  // ...
  if (Name.starts_with("addp")) {
    // ...
    if (F->arg_size() != 2)
      // ...
    if (Ty && Ty->getElementType()->isFloatingPointTy()) {
      // ...
          F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
    }
  }

  if (Name.starts_with("bfcvt")) {
    // ...
  }

  if (Name.consume_front("sve.")) {
    if (Name.consume_front("bf")) {
      if (Name.consume_back(".lane")) {
        // ...
            .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
            .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
            .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
        // ...
      }
    }

    if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
      // ...
    }

    if (Name.consume_front("addqv")) {
      if (!F->getReturnType()->isFPOrFPVectorTy())
        // ...
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
      // ...
          F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
    }

    if (Name.consume_front("ld")) {
      // ...
      static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
      if (LdRegex.match(Name)) {
        // ...
            Intrinsic::aarch64_sve_ld2_sret,
            Intrinsic::aarch64_sve_ld3_sret,
            Intrinsic::aarch64_sve_ld4_sret,
        // ...
            LoadIDs[Name[0] - '2'], Ty);
      }
    }

    if (Name.consume_front("tuple.")) {
      if (Name.starts_with("get")) {
        // ...
        Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
        // ...
            F->getParent(), Intrinsic::vector_extract, Tys);
      }

      if (Name.starts_with("set")) {
        // ...
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[0], Args[2], Args[1]};
        // ...
            F->getParent(), Intrinsic::vector_insert, Tys);
      }

      static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
      if (CreateTupleRegex.match(Name)) {
        // ...
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        // ...
            F->getParent(), Intrinsic::vector_insert, Tys);
      }
    }

    if (Name.starts_with("rev.nxv")) {
      // ...
          F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
    }
  }
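// --- Illustrative sketch (not part of the original file) ---
// The sve.tuple.* upgrades above re-point the old declaration at a generic
// intrinsic (vector_extract / vector_insert), passing the overloaded types
// explicitly; the Tys array must follow the intrinsic's overload order,
// recovered here from the old signature. A hedged sketch of the "get" case
// (assuming Intrinsic::getOrInsertDeclaration, the declaration helper in
// recent LLVM; older trees spell it Intrinsic::getDeclaration):
static Function *exampleDeclareVectorExtract(Function *F) {
  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
  return Intrinsic::getOrInsertDeclaration(
      F->getParent(), Intrinsic::vector_extract, Tys);
}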
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    // ...
              Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
        // ...
              Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
        // ...
              Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
        .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
        .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
        .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
        .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
        .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
    // ...
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        // ...
    // ...
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    // ...
  }

  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        // ...
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    // ...
        .Case("global.to.shared.cluster",
              Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
        .Case("shared.cta.to.cluster",
              Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
    // ...
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        // ...
  }

  if (Name.consume_front("fma.rn."))
    // ...
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
    // ...

  if (Name.consume_front("fmax."))
    // ...
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
    // ...

  if (Name.consume_front("fmin."))
    // ...
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
    // ...

  if (Name.consume_front("neg."))
    // ...
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
    // ...

  // ...
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
// ...
                                 bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");
  // ...
  if (!Name.consume_front("llvm.") || Name.empty())
    // ...

  bool IsArm = Name.consume_front("arm.");
  if (IsArm || Name.consume_front("aarch64.")) {
    // ...
  }

  if (Name.consume_front("amdgcn.")) {
    if (Name == "alignbit") {
      // ...
          F->getParent(), Intrinsic::fshr, {F->getReturnType()});
      // ...
    }

    if (Name.consume_front("atomic.")) {
      if (Name.starts_with("inc") || Name.starts_with("dec") ||
          Name.starts_with("cond.sub") || Name.starts_with("csub")) {
        // ...
      }
    }
    // ...
    if (F->arg_size() == 7 &&
        F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
      // ...
    }

    if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
        Name.consume_front("flat.atomic.")) {
      if (Name.starts_with("fadd") ||
          // ...
          (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
          (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
        // ...
      }
    }

    if (Name.starts_with("ldexp.")) {
      // ...
          F->getParent(), Intrinsic::ldexp,
          {F->getReturnType(), F->getArg(1)->getType()});
      // ...
    }
  }
  // ...
    if (F->arg_size() == 1) {
      // ...
                                             F->arg_begin()->getType());
      // ...
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      // ...
                                             Intrinsic::coro_end);
      // ...
    }
  // ...
    if (Name.consume_front("dbg.")) {
      // ...
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // ...
        }
      }
      // ...
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        // ...
                                               Intrinsic::dbg_value);
        // ...
      }
    }
  // ...
    if (Name.consume_front("experimental.vector.")) {
      // ...
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              // ...
                          Intrinsic::vector_partial_reduce_add)
      // ...
        const auto *FT = F->getFunctionType();
        // ...
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // ...
        if (ID != Intrinsic::vector_interleave2)
          // ...
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // ...
    }

    if (Name.consume_front("reduce.")) {
      // ...
      static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
      if (R.match(Name, &Groups))
        // ...
            .Case("add", Intrinsic::vector_reduce_add)
            .Case("mul", Intrinsic::vector_reduce_mul)
            .Case("and", Intrinsic::vector_reduce_and)
            .Case("or", Intrinsic::vector_reduce_or)
            .Case("xor", Intrinsic::vector_reduce_xor)
            .Case("smax", Intrinsic::vector_reduce_smax)
            .Case("smin", Intrinsic::vector_reduce_smin)
            .Case("umax", Intrinsic::vector_reduce_umax)
            .Case("umin", Intrinsic::vector_reduce_umin)
            .Case("fmax", Intrinsic::vector_reduce_fmax)
            .Case("fmin", Intrinsic::vector_reduce_fmin)
      // ...
      static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
      // ...
            .Case("fadd", Intrinsic::vector_reduce_fadd)
            .Case("fmul", Intrinsic::vector_reduce_fmul)
      // ...
        auto Args = F->getFunctionType()->params();
        // ...
                                               {Args[V2 ? 1 : 0]});
    }
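// --- Illustrative sketch (not part of the original file) ---
// Regex::match(Name, &Groups) fills Groups[0] with the whole match and
// Groups[1..] with the parenthesized captures, which is how the reduction
// kind ("add", "fmax", ...) is pulled out of suffixes like "add.v4i32"
// above. In isolation (helper name invented):
static StringRef exampleReduceKind(StringRef Suffix) {
  static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
  SmallVector<StringRef, 2> Groups;
  if (R.match(Suffix, &Groups))
    return Groups[1]; // "add" for "add.v4i32"
  return StringRef();
}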
    if (Name.consume_front("experimental.stepvector.")) {
      // ...
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      // ...
    }

    if (Name.starts_with("flt.rounds")) {
      // ...
                                             Intrinsic::get_rounding);
      // ...
    }

    if (Name.starts_with("invariant.group.barrier")) {
      // ...
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      // ...
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      // ...
    }

    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      // ...
                               ? Intrinsic::lifetime_start
                               : Intrinsic::lifetime_end;
      // ...
                                             F->getArg(0)->getType());
      // ...
    }
    // ...
          .StartsWith("memcpy.", Intrinsic::memcpy)
          .StartsWith("memmove.", Intrinsic::memmove)
    // ...
      if (F->arg_size() == 5) {
        // ...
            F->getFunctionType()->params().slice(0, 3);
        // ...
      }

    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      // ...
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0),
          // ...
      };
      // ...
                                             Intrinsic::memset, ParamTypes);
      // ...
    }
    // ...
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
    // ...
      if (MaskedID && F->arg_size() == 4) {
        // ...
        if (MaskedID == Intrinsic::masked_load ||
            MaskedID == Intrinsic::masked_gather) {
          // ...
              F->getParent(), MaskedID,
              {F->getReturnType(), F->getArg(0)->getType()});
          // ...
        }
        // ...
            F->getParent(), MaskedID,
            {F->getArg(0)->getType(), F->getArg(1)->getType()});
      }
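// --- Illustrative sketch (not part of the original file) ---
// For the masked.* upgrades above, the new declaration's overload list is
// rebuilt from the old signature: loads/gathers are overloaded on
// {return type, pointer type}, stores/scatters on {value type, pointer
// type}. The load case as a hedged sketch (helper name invented; assuming
// Intrinsic::getOrInsertDeclaration as above):
static Function *exampleDeclareMaskedLoad(Function *F) {
  return Intrinsic::getOrInsertDeclaration(
      F->getParent(), Intrinsic::masked_load,
      {F->getReturnType(), F->getArg(0)->getType()});
}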
    if (Name.consume_front("nvvm.")) {
      // ...
      if (F->arg_size() == 1) {
        // ...
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
        // ...
                                                 {F->getReturnType()});
        // ...
      }
      // ...
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        // ...
      }
      // ...
      bool Expand = false;
      if (Name.consume_front("abs."))
        // ...
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // ...
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // ...
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // ...
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // ...
      else if (Name.consume_front("bitcast."))
        // ...
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // ...
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // ...
      else if (Name.consume_front("ptr."))
        // ...
      else if (Name.consume_front("ldg.global."))
        // ...
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        // ...
            .Case("barrier0", true)
            .Case("barrier.n", true)
            .Case("barrier.sync.cnt", true)
            .Case("barrier.sync", true)
            .Case("barrier", true)
            .Case("bar.sync", true)
            .Case("barrier0.popc", true)
            .Case("barrier0.and", true)
            .Case("barrier0.or", true)
            .Case("clz.ll", true)
            .Case("popc.ll", true)
            // ...
            .Case("swap.lo.hi.b64", true)
            .Case("tanh.approx.f32", true)
        // ...
    }
    // ...
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        // ...
                                               Intrinsic::objectsize, Tys);
        // ...
      }
    }
    // ...
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      // ...
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      // ...
    }

    if (Name.consume_front("riscv.")) {
      // ...
          .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
          .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
          .Case("aes32esi", Intrinsic::riscv_aes32esi)
          .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
      // ...
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          // ...
        }
        // ...
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          // ...
        }
      // ...
          .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
          .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
          .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
          .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
      // ...
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          // ...
        }
    }
    // ...
    if (Name == "stackprotectorcheck") {
      // ...
    }
    // ...
    if (Name == "thread.pointer") {
      // ...
          F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
      // ...
    }
    // ...
    if (Name == "var.annotation" && F->arg_size() == 4) {
      // ...
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      // ...
    }
    // ...
    if (Name.consume_front("wasm.")) {
      // ...
          .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
          .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
          .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
      // ...
                                               F->getReturnType());
      // ...
      if (Name.consume_front("dot.i8x16.i7x16.")) {
        // ...
            .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
            // ...
                  Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
        // ...
      }
    }
  // ...
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      // ...
    auto *FT = F->getFunctionType();
    // ...
    std::string Name = F->getName().str();
    // ...
                          Name, F->getParent());
  // ...
  if (Result != std::nullopt) {
    // ...
// ...
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  // ...
        GV->getName() == "llvm.global_dtors")) ||
  // ...
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    // ...
        Ctor->getAggregateElement(1),
    // ...
  }
  unsigned NumElts = ResultTy->getNumElements() * 8;
  // ...
  Op = Builder.CreateBitCast(Op, VecTy, "cast");
  // ...
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = NumElts + i - Shift;
      if (Idx < NumElts)
        Idx -= NumElts - 16;
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  // ...
  return Builder.CreateBitCast(Res, ResultTy, "cast");
  // ...
  unsigned NumElts = ResultTy->getNumElements() * 8;
  // ...
  Op = Builder.CreateBitCast(Op, VecTy, "cast");
  // ...
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;
      if (Idx >= 16)
        Idx += NumElts - 16;
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  // ...
  return Builder.CreateBitCast(Res, ResultTy, "cast");
  // ...
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  // ...
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  // ...
  if (C->isAllOnesValue())
    // ...
  return Builder.CreateSelect(Mask, Op0, Op1);
  // ...
  if (C->isAllOnesValue())
    // ...
                                     Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
  // ...
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  // ...
  ShiftVal &= (NumElts - 1);
  // ...
  if (ShiftVal > 16) {
    // ...
  }
  // ...
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16;
      Indices[l + i] = Idx + l;
    }
  }
  // ...
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
                                bool ZeroMask, bool IndexForm) {
  // ...
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  // ...
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  // ...
  Value *V = Builder.CreateIntrinsic(IID, Args);
  // ...
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
  // ...
                               bool IsRotateRight) {
  // ...
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  // ...
  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
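// --- Illustrative sketch (not part of the original file) ---
// A rotate is a funnel shift whose two inputs are the same value, which is
// exactly what the call above builds: fshl(x, x, amt) rotates left and
// fshr(x, x, amt) rotates right. Emitting a left rotate directly:
static Value *exampleRotateLeft(IRBuilderBase &Builder, Value *Src,
                                Value *Amt) {
  return Builder.CreateIntrinsic(Intrinsic::fshl, Src->getType(),
                                 {Src, Src, Amt});
}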
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  // ...
                              bool IsShiftRight, bool ZeroMask) {
  // ...
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  // ...
  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
  // ...
  const Align Alignment =
      // ...
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          // ...
  if (C->isAllOnesValue())
    return Builder.CreateAlignedStore(Data, Ptr, Alignment);
  // ...
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
  // ...
  const Align Alignment =
      // ...
  if (C->isAllOnesValue())
    return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
  // ...
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
  // ...
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});
  // ...
  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  LHS = Builder.CreateShl(LHS, ShiftAmt);
  LHS = Builder.CreateAShr(LHS, ShiftAmt);
  RHS = Builder.CreateShl(RHS, ShiftAmt);
  RHS = Builder.CreateAShr(RHS, ShiftAmt);
  // ...
  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  LHS = Builder.CreateAnd(LHS, Mask);
  RHS = Builder.CreateAnd(RHS, Mask);
  // ...
  if (!C || !C->isAllOnesValue())
    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  // ...
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      // ...
  // ...
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
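// --- Illustrative sketch (not part of the original file) ---
// AVX-512 mask values round-trip through IR as integers: an i8/i16 mask
// bitcasts losslessly to <8 x i1>/<16 x i1> and back, which is what the
// helper above relies on (narrower masks additionally get widened with the
// shuffle shown). The core conversion, assuming the bit width of IntMask
// equals NumElts:
static Value *exampleIntToMaskVec(IRBuilderBase &Builder, Value *IntMask,
                                  unsigned NumElts) {
  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), NumElts);
  return Builder.CreateBitCast(IntMask, MaskTy); // e.g. i16 -> <16 x i1>
}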
                            unsigned CC, bool Signed) {
  // ...
  } else if (CC == 7) {
  // ...
  }
  // ...
  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  // ...
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  // ...
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  Name = Name.substr(12); // Drop the "avx512.mask." prefix.
  // ...
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    // ...
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    // ...
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    // ...
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    // ...
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    // ...
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    // ...
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    // ...
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    // ...
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    // ...
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    // ...
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    // ...
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    // ...
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    // ...
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    // ...
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    // ...
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    // ...
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    // ...
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    // ...
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    // ...
  }
  // ...
  Rep = Builder.CreateIntrinsic(IID, Args);
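// --- Illustrative sketch (not part of the original file) ---
// The if/else ladders above dispatch purely on two numbers taken from the
// call's vector type. Extracting them:
static void exampleWidths(Type *Ty, unsigned &VecWidth, unsigned &EltWidth) {
  VecWidth = Ty->getPrimitiveSizeInBits().getFixedValue(); // 128/256/512
  EltWidth = Ty->getScalarSizeInBits();                    // 8/16/32/64
  // e.g. <8 x float>: VecWidth == 256, EltWidth == 32, IsFloat == true,
  // which the "max.p" block maps to Intrinsic::x86_avx_max_ps_256.
}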
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
  // ...
  Value *Rep = nullptr;
  // ...
  if (Name == "abs.i" || Name == "abs.ll") {
    // ...
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        // ...
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    Type *Ty = (Name == "abs.bf16")
        // ...
    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.consume_front("ex2.approx.")) {
    // ...
    Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
                                                : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    // ...
  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {
    // ...
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
                                  // ...
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // ...
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // ...
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // ...
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          // ...
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // ...
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  // ...
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    // ...
  } else if (Name == "rotate.b32") {
    // ...
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    // ...
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    // ...
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    // ...
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              // ...
              Name.starts_with(".to.gen"))) {
    // ...
  } else if (Name.consume_front("ldg.global")) {
    // ...
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    // ...
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    // ...
  } else if (Name == "tanh.approx.f32") {
    // ...
    Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
                                       // ...
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    // ...
        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  // ...
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
        // ...
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
                                  // ...
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
                                  // ...
  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {
    // ...
    C = Builder.CreateICmpNE(C, Builder.getInt32(0));
    // ...
        .Case("barrier0.popc",
              Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
        .Case("barrier0.and",
              Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
        .Case("barrier0.or",
              Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
    Rep = Builder.CreateZExt(Bar, CI->getType());
  }
  // ...
             !F->getReturnType()->getScalarType()->isBFloatTy()) {
    // ...
                   ? Builder.CreateBitCast(Arg, NewType)
                   // ...
    Rep = Builder.CreateCall(NewFn, Args);
    if (F->getReturnType()->isIntegerTy())
      Rep = Builder.CreateBitCast(Rep, F->getReturnType());
  }
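// --- Illustrative sketch (not part of the original file) ---
// The nvvm.max.*/nvvm.min.* expansions above lower to a compare plus a
// select; the name suffix ("us"/"ui"/"ull" vs "s"/"i"/"ll") picks the
// unsigned or signed predicate. The signed-max case in isolation:
static Value *exampleSMax(IRBuilderBase &Builder, Value *A, Value *B) {
  Value *Cmp = Builder.CreateICmpSGE(A, B, "max.cond");
  return Builder.CreateSelect(Cmp, A, B, "max");
}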
2800 Value *Rep =
nullptr;
2802 if (Name.starts_with(
"sse4a.movnt.")) {
2814 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2817 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2818 }
else if (Name.starts_with(
"avx.movnt.") ||
2819 Name.starts_with(
"avx512.storent.")) {
2831 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2832 }
else if (Name ==
"sse2.storel.dq") {
2837 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2838 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2839 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2840 }
else if (Name.starts_with(
"sse.storeu.") ||
2841 Name.starts_with(
"sse2.storeu.") ||
2842 Name.starts_with(
"avx.storeu.")) {
2845 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2846 }
else if (Name ==
"avx512.mask.store.ss") {
2850 }
else if (Name.starts_with(
"avx512.mask.store")) {
2852 bool Aligned = Name[17] !=
'u';
2855 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2858 bool CmpEq = Name[9] ==
'e';
2861 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2862 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2869 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2870 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2872 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2873 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2874 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2875 }
  else if (Name.starts_with("avx.sqrt.p") ||
           Name.starts_with("sse2.sqrt.p") ||
           Name.starts_with("sse.sqrt.p")) {
    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {
    if (Name.ends_with(".512")) {
      // 512-bit forms keep a target intrinsic to preserve the rounding mode.
      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;
      Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
      Rep = Builder.CreateIntrinsic(IID, Args);
    } else {
      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});
    }
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.ptestm") ||
             Name.starts_with("avx512.ptestnm")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    Value *Mask = CI->getArgOperand(2);
    Rep = Builder.CreateAnd(Op0, Op1);
    Value *Zero = Constant::getNullValue(Op0->getType());
    ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
                                   ? ICmpInst::ICMP_NE
                                   : ICmpInst::ICMP_EQ;
    Rep = Builder.CreateICmp(Pred, Rep, Zero);
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
  } else if (Name.starts_with("avx512.mask.pbroadcast")) {
    unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
                           ->getNumElements();
    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.kunpck")) {
    unsigned NumElts = CI->getType()->getScalarSizeInBits();
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    // Concatenate the low halves; note the operand order is swapped to match
    // the intrinsic definition.
    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kand.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {
    Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    Value *C;
    if (Name[14] == 'c')
      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    else
      C = ConstantInt::getNullValue(Builder.getInt16Ty());
    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  }
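  // Note (mine): the k-register ops above all follow one pattern: bitcast the
  // i16 mask to <16 x i1>, apply the plain logical instruction, and bitcast
  // back, e.g. kand.w becomes bitcast(and(<16 x i1>, <16 x i1>)) to i16.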
  else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
           Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
           Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
           Name == "sse.div.ss" || Name == "sse2.div.sd") {
    Type *I32Ty = Type::getInt32Ty(C);
    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  }
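  // Illustrative sketch (mine): llvm.x86.sse.add.ss(%a, %b) becomes
  //   %x = extractelement <4 x float> %a, i32 0
  //   %y = extractelement <4 x float> %b, i32 0
  //   %s = fadd float %x, %y
  //   %r = insertelement <4 x float> %a, float %s, i32 0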
  else if (Name.starts_with("avx512.mask.pcmp")) {
    // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt.".
    bool CmpEq = Name[16] == 'e';
  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
    unsigned VecWidth = CI->getArgOperand(0)->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    switch (VecWidth) {
    default:
      llvm_unreachable("Unexpected intrinsic");
    case 128:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }
  } else if (Name.starts_with("avx512.mask.fpclass.p")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("avx512.cmp.p")) {
    SmallVector<Value *, 4> Args(CI->args());
    Type *OpTy = Args[0]->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    // The masked form takes an all-ones mask; 512-bit forms keep the
    // trailing rounding operand in its original position.
    Value *Mask = Constant::getAllOnesValue(
        Builder.getIntNTy(std::max(VecWidth / EltWidth, 8u)));
    if (VecWidth == 512)
      std::swap(Mask, Args.back());
    Args.push_back(Mask);

    Rep = Builder.CreateIntrinsic(IID, Args);
  } else if (Name.starts_with("avx512.mask.cmp.")) {
    // Signed integer compares, lowered to icmp plus a mask select.
  } else if (Name.starts_with("avx512.mask.ucmp.")) {
    // Unsigned integer compares.
  } else if (Name.starts_with("avx512.cvtb2mask.") ||
             Name.starts_with("avx512.cvtw2mask.") ||
             Name.starts_with("avx512.cvtd2mask.") ||
             Name.starts_with("avx512.cvtq2mask.")) {
    // Sign-bit tests, lowered to an icmp slt against zero.
  } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
             Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
             Name.starts_with("avx512.mask.pabs")) {
    // Upgraded to the generic llvm.abs intrinsic.
  } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
             Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
             Name.starts_with("avx512.mask.pmaxs")) {
    // Upgraded to llvm.smax.
  } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
             Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
             Name.starts_with("avx512.mask.pmaxu")) {
    // Upgraded to llvm.umax.
  } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
             Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
             Name.starts_with("avx512.mask.pmins")) {
    // Upgraded to llvm.smin.
  } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
             Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
             Name.starts_with("avx512.mask.pminu")) {
    // Upgraded to llvm.umin.
  } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
             Name == "avx512.pmulu.dq.512" ||
             Name.starts_with("avx512.mask.pmulu.dq.")) {
    // Unsigned 32x32->64 multiply of the even lanes.
  } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
             Name == "avx512.pmul.dq.512" ||
             Name.starts_with("avx512.mask.pmul.dq.")) {
    // Signed 32x32->64 multiply of the even lanes.
  } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
             Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
    // sitofp of the scalar operand, inserted into lane 0.
  }
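  // Illustrative sketch (mine): llvm.x86.sse.cvtsi2ss(%v, %i) is roughly
  //   %f = sitofp i32 %i to float
  //   %r = insertelement <4 x float> %v, float %f, i32 0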
  else if (Name == "avx512.cvtusi2sd") {
    // uitofp of the scalar operand, inserted into lane 0.
  } else if (Name == "sse2.cvtss2sd") {
    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
    Rep = Builder.CreateFPExt(
        Rep, cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
             Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
             Name.starts_with("avx512.mask.cvtdq2pd.") ||
             Name.starts_with("avx512.mask.cvtudq2pd.") ||
             Name.starts_with("avx512.mask.cvtdq2ps.") ||
             Name.starts_with("avx512.mask.cvtudq2ps.") ||
             Name.starts_with("avx512.mask.cvtqq2pd.") ||
             Name.starts_with("avx512.mask.cvtuqq2pd.") ||
             Name == "avx512.mask.cvtqq2ps.256" ||
             Name == "avx512.mask.cvtqq2ps.512" ||
             Name == "avx512.mask.cvtuqq2ps.256" ||
             Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
             Name == "avx.cvt.ps2.pd.256" ||
             Name == "avx512.mask.cvtps2pd.128" ||
             Name == "avx512.mask.cvtps2pd.256") {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());

    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }

    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = Name.contains("cvtu");
    if (IsPS2PD) {
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    } else if (CI->arg_size() == 4 &&
               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
                                    {Rep, CI->getArgOperand(3)});
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }
  }
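  // Note (mine): the "cvtu" test above is what routes unsigned sources to
  // uitofp and signed ones to sitofp; only 512-bit forms with a non-default
  // rounding operand keep a target-specific intrinsic.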
  else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
           Name.starts_with("vcvtph2ps.")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(
        Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
  } else if (Name.starts_with("avx512.mask.load")) {
    // "avx512.mask.loadu." or "avx512.mask.load.".
    bool Aligned = Name[16] != 'u';
  } else if (Name.starts_with("avx512.mask.expand.load.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());
    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_expandload, ResultTy,
        {CI->getArgOperand(0), MaskVec, CI->getArgOperand(1)});
  } else if (Name.starts_with("avx512.mask.compress.store.")) {
    auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
    Value *MaskVec =
        getX86MaskVec(Builder, CI->getArgOperand(2),
                      cast<FixedVectorType>(ResultTy)->getNumElements());
    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_compressstore, ResultTy,
        {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
  } else if (Name.starts_with("avx512.mask.compress.") ||
             Name.starts_with("avx512.mask.expand.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());
    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Rep = Builder.CreateIntrinsic(
        IID, ResultTy, {CI->getArgOperand(0), CI->getArgOperand(1), MaskVec});
  } else if (Name.starts_with("xop.vpcom")) {
    bool IsSigned;
    if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
        Name.ends_with("uq"))
      IsSigned = false;
    else if (Name.ends_with("b") || Name.ends_with("w") ||
             Name.ends_with("d") || Name.ends_with("q"))
      IsSigned = true;
    else
      llvm_unreachable("Unknown suffix");

    unsigned Imm;
    Name = Name.substr(9); // strip off "xop.vpcom"
    if (Name.starts_with("lt"))
      Imm = 0;
    else if (Name.starts_with("le"))
      Imm = 1;
    else if (Name.starts_with("gt"))
      Imm = 2;
    else if (Name.starts_with("ge"))
      Imm = 3;
    else if (Name.starts_with("eq"))
      Imm = 4;
    else if (Name.starts_with("ne"))
      Imm = 5;
    else if (Name.starts_with("false"))
      Imm = 6;
    else if (Name.starts_with("true"))
      Imm = 7;
    else
      llvm_unreachable("Unknown condition");
  } else if (Name.starts_with("xop.vpcmov")) {
    Value *Sel = CI->getArgOperand(2);
    Value *NotSel = Builder.CreateNot(Sel);
    Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
    Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
    Rep = Builder.CreateOr(Sel0, Sel1);
  }
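  // Note (mine): vpcmov(a, b, sel) is the bitwise select (a & sel) | (b & ~sel),
  // which is why it lowers to plain not/and/or instructions above.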
  else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
           Name.starts_with("avx512.mask.prol")) {
    // Rotate left, upgraded to llvm.fshl with equal vector operands.
  } else if (Name.starts_with("avx512.pror") ||
             Name.starts_with("avx512.mask.pror")) {
    // Rotate right, upgraded to llvm.fshr.
  } else if (Name.starts_with("avx512.vpshld.") ||
             Name.starts_with("avx512.mask.vpshld") ||
             Name.starts_with("avx512.maskz.vpshld")) {
    bool ZeroMask = Name[11] == 'z';
  } else if (Name.starts_with("avx512.vpshrd.") ||
             Name.starts_with("avx512.mask.vpshrd") ||
             Name.starts_with("avx512.maskz.vpshrd")) {
    bool ZeroMask = Name[11] == 'z';
  } else if (Name == "sse42.crc32.64.8") {
    Value *Trunc0 =
        Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
                                  {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx.vbroadcast.s") ||
             Name.starts_with("avx512.vbroadcast.s")) {
    // Replace broadcasts with a series of insertelements.
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    Type *EltTy = VecTy->getElementType();
    unsigned EltNum = VecTy->getNumElements();
    Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = PoisonValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
  } else if (Name.starts_with("sse41.pmovsx") ||
             Name.starts_with("sse41.pmovzx") ||
             Name.starts_with("avx2.pmovsx") ||
             Name.starts_with("avx2.pmovzx") ||
             Name.starts_with("avx512.mask.pmovsx") ||
             Name.starts_with("avx512.mask.pmovzx")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();

    // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;
    Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

    bool DoSext = Name.contains("pmovsx");
    Rep =
        DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    // Truncations, lowered to a trunc plus a mask select.
  } else if (Name.starts_with("avx.vbroadcastf128") ||
             Name == "avx2.vbroadcasti128") {
    // Replace vbroadcastf128/vbroadcasti128 with a 128-bit load and a
    // lane-duplicating shufflevector.
    Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
    unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
    Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(Load,
                                        ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  } else if (Name.starts_with("avx512.mask.shuf.i") ||
             Name.starts_with("avx512.mask.shuf.f")) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VT = cast<FixedVectorType>(CI->getType());
    unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
    unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    SmallVector<int, 8> ShuffleMask(0);

    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      // We actually need the other source.
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
  } else if (Name.starts_with("avx512.mask.broadcastf") ||
             Name.starts_with("avx512.mask.broadcasti")) {
    unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
                              ->getNumElements();
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;

    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
  } else if (Name.starts_with("avx2.pbroadcast") ||
             Name.starts_with("avx2.vbroadcast") ||
             Name.starts_with("avx512.pbroadcast") ||
             Name.starts_with("avx512.mask.broadcast.s")) {
    // Splat lane 0 of the source: a shufflevector with an all-zero mask.
    Value *Op = CI->getArgOperand(0);
    SmallVector<int, 8> M(
        cast<FixedVectorType>(CI->getType())->getNumElements(), 0);
    Rep = Builder.CreateShuffleVector(Op, M);
  }
  else if (Name.starts_with("sse2.padds.") ||
           Name.starts_with("avx2.padds.") ||
           Name.starts_with("avx512.padds.") ||
           Name.starts_with("avx512.mask.padds.")) {
    // Signed saturating add, upgraded to llvm.sadd.sat.
  } else if (Name.starts_with("sse2.psubs.") ||
             Name.starts_with("avx2.psubs.") ||
             Name.starts_with("avx512.psubs.") ||
             Name.starts_with("avx512.mask.psubs.")) {
    // Signed saturating sub, upgraded to llvm.ssub.sat.
  } else if (Name.starts_with("sse2.paddus.") ||
             Name.starts_with("avx2.paddus.") ||
             Name.starts_with("avx512.mask.paddus.")) {
    // Unsigned saturating add, upgraded to llvm.uadd.sat.
  } else if (Name.starts_with("sse2.psubus.") ||
             Name.starts_with("avx2.psubus.") ||
             Name.starts_with("avx512.mask.psubus.")) {
    // Unsigned saturating sub, upgraded to llvm.usub.sat.
  } else if (Name.starts_with("avx512.mask.palignr.")) {
    // Byte-wise align-right, expressed as a shufflevector plus mask select.
  } else if (Name.starts_with("avx512.mask.valign.")) {
    // Element-wise align-right.
  } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
    // 128-bit lane shift left by bits; the amount is converted to bytes.
  } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
    // 128-bit lane shift right by bits.
  } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
             Name == "avx512.psll.dq.512") {
    // 128-bit lane shift left by bytes.
  } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
             Name == "avx512.psrl.dq.512") {
    // 128-bit lane shift right by bytes.
  } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
             Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
             Name.starts_with("avx2.pblendd.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  }
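  // Illustrative sketch (mine): for an 8-lane pblendw with Imm = 0xAA the
  // loop above produces the mask <0, 9, 2, 11, 4, 13, 6, 15>: bit i of the
  // immediate picks lane i from the second source instead of the first.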
  else if (Name.starts_with("avx.vinsertf128.") ||
           Name == "avx2.vinserti128" ||
           Name.starts_with("avx512.mask.insert")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    unsigned Scale = DstNumElts / SrcNumElts;

    // Mask off the high bits of the immediate.
    Imm = Imm % Scale;

    // Extend the second operand into a vector the size of the destination.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);

    // Insert the extended vector at the position the immediate selects.
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
  } else if (Name.starts_with("avx.vextractf128.") ||
             Name == "avx2.vextracti128" ||
             Name.starts_with("avx512.mask.vextract")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned Scale = SrcNumElts / DstNumElts;

    // Mask off the high bits of the immediate.
    Imm = Imm % Scale;

    // Extract the subvector the immediate selects.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != DstNumElts; ++i) {
      Idxs[i] = i + (Imm * DstNumElts);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (Name.starts_with("avx512.mask.perm.df.") ||
             Name.starts_with("avx512.mask.perm.di.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (Name.starts_with("avx.vperm2f128.") ||
             Name == "avx2.vperm2i128") {
    // The immediate permutes 128-bit lanes from the two sources.
    Value *V0 = CI->getArgOperand(0);
    Value *V1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);

    // Low half of the result comes from the first source (Imm bits 0-1).
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    // High half comes from the second source (Imm bits 4-5).
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  }
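  // Illustrative sketch (mine): vperm2f128 with Imm = 0x20 on <4 x double>
  // yields the mask <0, 1, 4, 5>, concatenating the low 128-bit halves of
  // the two sources; bits 0-1 and 4-5 of the immediate select each half.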
  else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
           Name.starts_with("avx512.mask.vpermil.p") ||
           Name.starts_with("avx512.mask.pshuf.d.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);

    SmallVector<int, 8> Idxs(NumElts);
    // Look up the bits for this element, wrapping around the immediate every
    // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
    // to offset by the first index of each group.
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (Name == "sse2.pshufl.w" ||
             Name.starts_with("avx512.mask.pshufl.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (Name == "sse2.pshufh.w" ||
             Name.starts_with("avx512.mask.pshufh.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (Name.starts_with("avx512.mask.shuf.p")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
    unsigned HalfLaneElts = NumLaneElts / 2;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane, switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Select the specific element, wrapping the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  } else if (Name.starts_with("avx512.mask.movddup") ||
             Name.starts_with("avx512.mask.movshdup") ||
             Name.starts_with("avx512.mask.movsldup")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  } else if (Name.starts_with("avx512.mask.punpckl") ||
             Name.starts_with("avx512.mask.unpckl.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  } else if (Name.starts_with("avx512.mask.punpckh") ||
             Name.starts_with("avx512.mask.unpckh.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  }
  else if (Name.starts_with("avx512.mask.and.") ||
           Name.starts_with("avx512.mask.pand.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (Name.starts_with("avx512.mask.andn.") ||
             Name.starts_with("avx512.mask.pandn.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (Name.starts_with("avx512.mask.or.") ||
             Name.starts_with("avx512.mask.por.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (Name.starts_with("avx512.mask.xor.") ||
             Name.starts_with("avx512.mask.pxor.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
  } else if (Name.starts_with("avx512.mask.padd.")) {
    Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.psub.")) {
    Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.add.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    }
  } else if (Name.starts_with("avx512.mask.div.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    }
  } else if (Name.starts_with("avx512.mask.mul.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    }
  } else if (Name.starts_with("avx512.mask.sub.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    }
  } else if ((Name.starts_with("avx512.mask.max.p") ||
              Name.starts_with("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
        {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

    Rep = Builder.CreateIntrinsic(
        IID,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
  } else if (Name.starts_with("avx512.mask.lzcnt.")) {
    Rep =
        Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
                                {CI->getArgOperand(0), Builder.getInt1(false)});
  }
  else if (Name.starts_with("avx512.mask.psll")) {
    bool IsImmediate =
        Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
        IID = Intrinsic::x86_avx512_psllv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
                           : Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
                           : Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      else
        llvm_unreachable("Unexpected size");
    }
  }
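  // Note (mine): the fixed indices above decode names such as
  // "avx512.mask.psll.d.128" (vector count), "avx512.mask.pslli.d.128"
  // (immediate count) and "avx512.mask.psllv.d" (per-element count); the
  // character after the prefix selects the form and the element size.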
  else if (Name.starts_with("avx512.mask.psrl")) {
    bool IsImmediate =
        Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
                           : Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
                           : Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }
  }
  else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate =
        Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }
  }
  else if (Name.starts_with("avx512.mask.move.s")) {
    // Scalar move with merge semantics, lowered to shuffles and selects.
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (Name.starts_with("avx512.cvtmask2")) {
    // Sign-extend a k-mask to all-ones/all-zeros lanes.
    Rep = upgradeMaskToInt(Builder, *CI);
  } else if (Name.ends_with(".movntdqa")) {
    // Non-temporal load: a regular aligned load carrying !nontemporal.
    MDNode *Node = MDNode::get(
        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), CI->getArgOperand(0),
        Align(CI->getType()->getPrimitiveSizeInBits() / 8));
    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  } else if (Name.starts_with("fma.vfmadd.") ||
             Name.starts_with("fma.vfmsub.") ||
             Name.starts_with("fma.vfnmadd.") ||
             Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }

    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    if (IsScalar)
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  }
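  // Illustrative sketch (mine): packed fma.vfnmsub.* computes -(a * b) - c,
  // so after the two CreateFNeg calls above it becomes
  //   %r = call <4 x float> @llvm.fma(<4 x float> %na, %b, %nc)
  // with %na = fneg %a and %nc = fneg %c.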
  else if (Name.starts_with("fma4.vfmadd.s")) {
    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    // FMA4 zeroes the upper lanes instead of passing them through.
    Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                      Rep, (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
             Name.starts_with("avx512.maskz.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmsub.s") ||
             Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask|mask3|maskz." prefix.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

    if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }

    Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
                      : IsMask3 ? C
                                : A;

    // For Mask3 with NegAcc, the pass-through must be the unnegated C, so
    // re-extract it from the original operand.
    if (NegAcc && IsMask3)
      PassThru =
          Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);

    Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
             Name.starts_with("avx512.mask.vfnmadd.p") ||
             Name.starts_with("avx512.mask.vfnmsub.p") ||
             Name.starts_with("avx512.mask3.vfmadd.p") ||
             Name.starts_with("avx512.mask3.vfmsub.p") ||
             Name.starts_with("avx512.mask3.vfnmsub.p") ||
             Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask|mask3|maskz." prefix.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (CI->arg_size() == 5 &&
        (!isa<ConstantInt>(CI->getArgOperand(4)) ||
         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the name.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;
      Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }
  } else if (Name.starts_with("fma.vfmsubadd.p")) {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateIntrinsic(IID, Ops);
  }
  else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
           Name.starts_with("avx512.mask3.vfmaddsub.p") ||
           Name.starts_with("avx512.maskz.vfmaddsub.p") ||
           Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask|mask3|maskz." prefix.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (CI->arg_size() == 5) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the name.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2)};

      Function *FMA = Intrinsic::getOrInsertDeclaration(
          CI->getModule(), Intrinsic::fma, Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }
  } else if (Name.starts_with("avx512.mask.pternlog.") ||
             Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), CI->getArgOperand(3)};
    Rep = Builder.CreateIntrinsic(IID, Args);
  } else if (Name.starts_with("avx512.mask.vpmadd52") ||
             Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, Args);
  } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
             Name.starts_with("avx512.mask.vpermt2var.") ||
             Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
  }
  else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
           Name.starts_with("avx512.maskz.vpdpbusd.") ||
           Name.starts_with("avx512.mask.vpdpbusds.") ||
           Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    SmallVector<Value *, 4> Args(CI->args());
    // The second and third operands used to be <N x i32>; bitcast them to
    // the byte vectors the new intrinsic expects.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 16);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 32);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 64);
      else
        llvm_unreachable("Unexpected vector width");
      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }
    Rep = Builder.CreateIntrinsic(IID, Args);
  } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
             Name.starts_with("avx512.maskz.vpdpwssd.") ||
             Name.starts_with("avx512.mask.vpdpwssds.") ||
             Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    SmallVector<Value *, 4> Args(CI->args());
    // The second and third operands used to be <N x i32>; bitcast them to
    // the i16 vectors the new intrinsic expects.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 8);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 16);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 32);
      else
        llvm_unreachable("Unexpected vector width");
      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }
    Rep = Builder.CreateIntrinsic(IID, Args);
  }
  else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
           Name == "addcarry.u32" || Name == "addcarry.u64" ||
           Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    else
      llvm_unreachable("Unexpected intrinsic");

    // Make a call with 3 operands.
    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Value *NewCall = Builder.CreateIntrinsic(IID, Args);

    // Extract the second result and store it through the pointer operand.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);
    Rep = CF;
  }
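  // Illustrative sketch (mine): addcarry.u32(%c, %a, %b, ptr %p) becomes
  //   %pair = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
  //   %sum  = extractvalue { i8, i32 } %pair, 1
  //   store i32 %sum, ptr %p, align 1
  //   %cf   = extractvalue { i8, i32 } %pair, 0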
  else if (Name.starts_with("avx512.mask.") &&
           upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep is updated by the call in the condition.
  }

  if (Name.starts_with("neon.bfcvt")) {
    Type *V4BF16 = FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
    if (Name.starts_with("neon.bfcvtn2")) {
      SmallVector<int, 32> LoMask(4);
      std::iota(LoMask.begin(), LoMask.end(), 0);
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(1), V4BF16);
      return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
    } else if (Name.starts_with("neon.bfcvtn")) {
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
      dbgs() << "Trunc: " << *Trunc << "\n";
      return Builder.CreateShuffleVector(
          Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
    }
    return Builder.CreateFPTrunc(CI->getOperand(0),
                                 Type::getBFloatTy(F->getContext()));
  } else if (Name.starts_with("sve.fcvt")) {
    Intrinsic::ID NewID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
            .Case("sve.fcvtnt.bf16f32",
                  Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
            .Default(Intrinsic::not_intrinsic);
    if (NewID == Intrinsic::not_intrinsic)
      llvm_unreachable("Unhandled Intrinsic!");

    SmallVector<Value *, 3> Args(CI->args());

    // The original intrinsics took a <vscale x 8 x i1> predicate; convert it
    // to the svbool-based form the replacement expects.
    Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
    Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
    if (Args[1]->getType() != BadPredTy)
      llvm_unreachable("Unexpected predicate type!");

    Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
                                      BadPredTy, Args[1]);
    Args[1] = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

    return Builder.CreateIntrinsic(NewID, Args, nullptr, CI->getName());
  }
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate conversions.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0));
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    Intrinsic::ID ID = F->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Select the type list for the replacement intrinsic; the v4i1 predicate
    // becomes v2i1 in every position it appears.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, nullptr, CI->getName());
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    // The new intrinsic gained a trailing i1 operand; default it to false.
    SmallVector<Value *, 8> Args(CI->args());
    Args.push_back(Builder.getFalse());
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::amdgcn_wmma_i32_16x16x64_iu8,
        {CI->getArgOperand(4)->getType(), CI->getArgOperand(1)->getType()});
    SmallVector<OperandBundleDef, 1> Bundles;
    CI->getOperandBundlesAsDefs(Bundles);
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->copyMetadata(*CI);
    return NewCall;
  }

  // Legacy buffer/ds atomic intrinsics become plain atomicrmw instructions.
  if (NumOperands < 3)
    return nullptr; // Malformed.

  bool IsVolatile = false;
  if (NumOperands > 3) {
    // The volatile flag is only present on the longer legacy signatures.
    if (NumOperands > 5) {
      ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
      IsVolatile = !VolatileArg || !VolatileArg->isZero();
    }
  }

  Value *Ptr = CI->getArgOperand(0);
  Value *Val = CI->getArgOperand(1);
  if (auto *VT = dyn_cast<VectorType>(Val->getType()))
    if (VT->getElementType()->isIntegerTy(16)) {
      // <2 x i16> payloads were bfloat pairs in disguise; reinterpret them.
      Type *AsBF16 =
          VectorType::get(Builder.getBFloatTy(), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }

  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != /*LOCAL_ADDRESS=*/3) {
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  MDNode *RangeNotPrivate = MDB.createRange(
      APInt(32, /*PRIVATE_ADDRESS=*/5), APInt(32, /*PRIVATE_ADDRESS=*/5 + 1));
  RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);

  if (IsVolatile)
    RMW->setVolatile(true);

  return Builder.CreateBitCast(RMW, RetTy);
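  // Note (mine): the !noalias.addrspace range [5, 6) recorded above tells
  // later passes the atomicrmw can never address private (scratch) memory,
  // matching the guarantee the old target intrinsic provided.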
  return MAV->getMetadata();

  return I->getDebugLoc().getAsMDNode();

  DbgRecord *DR = nullptr;
  if (Name == "label") {
    // llvm.dbg.label becomes a DbgLabelRecord.
  } else if (Name == "assign") {
    // llvm.dbg.assign becomes an assign-kind DbgVariableRecord.
  } else if (Name == "declare") {
    // llvm.dbg.declare becomes a declare-kind DbgVariableRecord.
  } else if (Name == "addr") {
    // llvm.dbg.addr becomes a value-kind record with an empty address
    // expression.
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        CI->getDebugLoc());
  } else if (Name == "value") {
    // llvm.dbg.value: the expression operand moved from slot 3 to slot 2.
    unsigned ExprOp = 2;
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
  Name = Name.substr(5);

  bool IsX86 = Name.consume_front("x86.");
  bool IsNVVM = Name.consume_front("nvvm.");
  bool IsAArch64 = Name.consume_front("aarch64.");
  bool IsARM = Name.consume_front("arm.");
  bool IsAMDGCN = Name.consume_front("amdgcn.");
  bool IsDbg = Name.consume_front("dbg.");
  Value *Rep = nullptr;

  if (!IsX86 && Name == "stackprotectorcheck") {
    Rep = nullptr;
  } else if (IsNVVM) {
    Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
  } else if (IsX86) {
    Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
  } else if (IsAArch64) {
    Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
  } else if (IsARM) {
    Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
  } else if (IsAMDGCN) {
    Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
  } else if (IsDbg) {
    upgradeDbgIntrinsicToDbgRecord(Name, CI);
  }

  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct return type.
    auto *OldST = cast<StructType>(CI->getType());
    assert(OldST != NewFn->getReturnType() && "Return type must have changed");
    assert(OldST->getNumElements() ==
               cast<StructType>(NewFn->getReturnType())->getNumElements() &&
           "Must have same number of elements");

    SmallVector<Value *> Args(CI->args());
    CallInst *NewCI = Builder.CreateCall(NewFn, Args);
    NewCI->takeName(CI);
    Value *Res = PoisonValue::get(OldST);
    for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
      Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
      Res = Builder.CreateInsertValue(Res, Elem, Idx);
    }
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
  };
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // The lane argument changed from i64 to i32.
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }
  case Intrinsic::coro_end: {
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }
  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz: {
    // The old forms had no "is_zero_poison" flag; default it to false.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
  }

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0),
                                         CI->getArgOperand(1),
                                         NullIsUnknownSize, Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade from the old version that had an extra offset argument.
    if (Name.starts_with("dbg.addr")) {
      DefaultCase();
      return;
    }
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                CI->getArgOperand(2), CI->getArgOperand(3),
                Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                CI->getArgOperand(2), CI->getArgOperand(3),
                Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The operands changed from i64 (on RV64) to i32.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }
    Value *Arg2 = CI->getArgOperand(2);
    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::nvvm_mapa_shared_cluster: {
    // The return pointer now lives in the shared address space.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
    Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        Res, Builder.getPtrTy(/*ADDRESS_SPACE_SHARED=*/3));
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    // Create a new call with the correct address space.
    SmallVector<Value *, 8> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(/*ADDRESS_SPACE_SHARED_CLUSTER=*/7));
    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    SmallVector<Value *, 16> Args(CI->args());
    // The destination pointer moved to the shared::cluster address space.
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(/*ADDRESS_SPACE_SHARED_CLUSTER=*/7));
    // The new signature has a trailing flag operand; default it to 0.
    Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // These used to operate on i64 on RV64; they now always take i32.
    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32 and changed to
    // v2i64; bitcast the operands to the new type.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take a pointer out-parameter. If there are no arguments,
    // it is already upgraded.
    if (CI->getNumOperands() == 0)
      return;
    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }
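  // Illustrative sketch (mine): llvm.x86.rdtscp now returns a { i64, i32 }
  // pair instead of writing the processor ID through a pointer, so the store
  // above re-creates the old out-parameter behaviour for existing IR.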
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8; this argument models
    // an inherently 8-bit immediate anyway.
    SmallVector<Value *, 4> Args(CI->args());
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // The result used to be an i16 vector; it is now a bfloat vector, so
    // bitcast back and forth around the new call.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    // The last two operands used to be i32 vectors; they are bfloat vectors
    // with twice the element count now.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // The alignment operand (index 3) was dropped from the signature; the
    // alignment is carried as an attribute instead.
    SmallVector<Value *, 4> Args(CI->args());
    Args.erase(Args.begin() + 3);
    NewCall = Builder.CreateCall(NewFn, Args);

    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);

    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_store:
  case Intrinsic::masked_scatter: {
    // The explicit i32 alignment operand was dropped; recover it and pass it
    // through the builder API instead.
    const DataLayout &DL = CI->getDataLayout();
    auto GetMaybeAlign = [](Value *Op) {
      return MaybeAlign(cast<ConstantInt>(Op)->getZExtValue());
    };
    auto GetAlign = [&](Value *Op) {
      return DL.getValueOrABITypeAlignment(GetMaybeAlign(Op),
                                           CI->getType()->getScalarType());
    };
    switch (NewFn->getIntrinsicID()) {
    case Intrinsic::masked_load:
      NewCall = Builder.CreateMaskedLoad(
          CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
          CI->getArgOperand(2), CI->getArgOperand(3));
      break;
    case Intrinsic::masked_gather:
      NewCall = Builder.CreateMaskedGather(
          CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
          CI->getArgOperand(2), CI->getArgOperand(3));
      break;
    case Intrinsic::masked_store:
      NewCall = Builder.CreateMaskedStore(
          CI->getArgOperand(0), CI->getArgOperand(1),
          GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
      break;
    case Intrinsic::masked_scatter:
      NewCall = Builder.CreateMaskedScatter(
          CI->getArgOperand(0), CI->getArgOperand(1),
          DL.getValueOrABITypeAlignment(
              GetMaybeAlign(CI->getArgOperand(2)),
              CI->getArgOperand(0)->getType()->getScalarType()),
          CI->getArgOperand(3));
      break;
    default:
      llvm_unreachable("Unexpected intrinsic");
    }
    break;
  }

  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    // The size operand was dropped; the intrinsic now takes only the alloca.
    Value *Ptr = CI->getArgOperand(1)->stripPointerCasts();
    if (!isa<AllocaInst>(Ptr)) {
      CI->eraseFromParent();
      return;
    }
    if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
      NewCall = Builder.CreateLifetimeStart(Ptr);
    else
      NewCall = Builder.CreateLifetimeEnd(Ptr);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512_vpdpbusd_128:
  case Intrinsic::x86_avx512_vpdpbusd_256:
  case Intrinsic::x86_avx512_vpdpbusd_512:
  case Intrinsic::x86_avx512_vpdpbusds_128:
  case Intrinsic::x86_avx512_vpdpbusds_256:
  case Intrinsic::x86_avx512_vpdpbusds_512:
  case Intrinsic::x86_avx2_vpdpbssd_128:
  case Intrinsic::x86_avx2_vpdpbssd_256:
  case Intrinsic::x86_avx10_vpdpbssd_512:
  case Intrinsic::x86_avx2_vpdpbssds_128:
  case Intrinsic::x86_avx2_vpdpbssds_256:
  case Intrinsic::x86_avx10_vpdpbssds_512:
  case Intrinsic::x86_avx2_vpdpbsud_128:
  case Intrinsic::x86_avx2_vpdpbsud_256:
  case Intrinsic::x86_avx10_vpdpbsud_512:
  case Intrinsic::x86_avx2_vpdpbsuds_128:
  case Intrinsic::x86_avx2_vpdpbsuds_256:
  case Intrinsic::x86_avx10_vpdpbsuds_512:
  case Intrinsic::x86_avx2_vpdpbuud_128:
  case Intrinsic::x86_avx2_vpdpbuud_256:
  case Intrinsic::x86_avx10_vpdpbuud_512:
  case Intrinsic::x86_avx2_vpdpbuuds_128:
  case Intrinsic::x86_avx2_vpdpbuuds_256:
  case Intrinsic::x86_avx10_vpdpbuuds_512: {
    // The second and third operands changed from <N x i32> to byte vectors.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Type *NewArgType = FixedVectorType::get(Builder.getInt8Ty(), VecWidth / 8);
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_vpdpwssd_128:
  case Intrinsic::x86_avx512_vpdpwssd_256:
  case Intrinsic::x86_avx512_vpdpwssd_512:
  case Intrinsic::x86_avx512_vpdpwssds_128:
  case Intrinsic::x86_avx512_vpdpwssds_256:
  case Intrinsic::x86_avx512_vpdpwssds_512:
  case Intrinsic::x86_avx2_vpdpwsud_128:
  case Intrinsic::x86_avx2_vpdpwsud_256:
  case Intrinsic::x86_avx10_vpdpwsud_512:
  case Intrinsic::x86_avx2_vpdpwsuds_128:
  case Intrinsic::x86_avx2_vpdpwsuds_256:
  case Intrinsic::x86_avx10_vpdpwsuds_512:
  case Intrinsic::x86_avx2_vpdpwusd_128:
  case Intrinsic::x86_avx2_vpdpwusd_256:
  case Intrinsic::x86_avx10_vpdpwusd_512:
  case Intrinsic::x86_avx2_vpdpwusds_128:
  case Intrinsic::x86_avx2_vpdpwusds_256:
  case Intrinsic::x86_avx10_vpdpwusds_512:
  case Intrinsic::x86_avx2_vpdpwuud_128:
  case Intrinsic::x86_avx2_vpdpwuud_256:
  case Intrinsic::x86_avx10_vpdpwuud_512:
  case Intrinsic::x86_avx2_vpdpwuuds_128:
  case Intrinsic::x86_avx2_vpdpwuuds_256:
  case Intrinsic::x86_avx10_vpdpwuuds_512: {
    // The second and third operands changed from <N x i32> to i16 vectors.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Type *NewArgType =
        FixedVectorType::get(Builder.getInt16Ty(), VecWidth / 16);
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  F->eraseFromParent();

  if (NumOperands == 0)
    return &MD;

  if (NumOperands == 3) {
    // Expand the old scalar TBAA node into the struct-path form.
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Ctx)))};
    return MDNode::get(Ctx, Elts2);
  }

  if (Opc != Instruction::BitCast)
    return nullptr;
  Type *SrcTy = V->getType();

  if (Opc != Instruction::BitCast)
    return nullptr;
  Type *SrcTy = C->getType();

  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
    }
  }

  bool BrokenDebugInfo = false;
  if (!BrokenDebugInfo)
    // Everything is ok.
    return false;
  // Diagnose malformed debug info.
  M.getContext().diagnose(Diag);

  // Diagnose a version mismatch after stripping.
  M.getContext().diagnose(DiagVersion);
5721 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5724 if (
F->hasFnAttribute(Attr)) {
5727 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5729 auto [Part, Rest] = S.
split(
',');
5735 const unsigned Dim = DimC -
'x';
5736 assert(Dim < 3 &&
"Unexpected dim char");
5746 F->addFnAttr(Attr, NewAttr);
5750 return S ==
"x" || S ==
"y" || S ==
"z";
5755 if (K ==
"kernel") {
5767 const unsigned Idx = (AlignIdxValuePair >> 16);
5768 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5773 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5778 if (K ==
"minctasm") {
5783 if (K ==
"maxnreg") {
5788 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5792 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5796 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5800 if (K ==
"grid_constant") {
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
  // ...
  if (!SeenNodes.insert(MD).second)
    // ...
  assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
  // ...
  for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
    // ...
    const MDOperand &V = MD->getOperand(j + 1);
    // ...
    NewOperands.append({K, V});
  }
  // ...
  if (NewOperands.size() > 1)
    // ...
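
UpgradeNVVMAnnotations walks the legacy !nvvm.annotations list, whose entries pair a global with (key, value) operands, a shape assumed here from the stride-2 loop above, e.g.

  !nvvm.annotations = !{!0}
  !0 = !{ptr @f, !"kernel", i32 1}

and turns each recognized key into the corresponding function attribute or calling convention. Invoking it is a one-liner:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"

static void upgradeNVVM(llvm::Module &M) {
  llvm::UpgradeNVVMAnnotations(M); // no-op when !nvvm.annotations is absent
}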
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    // ...
    ID->getString().split(ValueComp, "#");
    if (ValueComp.size() == 2) {
      std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
      // ...
    }
    // ...
    M.eraseNamedMetadata(ModRetainReleaseMarker);
  }
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID NewIntrinsic) {
    // ...
    bool InvalidCast = false;
    // ...
    for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
      // ...
      Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
      // ...
      Args.push_back(Arg);
    }
    // ...
    CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
    // ...
    Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
    // ...
  };
  // ...
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
  // ...
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
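
UpgradeARCRuntime, excerpted above, rewrites direct calls to the ObjC ARC runtime entry points in the RuntimeFuncs table into calls to the matching objc.* intrinsics, bitcasting arguments and results where the prototypes differ. A usage sketch:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"

static void upgradeARC(llvm::Module &M) {
  // Runtime functions absent from the module are simply skipped.
  llvm::UpgradeARCRuntime(M);
}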
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  // ...
  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  // ...
  if (Op->getNumOperands() != 3)
    // ...
  if (ID->getString() == "Objective-C Image Info Version")
    // ...
  if (ID->getString() == "Objective-C Class Properties")
    HasClassProperties = true;
  // ...
  if (ID->getString() == "PIC Level") {
    if (auto *Behavior = /* ... */) {
      uint64_t V = Behavior->getLimitedValue();
      // ...
    }
  }
  if (ID->getString() == "PIE Level")
    if (auto *Behavior = /* ... */)
      // ...
  if (ID->getString() == "branch-target-enforcement" ||
      ID->getString().starts_with("sign-return-address")) {
    if (auto *Behavior = /* ... */) {
      // ...
      /* ... */ Op->getOperand(1), Op->getOperand(2)};
      // ...
    }
  }
  if (ID->getString() == "Objective-C Image Info Section") {
    // ...
    Value->getString().split(ValueComp, " ");
    if (ValueComp.size() != 1) {
      std::string NewValue;
      for (auto &S : ValueComp)
        NewValue += S.str();
      // ...
    }
  }
  if (ID->getString() == "Objective-C Garbage Collection") {
    // ...
    assert(Md->getValue() && "Expected non-empty metadata");
    auto Type = Md->getValue()->getType();
    // ...
    unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
    if ((Val & 0xff) != Val) {
      HasSwiftVersionFlag = true;
      SwiftABIVersion = (Val & 0xff00) >> 8;
      SwiftMajorVersion = (Val & 0xff000000) >> 24;
      SwiftMinorVersion = (Val & 0xff0000) >> 16;
      // ...
    }
  }
  if (ID->getString() == "amdgpu_code_object_version") {
    // ...
    MDString::get(M.getContext(), "amdhsa_code_object_version"),
    // ...
  }
  // ...
  if (HasObjCFlag && !HasClassProperties) {
    // ...
  }
  if (HasSwiftVersionFlag) {
    // ...
    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    // ...
    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    // ...
  }
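
UpgradeModuleFlags normalizes legacy module flags: it unpacks the combined Objective-C garbage-collection word into separate Swift ABI/major/minor flags (the bit twiddling above), adjusts the behaviors of flags such as "PIC Level" and "PIE Level", and renames "amdgpu_code_object_version" to "amdhsa_code_object_version". A sketch of the call site:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"

static bool upgradeFlags(llvm::Module &M) {
  return llvm::UpgradeModuleFlags(M); // true if any flag was rewritten
}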
  auto TrimSpaces = [](StringRef Section) -> std::string {
    // ...
    Section.split(Components, ',');
    // ...
    for (auto Component : Components)
      OS << ',' << Component.trim();
    // ...
  };
  // ...
  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      // ...
    if (!Section.starts_with("__DATA, __objc_catlist"))
      // ...
    GV.setSection(TrimSpaces(Section));
  }
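
UpgradeSectionAttributes normalizes whitespace in __objc_catlist section strings via the TrimSpaces lambda above, so a section such as "__DATA, __objc_catlist, regular" has the spaces around its commas stripped. Sketch:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"

static void normalizeObjCSections(llvm::Module &M) {
  llvm::UpgradeSectionAttributes(M);
}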
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;
  // ...
    if (!Call.isStrictFP())
      // ...
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  // ...
};

struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
  // ...
  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    // ...
  }
};
// ...
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    // ...
  }
  // ...
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
      F.getReturnType(), F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    // ...
        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
  // ...
  if (Attribute A = F.getFnAttribute("implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    F.removeFnAttr("implicit-section-name");
  }
  // ...
  if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
      /* ... */) {
    if (A.getValueAsBool()) {
      AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
      // ...
    }
    F.removeFnAttr("amdgpu-unsafe-fp-atomics");
  }
// ...
  if (!F.hasFnAttribute(FnAttrName))
    F.addFnAttr(FnAttrName, Value);
// ...
  if (!F.hasFnAttribute(FnAttrName)) {
    // ...
    F.addFnAttr(FnAttrName);
    // ...
  }
  auto A = F.getFnAttribute(FnAttrName);
  if ("false" == A.getValueAsString())
    F.removeFnAttr(FnAttrName);
  else if ("true" == A.getValueAsString()) {
    F.removeFnAttr(FnAttrName);
    F.addFnAttr(FnAttrName);
  }
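
UpgradeFunctionAttributes, together with the two visitors above, corrects IR relying on old attribute behavior: dropping strictfp from calls in non-strictfp functions, removing return and parameter attributes that are incompatible with their types, converting "implicit-section-name" into a real section, and rewriting the deprecated "amdgpu-unsafe-fp-atomics" hint. Sketch:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"

static void upgradeAllFunctionAttrs(llvm::Module &M) {
  for (llvm::Function &F : M.functions())
    llvm::UpgradeFunctionAttributes(F);
}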
  Triple T(M.getTargetTriple());
  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
    // ...
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  // ...
  if (Op->getNumOperands() != 3)
    // ...
  uint64_t *ValPtr = IDStr == "branch-target-enforcement"    ? &BTEValue
                     : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                     : IDStr == "guarded-control-stack"      ? &GCSValue
                     : IDStr == "sign-return-address"        ? &SRAValue
                     : IDStr == "sign-return-address-all"    ? &SRAALLValue
                     : IDStr == "sign-return-address-with-bkey"
                         /* ... */;
  // ...
  *ValPtr = CI->getZExtValue();
  // ...
  bool BTE = BTEValue == 1;
  bool BPPLR = BPPLRValue == 1;
  bool GCS = GCSValue == 1;
  bool SRA = SRAValue == 1;
  // ...
  if (SRA && SRAALLValue == 1)
    SignTypeValue = "all";
  // ...
  if (SRA && SRABKeyValue == 1)
    SignKeyValue = "b_key";
  // ...
  for (Function &F : M.getFunctionList()) {
    if (F.isDeclaration())
      // ...
    if (auto A = F.getFnAttribute("sign-return-address");
        A.isValid() && "none" == A.getValueAsString()) {
      F.removeFnAttr("sign-return-address");
      F.removeFnAttr("sign-return-address-key");
      // ...
    }
    // ...
    if (SRAALLValue == 1)
      // ...
    if (SRABKeyValue == 1)
      // ...
  }
  if (T->getNumOperands() < 1)
    // ...
  return S->getString().starts_with("llvm.vectorizer.");
// ...
  StringRef OldPrefix = "llvm.vectorizer.";
  // ...
  if (OldTag == "llvm.vectorizer.unroll")
    // ...
// ...
  if (T->getNumOperands() < 1)
    // ...
  if (!OldTag->getString().starts_with("llvm.vectorizer."))
    // ...
  Ops.reserve(T->getNumOperands());
  // ...
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));
// ...
  Ops.reserve(T->getNumOperands());
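
These helpers (isOldLoopArgument, upgradeLoopTag, upgradeLoopArgument, upgradeInstructionLoopAttachment) migrate legacy "llvm.vectorizer.*" loop hints to the current "llvm.loop.vectorize.*" spelling, with the old "llvm.vectorizer.unroll" handled as a special case. A hedged sketch of upgrading the !llvm.loop attachment on a loop latch branch:

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

static void upgradeLoopMD(llvm::Instruction &LatchBranch) {
  if (llvm::MDNode *LoopMD =
          LatchBranch.getMetadata(llvm::LLVMContext::MD_loop))
    LatchBranch.setMetadata(llvm::LLVMContext::MD_loop,
                            llvm::upgradeInstructionLoopAttachment(*LoopMD));
}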
  if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }
  // ...
  if (T.isLoongArch64() || T.isRISCV64()) {
    // ...
    auto I = DL.find("-n64-");
    // ...
    return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
  }
  // ...
  std::string Res = DL.str();
  // ...
  if (!DL.contains("-G") && !DL.starts_with("G"))
    Res.append(Res.empty() ? "G1" : "-G1");
  // ...
  if (!DL.contains("-ni") && !DL.starts_with("ni"))
    Res.append("-ni:7:8:9");
  // ...
  if (DL.ends_with("ni:7"))
    // ...
  if (DL.ends_with("ni:7:8"))
    // ...
  if (!DL.contains("-p7") && !DL.starts_with("p7"))
    Res.append("-p7:160:256:256:32");
  if (!DL.contains("-p8") && !DL.starts_with("p8"))
    Res.append("-p8:128:128:128:48");
  constexpr StringRef OldP8("-p8:128:128-");
  if (DL.contains(OldP8))
    Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
  if (!DL.contains("-p9") && !DL.starts_with("p9"))
    Res.append("-p9:192:256:256:32");
  // ...
  if (!DL.contains("m:e"))
    Res = Res.empty() ? "m:e" : "m:e-" + Res;
  // ...
  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // ...
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      // ...
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        // ...
    }
  };
  // ...
  if (T.isAArch64()) {
    // ...
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    // ...
  }
  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      /* ... */) {
    // ...
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    // ...
    size_t Pos = Res.find(I64);
    if (Pos != size_t(-1))
      Res.insert(Pos + I64.size(), I128);
    // ...
  }
  if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
    size_t Pos = Res.find("-S128");
    // ...
    Res.insert(Pos, "-f64:32:64");
    // ...
  }
  // ...
  AddPtr32Ptr64AddrSpaces();
  // ...
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    // ...
    Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
    if (R.match(Res, &Groups))
      // ...
  }
  // ...
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    // ...
    auto I = Ref.find("-f80:32-");
    // ...
    Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }
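
UpgradeDataLayoutString patches datalayout strings from older bitcode so they match what the current target expects: AMDGPU gains default globals-address-space, non-integral, and fat-pointer entries, x86 gains the p270/p271/p272 mixed-pointer address spaces, and several 64-bit targets gain "-i128:128". A usage sketch (the exact output depends on target and LLVM version):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/AutoUpgrade.h"
#include <string>

static std::string upgradeDL(llvm::StringRef OldDL, llvm::StringRef Triple) {
  // E.g. with an x86_64 triple, a missing "-i128:128" is spliced in right
  // after the "-i64:64" component (see the Res.insert above).
  return llvm::UpgradeDataLayoutString(OldDL, Triple);
}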
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  // ...
  FramePointer = A.getValueAsString() == "true" ? "all" : "none";
  B.removeAttribute("no-frame-pointer-elim");
  // ...
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // ...
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);
  // ...
  A = B.getAttribute("null-pointer-is-valid");
  // ...
  bool NullPointerIsValid = A.getValueAsString() == "true";
  B.removeAttribute("null-pointer-is-valid");
  if (NullPointerIsValid)
    B.addAttribute(Attribute::NullPointerIsValid);
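
UpgradeAttributes handles renamed attributes on an AttrBuilder: the "no-frame-pointer-elim"/"no-frame-pointer-elim-non-leaf" pair folds into "frame-pointer"="all"/"non-leaf"/"none", and "null-pointer-is-valid"="true" becomes the enum attribute. A sketch, assuming a recent LLVM where AttrBuilder is constructed from an LLVMContext:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/LLVMContext.h"

static void upgradeLegacyAttrs(llvm::AttrBuilder &B) {
  B.addAttribute("no-frame-pointer-elim", "true"); // simulate legacy input
  llvm::UpgradeAttributes(B); // B now holds "frame-pointer"="all"
}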
    return OBD.getTag() == "clang.arc.attachedcall" &&
           OBD.inputs().empty();