35#include "llvm/IR/IntrinsicsAArch64.h"
36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsARM.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/IR/IntrinsicsWebAssembly.h"
41#include "llvm/IR/IntrinsicsX86.h"
64 cl::desc(
"Disable autoupgrade of debug info"));
83 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
98 Type *LastArgType =
F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
114 if (
F->getReturnType()->isVectorTy())
127 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
128 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
145 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
146 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
160 if (
F->getReturnType()->getScalarType()->isBFloatTy())
170 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
184 if (Name.consume_front(
"avx."))
185 return (Name.starts_with(
"blend.p") ||
186 Name ==
"cvt.ps2.pd.256" ||
187 Name ==
"cvtdq2.pd.256" ||
188 Name ==
"cvtdq2.ps.256" ||
189 Name.starts_with(
"movnt.") ||
190 Name.starts_with(
"sqrt.p") ||
191 Name.starts_with(
"storeu.") ||
192 Name.starts_with(
"vbroadcast.s") ||
193 Name.starts_with(
"vbroadcastf128") ||
194 Name.starts_with(
"vextractf128.") ||
195 Name.starts_with(
"vinsertf128.") ||
196 Name.starts_with(
"vperm2f128.") ||
197 Name.starts_with(
"vpermil."));
199 if (Name.consume_front(
"avx2."))
200 return (Name ==
"movntdqa" ||
201 Name.starts_with(
"pabs.") ||
202 Name.starts_with(
"padds.") ||
203 Name.starts_with(
"paddus.") ||
204 Name.starts_with(
"pblendd.") ||
206 Name.starts_with(
"pbroadcast") ||
207 Name.starts_with(
"pcmpeq.") ||
208 Name.starts_with(
"pcmpgt.") ||
209 Name.starts_with(
"pmax") ||
210 Name.starts_with(
"pmin") ||
211 Name.starts_with(
"pmovsx") ||
212 Name.starts_with(
"pmovzx") ||
214 Name ==
"pmulu.dq" ||
215 Name.starts_with(
"psll.dq") ||
216 Name.starts_with(
"psrl.dq") ||
217 Name.starts_with(
"psubs.") ||
218 Name.starts_with(
"psubus.") ||
219 Name.starts_with(
"vbroadcast") ||
220 Name ==
"vbroadcasti128" ||
221 Name ==
"vextracti128" ||
222 Name ==
"vinserti128" ||
223 Name ==
"vperm2i128");
225 if (Name.consume_front(
"avx512.")) {
226 if (Name.consume_front(
"mask."))
228 return (Name.starts_with(
"add.p") ||
229 Name.starts_with(
"and.") ||
230 Name.starts_with(
"andn.") ||
231 Name.starts_with(
"broadcast.s") ||
232 Name.starts_with(
"broadcastf32x4.") ||
233 Name.starts_with(
"broadcastf32x8.") ||
234 Name.starts_with(
"broadcastf64x2.") ||
235 Name.starts_with(
"broadcastf64x4.") ||
236 Name.starts_with(
"broadcasti32x4.") ||
237 Name.starts_with(
"broadcasti32x8.") ||
238 Name.starts_with(
"broadcasti64x2.") ||
239 Name.starts_with(
"broadcasti64x4.") ||
240 Name.starts_with(
"cmp.b") ||
241 Name.starts_with(
"cmp.d") ||
242 Name.starts_with(
"cmp.q") ||
243 Name.starts_with(
"cmp.w") ||
244 Name.starts_with(
"compress.b") ||
245 Name.starts_with(
"compress.d") ||
246 Name.starts_with(
"compress.p") ||
247 Name.starts_with(
"compress.q") ||
248 Name.starts_with(
"compress.store.") ||
249 Name.starts_with(
"compress.w") ||
250 Name.starts_with(
"conflict.") ||
251 Name.starts_with(
"cvtdq2pd.") ||
252 Name.starts_with(
"cvtdq2ps.") ||
253 Name ==
"cvtpd2dq.256" ||
254 Name ==
"cvtpd2ps.256" ||
255 Name ==
"cvtps2pd.128" ||
256 Name ==
"cvtps2pd.256" ||
257 Name.starts_with(
"cvtqq2pd.") ||
258 Name ==
"cvtqq2ps.256" ||
259 Name ==
"cvtqq2ps.512" ||
260 Name ==
"cvttpd2dq.256" ||
261 Name ==
"cvttps2dq.128" ||
262 Name ==
"cvttps2dq.256" ||
263 Name.starts_with(
"cvtudq2pd.") ||
264 Name.starts_with(
"cvtudq2ps.") ||
265 Name.starts_with(
"cvtuqq2pd.") ||
266 Name ==
"cvtuqq2ps.256" ||
267 Name ==
"cvtuqq2ps.512" ||
268 Name.starts_with(
"dbpsadbw.") ||
269 Name.starts_with(
"div.p") ||
270 Name.starts_with(
"expand.b") ||
271 Name.starts_with(
"expand.d") ||
272 Name.starts_with(
"expand.load.") ||
273 Name.starts_with(
"expand.p") ||
274 Name.starts_with(
"expand.q") ||
275 Name.starts_with(
"expand.w") ||
276 Name.starts_with(
"fpclass.p") ||
277 Name.starts_with(
"insert") ||
278 Name.starts_with(
"load.") ||
279 Name.starts_with(
"loadu.") ||
280 Name.starts_with(
"lzcnt.") ||
281 Name.starts_with(
"max.p") ||
282 Name.starts_with(
"min.p") ||
283 Name.starts_with(
"movddup") ||
284 Name.starts_with(
"move.s") ||
285 Name.starts_with(
"movshdup") ||
286 Name.starts_with(
"movsldup") ||
287 Name.starts_with(
"mul.p") ||
288 Name.starts_with(
"or.") ||
289 Name.starts_with(
"pabs.") ||
290 Name.starts_with(
"packssdw.") ||
291 Name.starts_with(
"packsswb.") ||
292 Name.starts_with(
"packusdw.") ||
293 Name.starts_with(
"packuswb.") ||
294 Name.starts_with(
"padd.") ||
295 Name.starts_with(
"padds.") ||
296 Name.starts_with(
"paddus.") ||
297 Name.starts_with(
"palignr.") ||
298 Name.starts_with(
"pand.") ||
299 Name.starts_with(
"pandn.") ||
300 Name.starts_with(
"pavg") ||
301 Name.starts_with(
"pbroadcast") ||
302 Name.starts_with(
"pcmpeq.") ||
303 Name.starts_with(
"pcmpgt.") ||
304 Name.starts_with(
"perm.df.") ||
305 Name.starts_with(
"perm.di.") ||
306 Name.starts_with(
"permvar.") ||
307 Name.starts_with(
"pmaddubs.w.") ||
308 Name.starts_with(
"pmaddw.d.") ||
309 Name.starts_with(
"pmax") ||
310 Name.starts_with(
"pmin") ||
311 Name ==
"pmov.qd.256" ||
312 Name ==
"pmov.qd.512" ||
313 Name ==
"pmov.wb.256" ||
314 Name ==
"pmov.wb.512" ||
315 Name.starts_with(
"pmovsx") ||
316 Name.starts_with(
"pmovzx") ||
317 Name.starts_with(
"pmul.dq.") ||
318 Name.starts_with(
"pmul.hr.sw.") ||
319 Name.starts_with(
"pmulh.w.") ||
320 Name.starts_with(
"pmulhu.w.") ||
321 Name.starts_with(
"pmull.") ||
322 Name.starts_with(
"pmultishift.qb.") ||
323 Name.starts_with(
"pmulu.dq.") ||
324 Name.starts_with(
"por.") ||
325 Name.starts_with(
"prol.") ||
326 Name.starts_with(
"prolv.") ||
327 Name.starts_with(
"pror.") ||
328 Name.starts_with(
"prorv.") ||
329 Name.starts_with(
"pshuf.b.") ||
330 Name.starts_with(
"pshuf.d.") ||
331 Name.starts_with(
"pshufh.w.") ||
332 Name.starts_with(
"pshufl.w.") ||
333 Name.starts_with(
"psll.d") ||
334 Name.starts_with(
"psll.q") ||
335 Name.starts_with(
"psll.w") ||
336 Name.starts_with(
"pslli") ||
337 Name.starts_with(
"psllv") ||
338 Name.starts_with(
"psra.d") ||
339 Name.starts_with(
"psra.q") ||
340 Name.starts_with(
"psra.w") ||
341 Name.starts_with(
"psrai") ||
342 Name.starts_with(
"psrav") ||
343 Name.starts_with(
"psrl.d") ||
344 Name.starts_with(
"psrl.q") ||
345 Name.starts_with(
"psrl.w") ||
346 Name.starts_with(
"psrli") ||
347 Name.starts_with(
"psrlv") ||
348 Name.starts_with(
"psub.") ||
349 Name.starts_with(
"psubs.") ||
350 Name.starts_with(
"psubus.") ||
351 Name.starts_with(
"pternlog.") ||
352 Name.starts_with(
"punpckh") ||
353 Name.starts_with(
"punpckl") ||
354 Name.starts_with(
"pxor.") ||
355 Name.starts_with(
"shuf.f") ||
356 Name.starts_with(
"shuf.i") ||
357 Name.starts_with(
"shuf.p") ||
358 Name.starts_with(
"sqrt.p") ||
359 Name.starts_with(
"store.b.") ||
360 Name.starts_with(
"store.d.") ||
361 Name.starts_with(
"store.p") ||
362 Name.starts_with(
"store.q.") ||
363 Name.starts_with(
"store.w.") ||
364 Name ==
"store.ss" ||
365 Name.starts_with(
"storeu.") ||
366 Name.starts_with(
"sub.p") ||
367 Name.starts_with(
"ucmp.") ||
368 Name.starts_with(
"unpckh.") ||
369 Name.starts_with(
"unpckl.") ||
370 Name.starts_with(
"valign.") ||
371 Name ==
"vcvtph2ps.128" ||
372 Name ==
"vcvtph2ps.256" ||
373 Name.starts_with(
"vextract") ||
374 Name.starts_with(
"vfmadd.") ||
375 Name.starts_with(
"vfmaddsub.") ||
376 Name.starts_with(
"vfnmadd.") ||
377 Name.starts_with(
"vfnmsub.") ||
378 Name.starts_with(
"vpdpbusd.") ||
379 Name.starts_with(
"vpdpbusds.") ||
380 Name.starts_with(
"vpdpwssd.") ||
381 Name.starts_with(
"vpdpwssds.") ||
382 Name.starts_with(
"vpermi2var.") ||
383 Name.starts_with(
"vpermil.p") ||
384 Name.starts_with(
"vpermilvar.") ||
385 Name.starts_with(
"vpermt2var.") ||
386 Name.starts_with(
"vpmadd52") ||
387 Name.starts_with(
"vpshld.") ||
388 Name.starts_with(
"vpshldv.") ||
389 Name.starts_with(
"vpshrd.") ||
390 Name.starts_with(
"vpshrdv.") ||
391 Name.starts_with(
"vpshufbitqmb.") ||
392 Name.starts_with(
"xor."));
394 if (Name.consume_front(
"mask3."))
396 return (Name.starts_with(
"vfmadd.") ||
397 Name.starts_with(
"vfmaddsub.") ||
398 Name.starts_with(
"vfmsub.") ||
399 Name.starts_with(
"vfmsubadd.") ||
400 Name.starts_with(
"vfnmsub."));
402 if (Name.consume_front(
"maskz."))
404 return (Name.starts_with(
"pternlog.") ||
405 Name.starts_with(
"vfmadd.") ||
406 Name.starts_with(
"vfmaddsub.") ||
407 Name.starts_with(
"vpdpbusd.") ||
408 Name.starts_with(
"vpdpbusds.") ||
409 Name.starts_with(
"vpdpwssd.") ||
410 Name.starts_with(
"vpdpwssds.") ||
411 Name.starts_with(
"vpermt2var.") ||
412 Name.starts_with(
"vpmadd52") ||
413 Name.starts_with(
"vpshldv.") ||
414 Name.starts_with(
"vpshrdv."));
417 return (Name ==
"movntdqa" ||
418 Name ==
"pmul.dq.512" ||
419 Name ==
"pmulu.dq.512" ||
420 Name.starts_with(
"broadcastm") ||
421 Name.starts_with(
"cmp.p") ||
422 Name.starts_with(
"cvtb2mask.") ||
423 Name.starts_with(
"cvtd2mask.") ||
424 Name.starts_with(
"cvtmask2") ||
425 Name.starts_with(
"cvtq2mask.") ||
426 Name ==
"cvtusi2sd" ||
427 Name.starts_with(
"cvtw2mask.") ||
432 Name ==
"kortestc.w" ||
433 Name ==
"kortestz.w" ||
434 Name.starts_with(
"kunpck") ||
437 Name.starts_with(
"padds.") ||
438 Name.starts_with(
"pbroadcast") ||
439 Name.starts_with(
"prol") ||
440 Name.starts_with(
"pror") ||
441 Name.starts_with(
"psll.dq") ||
442 Name.starts_with(
"psrl.dq") ||
443 Name.starts_with(
"psubs.") ||
444 Name.starts_with(
"ptestm") ||
445 Name.starts_with(
"ptestnm") ||
446 Name.starts_with(
"storent.") ||
447 Name.starts_with(
"vbroadcast.s") ||
448 Name.starts_with(
"vpshld.") ||
449 Name.starts_with(
"vpshrd."));
452 if (Name.consume_front(
"fma."))
453 return (Name.starts_with(
"vfmadd.") ||
454 Name.starts_with(
"vfmsub.") ||
455 Name.starts_with(
"vfmsubadd.") ||
456 Name.starts_with(
"vfnmadd.") ||
457 Name.starts_with(
"vfnmsub."));
459 if (Name.consume_front(
"fma4."))
460 return Name.starts_with(
"vfmadd.s");
462 if (Name.consume_front(
"sse."))
463 return (Name ==
"add.ss" ||
464 Name ==
"cvtsi2ss" ||
465 Name ==
"cvtsi642ss" ||
468 Name.starts_with(
"sqrt.p") ||
470 Name.starts_with(
"storeu.") ||
473 if (Name.consume_front(
"sse2."))
474 return (Name ==
"add.sd" ||
475 Name ==
"cvtdq2pd" ||
476 Name ==
"cvtdq2ps" ||
477 Name ==
"cvtps2pd" ||
478 Name ==
"cvtsi2sd" ||
479 Name ==
"cvtsi642sd" ||
480 Name ==
"cvtss2sd" ||
483 Name.starts_with(
"padds.") ||
484 Name.starts_with(
"paddus.") ||
485 Name.starts_with(
"pcmpeq.") ||
486 Name.starts_with(
"pcmpgt.") ||
491 Name ==
"pmulu.dq" ||
492 Name.starts_with(
"pshuf") ||
493 Name.starts_with(
"psll.dq") ||
494 Name.starts_with(
"psrl.dq") ||
495 Name.starts_with(
"psubs.") ||
496 Name.starts_with(
"psubus.") ||
497 Name.starts_with(
"sqrt.p") ||
499 Name ==
"storel.dq" ||
500 Name.starts_with(
"storeu.") ||
503 if (Name.consume_front(
"sse41."))
504 return (Name.starts_with(
"blendp") ||
505 Name ==
"movntdqa" ||
515 Name.starts_with(
"pmovsx") ||
516 Name.starts_with(
"pmovzx") ||
519 if (Name.consume_front(
"sse42."))
520 return Name ==
"crc32.64.8";
522 if (Name.consume_front(
"sse4a."))
523 return Name.starts_with(
"movnt.");
525 if (Name.consume_front(
"ssse3."))
526 return (Name ==
"pabs.b.128" ||
527 Name ==
"pabs.d.128" ||
528 Name ==
"pabs.w.128");
530 if (Name.consume_front(
"xop."))
531 return (Name ==
"vpcmov" ||
532 Name ==
"vpcmov.256" ||
533 Name.starts_with(
"vpcom") ||
534 Name.starts_with(
"vprot"));
536 if (Name.consume_front(
"bmi."))
537 return (Name.starts_with(
"pdep.") ||
538 Name.starts_with(
"pext."));
540 return (Name ==
"addcarry.u32" ||
541 Name ==
"addcarry.u64" ||
542 Name ==
"addcarryx.u32" ||
543 Name ==
"addcarryx.u64" ||
544 Name ==
"subborrow.u32" ||
545 Name ==
"subborrow.u64" ||
546 Name.starts_with(
"vcvtph2ps."));
552 if (!Name.consume_front(
"x86."))
560 if (Name ==
"rdtscp") {
562 if (
F->getFunctionType()->getNumParams() == 0)
567 Intrinsic::x86_rdtscp);
574 if (Name.consume_front(
"sse41.ptest")) {
576 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
577 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
578 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
591 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
592 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
593 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
594 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
595 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
596 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
601 if (Name.consume_front(
"avx512.")) {
602 if (Name.consume_front(
"mask.cmp.")) {
605 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
606 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
607 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
608 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
609 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
610 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
614 }
else if (Name.starts_with(
"vpdpbusd.") ||
615 Name.starts_with(
"vpdpbusds.")) {
618 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
619 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
620 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
621 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
622 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
623 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
627 }
else if (Name.starts_with(
"vpdpwssd.") ||
628 Name.starts_with(
"vpdpwssds.")) {
631 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
632 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
633 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
634 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
635 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
636 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
644 if (Name.consume_front(
"avx2.")) {
645 if (Name.consume_front(
"vpdpb")) {
648 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
649 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
650 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
651 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
652 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
653 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
654 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
655 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
656 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
657 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
658 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
659 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
663 }
else if (Name.consume_front(
"vpdpw")) {
666 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
667 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
668 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
669 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
670 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
671 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
672 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
673 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
674 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
675 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
676 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
677 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
685 if (Name.consume_front(
"avx10.")) {
686 if (Name.consume_front(
"vpdpb")) {
689 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
690 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
691 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
692 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
693 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
694 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
698 }
else if (Name.consume_front(
"vpdpw")) {
700 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
701 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
702 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
703 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
704 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
705 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
713 if (Name.consume_front(
"avx512bf16.")) {
716 .
Case(
"cvtne2ps2bf16.128",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
718 .
Case(
"cvtne2ps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
720 .
Case(
"cvtne2ps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
722 .
Case(
"mask.cvtneps2bf16.128",
723 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
724 .
Case(
"cvtneps2bf16.256",
725 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
726 .
Case(
"cvtneps2bf16.512",
727 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
734 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
735 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
736 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
743 if (Name.consume_front(
"xop.")) {
745 if (Name.starts_with(
"vpermil2")) {
748 auto Idx =
F->getFunctionType()->getParamType(2);
749 if (Idx->isFPOrFPVectorTy()) {
750 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
751 unsigned EltSize = Idx->getScalarSizeInBits();
752 if (EltSize == 64 && IdxSize == 128)
753 ID = Intrinsic::x86_xop_vpermil2pd;
754 else if (EltSize == 32 && IdxSize == 128)
755 ID = Intrinsic::x86_xop_vpermil2ps;
756 else if (EltSize == 64 && IdxSize == 256)
757 ID = Intrinsic::x86_xop_vpermil2pd_256;
759 ID = Intrinsic::x86_xop_vpermil2ps_256;
761 }
else if (
F->arg_size() == 2)
764 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
765 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
776 if (Name ==
"seh.recoverfp") {
778 Intrinsic::eh_recoverfp);
790 if (Name.starts_with(
"rbit")) {
793 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
797 if (Name ==
"thread.pointer") {
800 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
804 bool Neon = Name.consume_front(
"neon.");
809 if (Name.consume_front(
"bfdot.")) {
813 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
818 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
819 assert((OperandWidth == 64 || OperandWidth == 128) &&
820 "Unexpected operand width");
822 std::array<Type *, 2> Tys{
833 if (Name.consume_front(
"bfm")) {
835 if (Name.consume_back(
".v4f32.v16i8")) {
881 F->arg_begin()->getType());
885 if (Name.consume_front(
"vst")) {
887 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
891 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
892 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
895 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
896 Intrinsic::arm_neon_vst4lane};
898 auto fArgs =
F->getFunctionType()->params();
899 Type *Tys[] = {fArgs[0], fArgs[1]};
902 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
905 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
914 if (Name.consume_front(
"mve.")) {
916 if (Name ==
"vctp64") {
926 if (Name.starts_with(
"vrintn.v")) {
928 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
933 if (Name.consume_back(
".v4i1")) {
935 if (Name.consume_back(
".predicated.v2i64.v4i32"))
937 return Name ==
"mull.int" || Name ==
"vqdmull";
939 if (Name.consume_back(
".v2i64")) {
941 bool IsGather = Name.consume_front(
"vldr.gather.");
942 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
943 if (Name.consume_front(
"base.")) {
945 Name.consume_front(
"wb.");
948 return Name ==
"predicated.v2i64";
951 if (Name.consume_front(
"offset.predicated."))
952 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
953 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
966 if (Name.consume_front(
"cde.vcx")) {
968 if (Name.consume_back(
".predicated.v2i64.v4i1"))
970 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
971 Name ==
"3q" || Name ==
"3qa";
985 F->arg_begin()->getType());
989 if (Name.starts_with(
"addp")) {
991 if (
F->arg_size() != 2)
994 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
996 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
1002 if (Name.starts_with(
"bfcvt")) {
1008 if (Name ==
"vcvtfp2hf" || Name ==
"vcvthf2fp") {
1015 if (Name.consume_front(
"sve.")) {
1017 if (Name.consume_front(
"bf")) {
1018 if (Name ==
"mmla") {
1019 Type *Tys[] = {
F->getReturnType(),
1020 std::next(
F->arg_begin())->getType()};
1022 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1025 if (Name.consume_back(
".lane")) {
1029 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1030 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1031 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1043 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1048 if (Name.consume_front(
"addqv")) {
1050 if (!
F->getReturnType()->isFPOrFPVectorTy())
1053 auto Args =
F->getFunctionType()->params();
1054 Type *Tys[] = {
F->getReturnType(), Args[1]};
1056 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1060 if (Name.consume_front(
"ld")) {
1062 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1063 if (LdRegex.
match(Name)) {
1069 "Expected 2 arguments for ld* intrinsic.");
1070 Type *PtrTy =
F->getArg(1)->getType();
1073 Intrinsic::aarch64_sve_ld2_sret,
1074 Intrinsic::aarch64_sve_ld3_sret,
1075 Intrinsic::aarch64_sve_ld4_sret,
1078 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1084 if (Name.consume_front(
"tuple.")) {
1086 if (Name.starts_with(
"get")) {
1088 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1090 F->getParent(), Intrinsic::vector_extract, Tys);
1094 if (Name.starts_with(
"set")) {
1096 auto Args =
F->getFunctionType()->params();
1097 Type *Tys[] = {Args[0], Args[2], Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1103 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1104 if (CreateTupleRegex.
match(Name)) {
1106 auto Args =
F->getFunctionType()->params();
1107 Type *Tys[] = {
F->getReturnType(), Args[1]};
1109 F->getParent(), Intrinsic::vector_insert, Tys);
1115 if (Name.starts_with(
"rev.nxv")) {
1118 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1124 if (Name.consume_front(
"sme.")) {
1126 if (Name.consume_front(
"ftmopa.")) {
1131 .
Case(
"za16.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za16)
1132 .
Case(
"za32.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za32)
1149 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1153 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1155 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1157 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1158 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1159 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1160 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1161 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1162 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1171 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1185 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1186 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1196 if (Name.consume_front(
"mapa.shared.cluster"))
1197 if (
F->getReturnType()->getPointerAddressSpace() ==
1199 return Intrinsic::nvvm_mapa_shared_cluster;
1201 if (Name.consume_front(
"cp.async.bulk.")) {
1204 .
Case(
"global.to.shared.cluster",
1205 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1206 .
Case(
"shared.cta.to.cluster",
1207 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1211 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1220 if (Name.consume_front(
"fma.rn."))
1222 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1223 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1224 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1225 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1228 if (Name.consume_front(
"fmax."))
1230 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1231 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1232 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1233 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1234 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1235 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1236 .
Case(
"ftz.nan.xorsign.abs.bf16",
1237 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1238 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1239 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1240 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1241 .
Case(
"ftz.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1243 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1244 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1245 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1246 .
Case(
"nan.xorsign.abs.bf16x2",
1247 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1248 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1249 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1252 if (Name.consume_front(
"fmin."))
1254 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1255 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1256 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1257 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1258 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1259 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1260 .
Case(
"ftz.nan.xorsign.abs.bf16",
1261 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1262 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1263 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1264 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1265 .
Case(
"ftz.xorsign.abs.bf16x2",
1266 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1267 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1268 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1269 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1270 .
Case(
"nan.xorsign.abs.bf16x2",
1271 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1272 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1273 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1276 if (Name.consume_front(
"neg."))
1278 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1279 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1286 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1287 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1288 Name.consume_front(
"param");
1294 if (Name.starts_with(
"to.fp16")) {
1298 FuncTy->getReturnType());
1301 if (Name.starts_with(
"from.fp16")) {
1305 FuncTy->getReturnType());
1312 bool CanUpgradeDebugIntrinsicsToRecords) {
1313 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1318 if (!Name.consume_front(
"llvm.") || Name.empty())
1324 bool IsArm = Name.consume_front(
"arm.");
1325 if (IsArm || Name.consume_front(
"aarch64.")) {
1331 if (Name.consume_front(
"amdgcn.")) {
1332 if (Name ==
"alignbit") {
1335 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1339 if (Name.consume_front(
"atomic.")) {
1340 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1341 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1350 switch (
F->getIntrinsicID()) {
1354 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1355 if (
F->arg_size() == 7) {
1360 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1361 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1362 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1363 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1364 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1365 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1366 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1367 if (
F->arg_size() == 8) {
1374 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1375 Name.consume_front(
"flat.atomic.")) {
1376 if (Name.starts_with(
"fadd") ||
1378 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1379 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1387 if (Name.starts_with(
"ldexp.")) {
1390 F->getParent(), Intrinsic::ldexp,
1391 {F->getReturnType(), F->getArg(1)->getType()});
1400 if (
F->arg_size() == 1) {
1401 if (Name.consume_front(
"convert.")) {
1415 F->arg_begin()->getType());
1420 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1423 Intrinsic::coro_end);
1430 if (Name.consume_front(
"dbg.")) {
1432 if (CanUpgradeDebugIntrinsicsToRecords) {
1433 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1434 Name ==
"declare" || Name ==
"label") {
1443 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1446 Intrinsic::dbg_value);
1453 if (Name.consume_front(
"experimental.vector.")) {
1459 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1460 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1461 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1462 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1463 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1465 Intrinsic::vector_partial_reduce_add)
1468 const auto *FT =
F->getFunctionType();
1470 if (
ID == Intrinsic::vector_extract ||
1471 ID == Intrinsic::vector_interleave2)
1474 if (
ID != Intrinsic::vector_interleave2)
1476 if (
ID == Intrinsic::vector_insert ||
1477 ID == Intrinsic::vector_partial_reduce_add)
1485 if (Name.consume_front(
"reduce.")) {
1487 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1488 if (R.match(Name, &
Groups))
1490 .
Case(
"add", Intrinsic::vector_reduce_add)
1491 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1492 .
Case(
"and", Intrinsic::vector_reduce_and)
1493 .
Case(
"or", Intrinsic::vector_reduce_or)
1494 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1495 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1496 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1497 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1498 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1499 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1500 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1505 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1510 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1511 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1516 auto Args =
F->getFunctionType()->params();
1518 {Args[V2 ? 1 : 0]});
1524 if (Name.consume_front(
"splice"))
1528 if (Name.consume_front(
"experimental.stepvector.")) {
1532 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1537 if (Name.starts_with(
"flt.rounds")) {
1540 Intrinsic::get_rounding);
1545 if (Name.starts_with(
"invariant.group.barrier")) {
1547 auto Args =
F->getFunctionType()->params();
1548 Type* ObjectPtr[1] = {Args[0]};
1551 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1556 bool IsLifetimeStart = Name.consume_front(
"lifetime.start");
1557 bool IsLifetimeEnd = !IsLifetimeStart && Name.consume_front(
"lifetime.end");
1558 if (IsLifetimeStart || IsLifetimeEnd) {
1559 if (
F->arg_size() == 2) {
1560 Intrinsic::ID IID = IsLifetimeStart ? Intrinsic::lifetime_start
1561 : Intrinsic::lifetime_end;
1566 F->getArg(1)->getType());
1568 }
else if (
F->arg_size() == 1 && Name ==
".i64") {
1588 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1589 .StartsWith(
"memmove.", Intrinsic::memmove)
1591 if (
F->arg_size() == 5) {
1595 F->getFunctionType()->params().slice(0, 3);
1601 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1604 const auto *FT =
F->getFunctionType();
1605 Type *ParamTypes[2] = {
1606 FT->getParamType(0),
1610 Intrinsic::memset, ParamTypes);
1616 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1617 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1618 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1619 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1621 if (MaskedID &&
F->arg_size() == 4) {
1623 if (MaskedID == Intrinsic::masked_load ||
1624 MaskedID == Intrinsic::masked_gather) {
1626 F->getParent(), MaskedID,
1627 {F->getReturnType(), F->getArg(0)->getType()});
1631 F->getParent(), MaskedID,
1632 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1638 if (Name.consume_front(
"nvvm.")) {
1640 if (
F->arg_size() == 1) {
1643 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1644 .Case(
"clz.i", Intrinsic::ctlz)
1645 .
Case(
"popc.i", Intrinsic::ctpop)
1649 {F->getReturnType()});
1652 }
else if (
F->arg_size() == 2) {
1655 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1656 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1657 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1658 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1662 {F->getReturnType()});
1668 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1696 bool Expand =
false;
1697 if (Name.consume_front(
"abs."))
1700 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1701 else if (Name.consume_front(
"fabs."))
1703 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1704 else if (Name.consume_front(
"ex2.approx."))
1707 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1708 else if (Name.consume_front(
"atomic.load."))
1717 else if (Name.consume_front(
"atomic."))
1732 else if (Name.consume_front(
"bitcast."))
1735 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1736 else if (Name.consume_front(
"rotate."))
1738 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1739 else if (Name.consume_front(
"ptr.gen.to."))
1742 else if (Name.consume_front(
"ptr."))
1745 else if (Name.consume_front(
"ldg.global."))
1747 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1748 Name.starts_with(
"p."));
1751 .
Case(
"barrier0",
true)
1752 .
Case(
"barrier.n",
true)
1753 .
Case(
"barrier.sync.cnt",
true)
1754 .
Case(
"barrier.sync",
true)
1755 .
Case(
"barrier",
true)
1756 .
Case(
"bar.sync",
true)
1757 .
Case(
"barrier0.popc",
true)
1758 .
Case(
"barrier0.and",
true)
1759 .
Case(
"barrier0.or",
true)
1760 .
Case(
"clz.ll",
true)
1761 .
Case(
"popc.ll",
true)
1763 .
Case(
"swap.lo.hi.b64",
true)
1764 .
Case(
"tanh.approx.f32",
true)
1776 if (Name.starts_with(
"objectsize.")) {
1777 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1778 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1781 Intrinsic::objectsize, Tys);
1788 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1791 F->getParent(), Intrinsic::ptr_annotation,
1792 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1798 if (Name.consume_front(
"riscv.")) {
1801 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1802 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1803 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1804 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1807 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1820 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1821 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1830 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1831 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1832 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1833 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1838 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1847 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1849 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1858 if (Name ==
"stackprotectorcheck") {
1865 if (Name ==
"thread.pointer") {
1867 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1873 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1876 F->getParent(), Intrinsic::var_annotation,
1877 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1880 if (Name.consume_front(
"vector.splice")) {
1881 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1889 if (Name.consume_front(
"wasm.")) {
1892 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1893 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1894 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1899 F->getReturnType());
1903 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1905 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1907 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1926 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1936 std::string
Name =
F->getName().str();
1939 Name,
F->getParent());
1950 if (Result != std::nullopt) {
1963 bool CanUpgradeDebugIntrinsicsToRecords) {
1983 GV->
getName() ==
"llvm.global_dtors")) ||
1998 unsigned N =
Init->getNumOperands();
1999 std::vector<Constant *> NewCtors(
N);
2000 for (
unsigned i = 0; i !=
N; ++i) {
2003 Ctor->getAggregateElement(1),
2017 unsigned NumElts = ResultTy->getNumElements() * 8;
2021 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2031 for (
unsigned l = 0; l != NumElts; l += 16)
2032 for (
unsigned i = 0; i != 16; ++i) {
2033 unsigned Idx = NumElts + i - Shift;
2035 Idx -= NumElts - 16;
2036 Idxs[l + i] = Idx + l;
2039 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
2043 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2051 unsigned NumElts = ResultTy->getNumElements() * 8;
2055 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2065 for (
unsigned l = 0; l != NumElts; l += 16)
2066 for (
unsigned i = 0; i != 16; ++i) {
2067 unsigned Idx = i + Shift;
2069 Idx += NumElts - 16;
2070 Idxs[l + i] = Idx + l;
2073 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2077 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2085 Mask = Builder.CreateBitCast(Mask, MaskTy);
2091 for (
unsigned i = 0; i != NumElts; ++i)
2093 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2104 if (
C->isAllOnesValue())
2109 return Builder.CreateSelect(Mask, Op0, Op1);
2116 if (
C->isAllOnesValue())
2120 Mask->getType()->getIntegerBitWidth());
2121 Mask = Builder.CreateBitCast(Mask, MaskTy);
2122 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2123 return Builder.CreateSelect(Mask, Op0, Op1);
2136 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2137 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2142 ShiftVal &= (NumElts - 1);
2151 if (ShiftVal > 16) {
2159 for (
unsigned l = 0; l < NumElts; l += 16) {
2160 for (
unsigned i = 0; i != 16; ++i) {
2161 unsigned Idx = ShiftVal + i;
2162 if (!IsVALIGN && Idx >= 16)
2163 Idx += NumElts - 16;
2164 Indices[l + i] = Idx + l;
2169 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2175 bool ZeroMask,
bool IndexForm) {
2178 unsigned EltWidth = Ty->getScalarSizeInBits();
2179 bool IsFloat = Ty->isFPOrFPVectorTy();
2181 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2182 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2183 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2184 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2185 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2186 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2187 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2188 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2189 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2190 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2191 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2192 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2193 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2194 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2195 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2196 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2197 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2198 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2199 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2200 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2201 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2202 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2203 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2204 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2205 else if (VecWidth == 128 && EltWidth == 16)
2206 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2207 else if (VecWidth == 256 && EltWidth == 16)
2208 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2209 else if (VecWidth == 512 && EltWidth == 16)
2210 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2211 else if (VecWidth == 128 && EltWidth == 8)
2212 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2213 else if (VecWidth == 256 && EltWidth == 8)
2214 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2215 else if (VecWidth == 512 && EltWidth == 8)
2216 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2227 Value *V = Builder.CreateIntrinsic(IID, Args);
2239 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2250 bool IsRotateRight) {
2260 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2261 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2264 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2265 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2310 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2315 bool IsShiftRight,
bool ZeroMask) {
2329 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2330 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2333 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2334 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2349 const Align Alignment =
2351 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2356 if (
C->isAllOnesValue())
2357 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2362 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2368 const Align Alignment =
2377 if (
C->isAllOnesValue())
2378 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2383 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2389 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2390 {Op0, Builder.getInt1(
false)});
2405 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2406 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2407 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2408 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2409 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2412 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2413 LHS = Builder.CreateAnd(
LHS, Mask);
2414 RHS = Builder.CreateAnd(
RHS, Mask);
2431 if (!
C || !
C->isAllOnesValue())
2432 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2437 for (
unsigned i = 0; i != NumElts; ++i)
2439 for (
unsigned i = NumElts; i != 8; ++i)
2440 Indices[i] = NumElts + i % NumElts;
2441 Vec = Builder.CreateShuffleVector(Vec,
2445 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2449 unsigned CC,
bool Signed) {
2457 }
else if (CC == 7) {
2493 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2494 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2496 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2497 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2506 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2512 Name = Name.substr(12);
2517 if (Name.starts_with(
"max.p")) {
2518 if (VecWidth == 128 && EltWidth == 32)
2519 IID = Intrinsic::x86_sse_max_ps;
2520 else if (VecWidth == 128 && EltWidth == 64)
2521 IID = Intrinsic::x86_sse2_max_pd;
2522 else if (VecWidth == 256 && EltWidth == 32)
2523 IID = Intrinsic::x86_avx_max_ps_256;
2524 else if (VecWidth == 256 && EltWidth == 64)
2525 IID = Intrinsic::x86_avx_max_pd_256;
2528 }
else if (Name.starts_with(
"min.p")) {
2529 if (VecWidth == 128 && EltWidth == 32)
2530 IID = Intrinsic::x86_sse_min_ps;
2531 else if (VecWidth == 128 && EltWidth == 64)
2532 IID = Intrinsic::x86_sse2_min_pd;
2533 else if (VecWidth == 256 && EltWidth == 32)
2534 IID = Intrinsic::x86_avx_min_ps_256;
2535 else if (VecWidth == 256 && EltWidth == 64)
2536 IID = Intrinsic::x86_avx_min_pd_256;
2539 }
else if (Name.starts_with(
"pshuf.b.")) {
2540 if (VecWidth == 128)
2541 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2542 else if (VecWidth == 256)
2543 IID = Intrinsic::x86_avx2_pshuf_b;
2544 else if (VecWidth == 512)
2545 IID = Intrinsic::x86_avx512_pshuf_b_512;
2548 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2549 if (VecWidth == 128)
2550 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2551 else if (VecWidth == 256)
2552 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2553 else if (VecWidth == 512)
2554 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2557 }
else if (Name.starts_with(
"pmulh.w.")) {
2558 if (VecWidth == 128)
2559 IID = Intrinsic::x86_sse2_pmulh_w;
2560 else if (VecWidth == 256)
2561 IID = Intrinsic::x86_avx2_pmulh_w;
2562 else if (VecWidth == 512)
2563 IID = Intrinsic::x86_avx512_pmulh_w_512;
2566 }
else if (Name.starts_with(
"pmulhu.w.")) {
2567 if (VecWidth == 128)
2568 IID = Intrinsic::x86_sse2_pmulhu_w;
2569 else if (VecWidth == 256)
2570 IID = Intrinsic::x86_avx2_pmulhu_w;
2571 else if (VecWidth == 512)
2572 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2575 }
else if (Name.starts_with(
"pmaddw.d.")) {
2576 if (VecWidth == 128)
2577 IID = Intrinsic::x86_sse2_pmadd_wd;
2578 else if (VecWidth == 256)
2579 IID = Intrinsic::x86_avx2_pmadd_wd;
2580 else if (VecWidth == 512)
2581 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2584 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2585 if (VecWidth == 128)
2586 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2587 else if (VecWidth == 256)
2588 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2589 else if (VecWidth == 512)
2590 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2593 }
else if (Name.starts_with(
"packsswb.")) {
2594 if (VecWidth == 128)
2595 IID = Intrinsic::x86_sse2_packsswb_128;
2596 else if (VecWidth == 256)
2597 IID = Intrinsic::x86_avx2_packsswb;
2598 else if (VecWidth == 512)
2599 IID = Intrinsic::x86_avx512_packsswb_512;
2602 }
else if (Name.starts_with(
"packssdw.")) {
2603 if (VecWidth == 128)
2604 IID = Intrinsic::x86_sse2_packssdw_128;
2605 else if (VecWidth == 256)
2606 IID = Intrinsic::x86_avx2_packssdw;
2607 else if (VecWidth == 512)
2608 IID = Intrinsic::x86_avx512_packssdw_512;
2611 }
else if (Name.starts_with(
"packuswb.")) {
2612 if (VecWidth == 128)
2613 IID = Intrinsic::x86_sse2_packuswb_128;
2614 else if (VecWidth == 256)
2615 IID = Intrinsic::x86_avx2_packuswb;
2616 else if (VecWidth == 512)
2617 IID = Intrinsic::x86_avx512_packuswb_512;
2620 }
else if (Name.starts_with(
"packusdw.")) {
2621 if (VecWidth == 128)
2622 IID = Intrinsic::x86_sse41_packusdw;
2623 else if (VecWidth == 256)
2624 IID = Intrinsic::x86_avx2_packusdw;
2625 else if (VecWidth == 512)
2626 IID = Intrinsic::x86_avx512_packusdw_512;
2629 }
else if (Name.starts_with(
"vpermilvar.")) {
2630 if (VecWidth == 128 && EltWidth == 32)
2631 IID = Intrinsic::x86_avx_vpermilvar_ps;
2632 else if (VecWidth == 128 && EltWidth == 64)
2633 IID = Intrinsic::x86_avx_vpermilvar_pd;
2634 else if (VecWidth == 256 && EltWidth == 32)
2635 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2636 else if (VecWidth == 256 && EltWidth == 64)
2637 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2638 else if (VecWidth == 512 && EltWidth == 32)
2639 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2640 else if (VecWidth == 512 && EltWidth == 64)
2641 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2644 }
else if (Name ==
"cvtpd2dq.256") {
2645 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2646 }
else if (Name ==
"cvtpd2ps.256") {
2647 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2648 }
else if (Name ==
"cvttpd2dq.256") {
2649 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2650 }
else if (Name ==
"cvttps2dq.128") {
2651 IID = Intrinsic::x86_sse2_cvttps2dq;
2652 }
else if (Name ==
"cvttps2dq.256") {
2653 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2654 }
else if (Name.starts_with(
"permvar.")) {
2656 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2657 IID = Intrinsic::x86_avx2_permps;
2658 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2659 IID = Intrinsic::x86_avx2_permd;
2660 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2661 IID = Intrinsic::x86_avx512_permvar_df_256;
2662 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2663 IID = Intrinsic::x86_avx512_permvar_di_256;
2664 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2665 IID = Intrinsic::x86_avx512_permvar_sf_512;
2666 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2667 IID = Intrinsic::x86_avx512_permvar_si_512;
2668 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2669 IID = Intrinsic::x86_avx512_permvar_df_512;
2670 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2671 IID = Intrinsic::x86_avx512_permvar_di_512;
2672 else if (VecWidth == 128 && EltWidth == 16)
2673 IID = Intrinsic::x86_avx512_permvar_hi_128;
2674 else if (VecWidth == 256 && EltWidth == 16)
2675 IID = Intrinsic::x86_avx512_permvar_hi_256;
2676 else if (VecWidth == 512 && EltWidth == 16)
2677 IID = Intrinsic::x86_avx512_permvar_hi_512;
2678 else if (VecWidth == 128 && EltWidth == 8)
2679 IID = Intrinsic::x86_avx512_permvar_qi_128;
2680 else if (VecWidth == 256 && EltWidth == 8)
2681 IID = Intrinsic::x86_avx512_permvar_qi_256;
2682 else if (VecWidth == 512 && EltWidth == 8)
2683 IID = Intrinsic::x86_avx512_permvar_qi_512;
2686 }
else if (Name.starts_with(
"dbpsadbw.")) {
2687 if (VecWidth == 128)
2688 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2689 else if (VecWidth == 256)
2690 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2691 else if (VecWidth == 512)
2692 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2695 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2696 if (VecWidth == 128)
2697 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2698 else if (VecWidth == 256)
2699 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2700 else if (VecWidth == 512)
2701 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2704 }
else if (Name.starts_with(
"conflict.")) {
2705 if (Name[9] ==
'd' && VecWidth == 128)
2706 IID = Intrinsic::x86_avx512_conflict_d_128;
2707 else if (Name[9] ==
'd' && VecWidth == 256)
2708 IID = Intrinsic::x86_avx512_conflict_d_256;
2709 else if (Name[9] ==
'd' && VecWidth == 512)
2710 IID = Intrinsic::x86_avx512_conflict_d_512;
2711 else if (Name[9] ==
'q' && VecWidth == 128)
2712 IID = Intrinsic::x86_avx512_conflict_q_128;
2713 else if (Name[9] ==
'q' && VecWidth == 256)
2714 IID = Intrinsic::x86_avx512_conflict_q_256;
2715 else if (Name[9] ==
'q' && VecWidth == 512)
2716 IID = Intrinsic::x86_avx512_conflict_q_512;
2719 }
else if (Name.starts_with(
"pavg.")) {
2720 if (Name[5] ==
'b' && VecWidth == 128)
2721 IID = Intrinsic::x86_sse2_pavg_b;
2722 else if (Name[5] ==
'b' && VecWidth == 256)
2723 IID = Intrinsic::x86_avx2_pavg_b;
2724 else if (Name[5] ==
'b' && VecWidth == 512)
2725 IID = Intrinsic::x86_avx512_pavg_b_512;
2726 else if (Name[5] ==
'w' && VecWidth == 128)
2727 IID = Intrinsic::x86_sse2_pavg_w;
2728 else if (Name[5] ==
'w' && VecWidth == 256)
2729 IID = Intrinsic::x86_avx2_pavg_w;
2730 else if (Name[5] ==
'w' && VecWidth == 512)
2731 IID = Intrinsic::x86_avx512_pavg_w_512;
2740 Rep = Builder.CreateIntrinsic(IID, Args);
2751 if (AsmStr->find(
"mov\tfp") == 0 &&
2752 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2753 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2754 AsmStr->replace(Pos, 1,
";");
2760 Value *Rep =
nullptr;
2762 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2764 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->
getType()},
2765 {Arg, Builder.getTrue()},
2767 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2768 Type *Ty = (Name ==
"abs.bf16")
2772 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2773 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2774 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2775 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2776 : Intrinsic::nvvm_fabs;
2777 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2778 }
else if (Name.consume_front(
"ex2.approx.")) {
2780 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2781 : Intrinsic::nvvm_ex2_approx;
2782 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2783 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2784 Name.starts_with(
"atomic.load.add.f64.p")) {
2787 Rep = Builder.CreateAtomicRMW(
2793 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2794 Name.starts_with(
"atomic.load.dec.32.p")) {
2799 Rep = Builder.CreateAtomicRMW(
2803 }
else if (Name.starts_with(
"atomic.") && Name.contains(
".gen.")) {
2809 Op.contains(
".cta.") ?
"block" :
"");
2810 if (
Op.starts_with(
"cas.")) {
2812 Value *Pair = Builder.CreateAtomicCmpXchg(
2815 Rep = Builder.CreateExtractValue(Pair, 0);
2833 "unexpected nvvm scoped atomic intrinsic");
2834 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val,
MaybeAlign(),
2837 }
else if (Name ==
"clz.ll") {
2840 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2841 {Arg, Builder.getFalse()},
2843 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2844 }
else if (Name ==
"popc.ll") {
2848 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2849 Arg,
nullptr,
"ctpop");
2850 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2851 }
else if (Name ==
"h2f") {
2853 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2854 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2855 }
else if (Name.consume_front(
"bitcast.") &&
2856 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2859 }
else if (Name ==
"rotate.b32") {
2862 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2863 {Arg, Arg, ShiftAmt});
2864 }
else if (Name ==
"rotate.b64") {
2868 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2869 {Arg, Arg, ZExtShiftAmt});
2870 }
else if (Name ==
"rotate.right.b64") {
2874 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2875 {Arg, Arg, ZExtShiftAmt});
2876 }
else if (Name ==
"swap.lo.hi.b64") {
2879 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2880 {Arg, Arg, Builder.getInt64(32)});
2881 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2884 Name.starts_with(
".to.gen"))) {
2886 }
else if (Name.consume_front(
"ldg.global")) {
2890 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2893 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2895 }
else if (Name ==
"tanh.approx.f32") {
2899 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2901 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2903 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2904 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2906 }
else if (Name ==
"barrier") {
2907 Rep = Builder.CreateIntrinsic(
2908 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2910 }
else if (Name ==
"barrier.sync") {
2911 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2913 }
else if (Name ==
"barrier.sync.cnt") {
2914 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2916 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2917 Name ==
"barrier0.or") {
2919 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2923 .
Case(
"barrier0.popc",
2924 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2925 .
Case(
"barrier0.and",
2926 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2927 .
Case(
"barrier0.or",
2928 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2929 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2930 Rep = Builder.CreateZExt(Bar, CI->
getType());
2934 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2944 ? Builder.CreateBitCast(Arg, NewType)
2947 Rep = Builder.CreateCall(NewFn, Args);
2948 if (
F->getReturnType()->isIntegerTy())
2949 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2959 Value *Rep =
nullptr;
2961 if (Name.starts_with(
"sse4a.movnt.")) {
2973 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2976 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2977 }
else if (Name.starts_with(
"avx.movnt.") ||
2978 Name.starts_with(
"avx512.storent.")) {
2990 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2991 }
else if (Name ==
"sse2.storel.dq") {
2996 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2997 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2998 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2999 }
else if (Name.starts_with(
"sse.storeu.") ||
3000 Name.starts_with(
"sse2.storeu.") ||
3001 Name.starts_with(
"avx.storeu.")) {
3004 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
3005 }
else if (Name ==
"avx512.mask.store.ss") {
3009 }
else if (Name.starts_with(
"avx512.mask.store")) {
3011 bool Aligned = Name[17] !=
'u';
3014 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
3017 bool CmpEq = Name[9] ==
'e';
3020 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
3021 }
else if (Name.starts_with(
"avx512.broadcastm")) {
3028 Rep = Builder.CreateVectorSplat(NumElts, Rep);
3029 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
3031 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
3032 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
3033 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
3034 }
else if (Name.starts_with(
"avx.sqrt.p") ||
3035 Name.starts_with(
"sse2.sqrt.p") ||
3036 Name.starts_with(
"sse.sqrt.p")) {
3037 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
3038 {CI->getArgOperand(0)});
3039 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
3043 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
3044 : Intrinsic::x86_avx512_sqrt_pd_512;
3047 Rep = Builder.CreateIntrinsic(IID, Args);
3049 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
3050 {CI->getArgOperand(0)});
3054 }
else if (Name.starts_with(
"avx512.ptestm") ||
3055 Name.starts_with(
"avx512.ptestnm")) {
3059 Rep = Builder.CreateAnd(Op0, Op1);
3065 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3067 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
3070 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
3073 }
else if (Name.starts_with(
"avx512.kunpck")) {
3078 for (
unsigned i = 0; i != NumElts; ++i)
3087 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
3088 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3089 }
else if (Name ==
"avx512.kand.w") {
3092 Rep = Builder.CreateAnd(
LHS,
RHS);
3093 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3094 }
else if (Name ==
"avx512.kandn.w") {
3097 LHS = Builder.CreateNot(
LHS);
3098 Rep = Builder.CreateAnd(
LHS,
RHS);
3099 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3100 }
else if (Name ==
"avx512.kor.w") {
3103 Rep = Builder.CreateOr(
LHS,
RHS);
3104 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3105 }
else if (Name ==
"avx512.kxor.w") {
3108 Rep = Builder.CreateXor(
LHS,
RHS);
3109 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3110 }
else if (Name ==
"avx512.kxnor.w") {
3113 LHS = Builder.CreateNot(
LHS);
3114 Rep = Builder.CreateXor(
LHS,
RHS);
3115 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3116 }
else if (Name ==
"avx512.knot.w") {
3118 Rep = Builder.CreateNot(Rep);
3119 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3120 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3123 Rep = Builder.CreateOr(
LHS,
RHS);
3124 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3126 if (Name[14] ==
'c')
3130 Rep = Builder.CreateICmpEQ(Rep,
C);
3131 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3132 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3133 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3134 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3135 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3138 ConstantInt::get(I32Ty, 0));
3140 ConstantInt::get(I32Ty, 0));
3142 if (Name.contains(
".add."))
3143 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3144 else if (Name.contains(
".sub."))
3145 EltOp = Builder.CreateFSub(Elt0, Elt1);
3146 else if (Name.contains(
".mul."))
3147 EltOp = Builder.CreateFMul(Elt0, Elt1);
3149 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3150 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3151 ConstantInt::get(I32Ty, 0));
3152 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3154 bool CmpEq = Name[16] ==
'e';
3156 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3165 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3168 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3171 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3178 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3183 if (VecWidth == 128 && EltWidth == 32)
3184 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3185 else if (VecWidth == 256 && EltWidth == 32)
3186 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3187 else if (VecWidth == 512 && EltWidth == 32)
3188 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3189 else if (VecWidth == 128 && EltWidth == 64)
3190 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3191 else if (VecWidth == 256 && EltWidth == 64)
3192 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3193 else if (VecWidth == 512 && EltWidth == 64)
3194 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3201 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3203 Type *OpTy = Args[0]->getType();
3207 if (VecWidth == 128 && EltWidth == 32)
3208 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3209 else if (VecWidth == 256 && EltWidth == 32)
3210 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3211 else if (VecWidth == 512 && EltWidth == 32)
3212 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3213 else if (VecWidth == 128 && EltWidth == 64)
3214 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3215 else if (VecWidth == 256 && EltWidth == 64)
3216 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3217 else if (VecWidth == 512 && EltWidth == 64)
3218 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3223 if (VecWidth == 512)
3225 Args.push_back(Mask);
3227 Rep = Builder.CreateIntrinsic(IID, Args);
3228 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3232 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3235 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3236 Name.starts_with(
"avx512.cvtw2mask.") ||
3237 Name.starts_with(
"avx512.cvtd2mask.") ||
3238 Name.starts_with(
"avx512.cvtq2mask.")) {
3243 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3244 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3245 Name.starts_with(
"avx512.mask.pabs")) {
3247 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3248 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3249 Name.starts_with(
"avx512.mask.pmaxs")) {
3251 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3252 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3253 Name.starts_with(
"avx512.mask.pmaxu")) {
3255 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3256 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3257 Name.starts_with(
"avx512.mask.pmins")) {
3259 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3260 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3261 Name.starts_with(
"avx512.mask.pminu")) {
3263 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3264 Name ==
"avx512.pmulu.dq.512" ||
3265 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3267 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3268 Name ==
"avx512.pmul.dq.512" ||
3269 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3271 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3272 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3277 }
else if (Name ==
"avx512.cvtusi2sd") {
3282 }
else if (Name ==
"sse2.cvtss2sd") {
3284 Rep = Builder.CreateFPExt(
3287 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3288 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3289 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3290 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3291 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3292 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3293 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3294 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3295 Name ==
"avx512.mask.cvtqq2ps.256" ||
3296 Name ==
"avx512.mask.cvtqq2ps.512" ||
3297 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3298 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3299 Name ==
"avx.cvt.ps2.pd.256" ||
3300 Name ==
"avx512.mask.cvtps2pd.128" ||
3301 Name ==
"avx512.mask.cvtps2pd.256") {
3306 unsigned NumDstElts = DstTy->getNumElements();
3308 assert(NumDstElts == 2 &&
"Unexpected vector size");
3309 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3312 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3313 bool IsUnsigned = Name.contains(
"cvtu");
3315 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3319 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3320 : Intrinsic::x86_avx512_sitofp_round;
3321 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3324 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3325 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3331 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3332 Name.starts_with(
"vcvtph2ps.")) {
3336 unsigned NumDstElts = DstTy->getNumElements();
3337 if (NumDstElts != SrcTy->getNumElements()) {
3338 assert(NumDstElts == 4 &&
"Unexpected vector size");
3339 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3341 Rep = Builder.CreateBitCast(
3343 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3347 }
else if (Name.starts_with(
"avx512.mask.load")) {
3349 bool Aligned = Name[16] !=
'u';
3352 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3356 ResultTy->getNumElements());
3357 Rep = Builder.CreateIntrinsic(
3358 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3360 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3366 Rep = Builder.CreateIntrinsic(
3367 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3369 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3370 Name.starts_with(
"avx512.mask.expand.")) {
3374 ResultTy->getNumElements());
3376 bool IsCompress = Name[12] ==
'c';
3377 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3378 : Intrinsic::x86_avx512_mask_expand;
3379 Rep = Builder.CreateIntrinsic(
3381 }
else if (Name.starts_with(
"xop.vpcom")) {
3383 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3384 Name.ends_with(
"uq"))
3386 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3387 Name.ends_with(
"d") || Name.ends_with(
"q"))
3396 Name = Name.substr(9);
3397 if (Name.starts_with(
"lt"))
3399 else if (Name.starts_with(
"le"))
3401 else if (Name.starts_with(
"gt"))
3403 else if (Name.starts_with(
"ge"))
3405 else if (Name.starts_with(
"eq"))
3407 else if (Name.starts_with(
"ne"))
3409 else if (Name.starts_with(
"false"))
3411 else if (Name.starts_with(
"true"))
3418 }
else if (Name.starts_with(
"xop.vpcmov")) {
3420 Value *NotSel = Builder.CreateNot(Sel);
3423 Rep = Builder.CreateOr(Sel0, Sel1);
3424 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3425 Name.starts_with(
"avx512.mask.prol")) {
3427 }
else if (Name.starts_with(
"avx512.pror") ||
3428 Name.starts_with(
"avx512.mask.pror")) {
3430 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3431 Name.starts_with(
"avx512.mask.vpshld") ||
3432 Name.starts_with(
"avx512.maskz.vpshld")) {
3433 bool ZeroMask = Name[11] ==
'z';
3435 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3436 Name.starts_with(
"avx512.mask.vpshrd") ||
3437 Name.starts_with(
"avx512.maskz.vpshrd")) {
3438 bool ZeroMask = Name[11] ==
'z';
3440 }
else if (Name ==
"sse42.crc32.64.8") {
3443 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3445 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3446 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3447 Name.starts_with(
"avx512.vbroadcast.s")) {
3450 Type *EltTy = VecTy->getElementType();
3451 unsigned EltNum = VecTy->getNumElements();
3455 for (
unsigned I = 0;
I < EltNum; ++
I)
3456 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3457 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3458 Name.starts_with(
"sse41.pmovzx") ||
3459 Name.starts_with(
"avx2.pmovsx") ||
3460 Name.starts_with(
"avx2.pmovzx") ||
3461 Name.starts_with(
"avx512.mask.pmovsx") ||
3462 Name.starts_with(
"avx512.mask.pmovzx")) {
3464 unsigned NumDstElts = DstTy->getNumElements();
3468 for (
unsigned i = 0; i != NumDstElts; ++i)
3473 bool DoSext = Name.contains(
"pmovsx");
3475 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3480 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3481 Name ==
"avx512.mask.pmov.qd.512" ||
3482 Name ==
"avx512.mask.pmov.wb.256" ||
3483 Name ==
"avx512.mask.pmov.wb.512") {
3488 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3489 Name ==
"avx2.vbroadcasti128") {
3495 if (NumSrcElts == 2)
3496 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3498 Rep = Builder.CreateShuffleVector(Load,
3500 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3501 Name.starts_with(
"avx512.mask.shuf.f")) {
3506 unsigned ControlBitsMask = NumLanes - 1;
3507 unsigned NumControlBits = NumLanes / 2;
3510 for (
unsigned l = 0; l != NumLanes; ++l) {
3511 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3513 if (l >= NumLanes / 2)
3514 LaneMask += NumLanes;
3515 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3516 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3522 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3523 Name.starts_with(
"avx512.mask.broadcasti")) {
3526 unsigned NumDstElts =
3530 for (
unsigned i = 0; i != NumDstElts; ++i)
3531 ShuffleMask[i] = i % NumSrcElts;
3537 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3538 Name.starts_with(
"avx2.vbroadcast") ||
3539 Name.starts_with(
"avx512.pbroadcast") ||
3540 Name.starts_with(
"avx512.mask.broadcast.s")) {
3547 Rep = Builder.CreateShuffleVector(
Op, M);
3552 }
else if (Name.starts_with(
"sse2.padds.") ||
3553 Name.starts_with(
"avx2.padds.") ||
3554 Name.starts_with(
"avx512.padds.") ||
3555 Name.starts_with(
"avx512.mask.padds.")) {
3557 }
else if (Name.starts_with(
"sse2.psubs.") ||
3558 Name.starts_with(
"avx2.psubs.") ||
3559 Name.starts_with(
"avx512.psubs.") ||
3560 Name.starts_with(
"avx512.mask.psubs.")) {
3562 }
else if (Name.starts_with(
"sse2.paddus.") ||
3563 Name.starts_with(
"avx2.paddus.") ||
3564 Name.starts_with(
"avx512.mask.paddus.")) {
3566 }
else if (Name.starts_with(
"sse2.psubus.") ||
3567 Name.starts_with(
"avx2.psubus.") ||
3568 Name.starts_with(
"avx512.mask.psubus.")) {
3570 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3575 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3579 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3584 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3589 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3590 Name ==
"avx512.psll.dq.512") {
3594 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3595 Name ==
"avx512.psrl.dq.512") {
3599 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3600 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3601 Name.starts_with(
"avx2.pblendd.")) {
3606 unsigned NumElts = VecTy->getNumElements();
3609 for (
unsigned i = 0; i != NumElts; ++i)
3610 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3612 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3613 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3614 Name ==
"avx2.vinserti128" ||
3615 Name.starts_with(
"avx512.mask.insert")) {
3619 unsigned DstNumElts =
3621 unsigned SrcNumElts =
3623 unsigned Scale = DstNumElts / SrcNumElts;
3630 for (
unsigned i = 0; i != SrcNumElts; ++i)
3632 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3633 Idxs[i] = SrcNumElts;
3634 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3648 for (
unsigned i = 0; i != DstNumElts; ++i)
3651 for (
unsigned i = 0; i != SrcNumElts; ++i)
3652 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3653 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3659 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3660 Name ==
"avx2.vextracti128" ||
3661 Name.starts_with(
"avx512.mask.vextract")) {
3664 unsigned DstNumElts =
3666 unsigned SrcNumElts =
3668 unsigned Scale = SrcNumElts / DstNumElts;
3675 for (
unsigned i = 0; i != DstNumElts; ++i) {
3676 Idxs[i] = i + (Imm * DstNumElts);
3678 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3684 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3685 Name.starts_with(
"avx512.mask.perm.di.")) {
3689 unsigned NumElts = VecTy->getNumElements();
3692 for (
unsigned i = 0; i != NumElts; ++i)
3693 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3695 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3700 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3712 unsigned HalfSize = NumElts / 2;
3724 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3725 for (
unsigned i = 0; i < HalfSize; ++i)
3726 ShuffleMask[i] = StartIndex + i;
3729 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3730 for (
unsigned i = 0; i < HalfSize; ++i)
3731 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3733 Rep = Builder.CreateShuffleVector(V0,
V1, ShuffleMask);
3735 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3736 Name.starts_with(
"avx512.mask.vpermil.p") ||
3737 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3741 unsigned NumElts = VecTy->getNumElements();
3743 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3744 unsigned IdxMask = ((1 << IdxSize) - 1);
3750 for (
unsigned i = 0; i != NumElts; ++i)
3751 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3753 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3758 }
else if (Name ==
"sse2.pshufl.w" ||
3759 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3764 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3768 for (
unsigned l = 0; l != NumElts; l += 8) {
3769 for (
unsigned i = 0; i != 4; ++i)
3770 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3771 for (
unsigned i = 4; i != 8; ++i)
3772 Idxs[i + l] = i + l;
3775 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3780 }
else if (Name ==
"sse2.pshufh.w" ||
3781 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3786 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3790 for (
unsigned l = 0; l != NumElts; l += 8) {
3791 for (
unsigned i = 0; i != 4; ++i)
3792 Idxs[i + l] = i + l;
3793 for (
unsigned i = 0; i != 4; ++i)
3794 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3797 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3802 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3809 unsigned HalfLaneElts = NumLaneElts / 2;
3812 for (
unsigned i = 0; i != NumElts; ++i) {
3814 Idxs[i] = i - (i % NumLaneElts);
3816 if ((i % NumLaneElts) >= HalfLaneElts)
3820 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3823 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3827 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3828 Name.starts_with(
"avx512.mask.movshdup") ||
3829 Name.starts_with(
"avx512.mask.movsldup")) {
3835 if (Name.starts_with(
"avx512.mask.movshdup."))
3839 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3840 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3841 Idxs[i + l + 0] = i + l +
Offset;
3842 Idxs[i + l + 1] = i + l +
Offset;
3845 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3849 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3850 Name.starts_with(
"avx512.mask.unpckl.")) {
3857 for (
int l = 0; l != NumElts; l += NumLaneElts)
3858 for (
int i = 0; i != NumLaneElts; ++i)
3859 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3861 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3865 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3866 Name.starts_with(
"avx512.mask.unpckh.")) {
3873 for (
int l = 0; l != NumElts; l += NumLaneElts)
3874 for (
int i = 0; i != NumLaneElts; ++i)
3875 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3877 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3881 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3882 Name.starts_with(
"avx512.mask.pand.")) {
3885 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3887 Rep = Builder.CreateBitCast(Rep, FTy);
3890 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3891 Name.starts_with(
"avx512.mask.pandn.")) {
3894 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3895 Rep = Builder.CreateAnd(Rep,
3897 Rep = Builder.CreateBitCast(Rep, FTy);
3900 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3901 Name.starts_with(
"avx512.mask.por.")) {
3904 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3906 Rep = Builder.CreateBitCast(Rep, FTy);
3909 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3910 Name.starts_with(
"avx512.mask.pxor.")) {
3913 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3915 Rep = Builder.CreateBitCast(Rep, FTy);
3918 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3922 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3926 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3930 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3931 if (Name.ends_with(
".512")) {
3933 if (Name[17] ==
's')
3934 IID = Intrinsic::x86_avx512_add_ps_512;
3936 IID = Intrinsic::x86_avx512_add_pd_512;
3938 Rep = Builder.CreateIntrinsic(
3946 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3947 if (Name.ends_with(
".512")) {
3949 if (Name[17] ==
's')
3950 IID = Intrinsic::x86_avx512_div_ps_512;
3952 IID = Intrinsic::x86_avx512_div_pd_512;
3954 Rep = Builder.CreateIntrinsic(
3962 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3963 if (Name.ends_with(
".512")) {
3965 if (Name[17] ==
's')
3966 IID = Intrinsic::x86_avx512_mul_ps_512;
3968 IID = Intrinsic::x86_avx512_mul_pd_512;
3970 Rep = Builder.CreateIntrinsic(
3978 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3979 if (Name.ends_with(
".512")) {
3981 if (Name[17] ==
's')
3982 IID = Intrinsic::x86_avx512_sub_ps_512;
3984 IID = Intrinsic::x86_avx512_sub_pd_512;
3986 Rep = Builder.CreateIntrinsic(
3994 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3995 Name.starts_with(
"avx512.mask.min.p")) &&
3996 Name.drop_front(18) ==
".512") {
3997 bool IsDouble = Name[17] ==
'd';
3998 bool IsMin = Name[13] ==
'i';
4000 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
4001 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
4004 Rep = Builder.CreateIntrinsic(
4009 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
4011 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
4012 {CI->getArgOperand(0), Builder.getInt1(false)});
4015 }
else if (Name.starts_with(
"avx512.mask.psll")) {
4016 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4017 bool IsVariable = Name[16] ==
'v';
4018 char Size = Name[16] ==
'.' ? Name[17]
4019 : Name[17] ==
'.' ? Name[18]
4020 : Name[18] ==
'.' ? Name[19]
4024 if (IsVariable && Name[17] !=
'.') {
4025 if (
Size ==
'd' && Name[17] ==
'2')
4026 IID = Intrinsic::x86_avx2_psllv_q;
4027 else if (
Size ==
'd' && Name[17] ==
'4')
4028 IID = Intrinsic::x86_avx2_psllv_q_256;
4029 else if (
Size ==
's' && Name[17] ==
'4')
4030 IID = Intrinsic::x86_avx2_psllv_d;
4031 else if (
Size ==
's' && Name[17] ==
'8')
4032 IID = Intrinsic::x86_avx2_psllv_d_256;
4033 else if (
Size ==
'h' && Name[17] ==
'8')
4034 IID = Intrinsic::x86_avx512_psllv_w_128;
4035 else if (
Size ==
'h' && Name[17] ==
'1')
4036 IID = Intrinsic::x86_avx512_psllv_w_256;
4037 else if (Name[17] ==
'3' && Name[18] ==
'2')
4038 IID = Intrinsic::x86_avx512_psllv_w_512;
4041 }
else if (Name.ends_with(
".128")) {
4043 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4044 : Intrinsic::x86_sse2_psll_d;
4045 else if (
Size ==
'q')
4046 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4047 : Intrinsic::x86_sse2_psll_q;
4048 else if (
Size ==
'w')
4049 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4050 : Intrinsic::x86_sse2_psll_w;
4053 }
else if (Name.ends_with(
".256")) {
4055 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4056 : Intrinsic::x86_avx2_psll_d;
4057 else if (
Size ==
'q')
4058 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4059 : Intrinsic::x86_avx2_psll_q;
4060 else if (
Size ==
'w')
4061 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4062 : Intrinsic::x86_avx2_psll_w;
4067 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4068 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4069 : Intrinsic::x86_avx512_psll_d_512;
4070 else if (
Size ==
'q')
4071 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4072 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4073 : Intrinsic::x86_avx512_psll_q_512;
4074 else if (
Size ==
'w')
4075 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4076 : Intrinsic::x86_avx512_psll_w_512;
4082 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
4083 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4084 bool IsVariable = Name[16] ==
'v';
4085 char Size = Name[16] ==
'.' ? Name[17]
4086 : Name[17] ==
'.' ? Name[18]
4087 : Name[18] ==
'.' ? Name[19]
4091 if (IsVariable && Name[17] !=
'.') {
4092 if (
Size ==
'd' && Name[17] ==
'2')
4093 IID = Intrinsic::x86_avx2_psrlv_q;
4094 else if (
Size ==
'd' && Name[17] ==
'4')
4095 IID = Intrinsic::x86_avx2_psrlv_q_256;
4096 else if (
Size ==
's' && Name[17] ==
'4')
4097 IID = Intrinsic::x86_avx2_psrlv_d;
4098 else if (
Size ==
's' && Name[17] ==
'8')
4099 IID = Intrinsic::x86_avx2_psrlv_d_256;
4100 else if (
Size ==
'h' && Name[17] ==
'8')
4101 IID = Intrinsic::x86_avx512_psrlv_w_128;
4102 else if (
Size ==
'h' && Name[17] ==
'1')
4103 IID = Intrinsic::x86_avx512_psrlv_w_256;
4104 else if (Name[17] ==
'3' && Name[18] ==
'2')
4105 IID = Intrinsic::x86_avx512_psrlv_w_512;
4108 }
else if (Name.ends_with(
".128")) {
4110 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4111 : Intrinsic::x86_sse2_psrl_d;
4112 else if (
Size ==
'q')
4113 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4114 : Intrinsic::x86_sse2_psrl_q;
4115 else if (
Size ==
'w')
4116 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4117 : Intrinsic::x86_sse2_psrl_w;
4120 }
else if (Name.ends_with(
".256")) {
4122 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4123 : Intrinsic::x86_avx2_psrl_d;
4124 else if (
Size ==
'q')
4125 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4126 : Intrinsic::x86_avx2_psrl_q;
4127 else if (
Size ==
'w')
4128 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4129 : Intrinsic::x86_avx2_psrl_w;
4134 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4135 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4136 : Intrinsic::x86_avx512_psrl_d_512;
4137 else if (
Size ==
'q')
4138 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4139 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4140 : Intrinsic::x86_avx512_psrl_q_512;
4141 else if (
Size ==
'w')
4142 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4143 : Intrinsic::x86_avx512_psrl_w_512;
4149 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4150 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4151 bool IsVariable = Name[16] ==
'v';
4152 char Size = Name[16] ==
'.' ? Name[17]
4153 : Name[17] ==
'.' ? Name[18]
4154 : Name[18] ==
'.' ? Name[19]
4158 if (IsVariable && Name[17] !=
'.') {
4159 if (
Size ==
's' && Name[17] ==
'4')
4160 IID = Intrinsic::x86_avx2_psrav_d;
4161 else if (
Size ==
's' && Name[17] ==
'8')
4162 IID = Intrinsic::x86_avx2_psrav_d_256;
4163 else if (
Size ==
'h' && Name[17] ==
'8')
4164 IID = Intrinsic::x86_avx512_psrav_w_128;
4165 else if (
Size ==
'h' && Name[17] ==
'1')
4166 IID = Intrinsic::x86_avx512_psrav_w_256;
4167 else if (Name[17] ==
'3' && Name[18] ==
'2')
4168 IID = Intrinsic::x86_avx512_psrav_w_512;
4171 }
else if (Name.ends_with(
".128")) {
4173 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4174 : Intrinsic::x86_sse2_psra_d;
4175 else if (
Size ==
'q')
4176 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4177 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4178 : Intrinsic::x86_avx512_psra_q_128;
4179 else if (
Size ==
'w')
4180 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4181 : Intrinsic::x86_sse2_psra_w;
4184 }
else if (Name.ends_with(
".256")) {
4186 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4187 : Intrinsic::x86_avx2_psra_d;
4188 else if (
Size ==
'q')
4189 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4190 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4191 : Intrinsic::x86_avx512_psra_q_256;
4192 else if (
Size ==
'w')
4193 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4194 : Intrinsic::x86_avx2_psra_w;
4199 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4200 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4201 : Intrinsic::x86_avx512_psra_d_512;
4202 else if (
Size ==
'q')
4203 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4204 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4205 : Intrinsic::x86_avx512_psra_q_512;
4206 else if (
Size ==
'w')
4207 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4208 : Intrinsic::x86_avx512_psra_w_512;
4214 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4216 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4218 }
else if (Name.ends_with(
".movntdqa")) {
4222 LoadInst *LI = Builder.CreateAlignedLoad(
4227 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4228 Name.starts_with(
"fma.vfmsub.") ||
4229 Name.starts_with(
"fma.vfnmadd.") ||
4230 Name.starts_with(
"fma.vfnmsub.")) {
4231 bool NegMul = Name[6] ==
'n';
4232 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4233 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4244 if (NegMul && !IsScalar)
4245 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4246 if (NegMul && IsScalar)
4247 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4249 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4251 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4255 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4263 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4267 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4268 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4269 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4270 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4271 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4272 bool IsMask3 = Name[11] ==
'3';
4273 bool IsMaskZ = Name[11] ==
'z';
4275 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4276 bool NegMul = Name[2] ==
'n';
4277 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4283 if (NegMul && (IsMask3 || IsMaskZ))
4284 A = Builder.CreateFNeg(
A);
4285 if (NegMul && !(IsMask3 || IsMaskZ))
4286 B = Builder.CreateFNeg(
B);
4288 C = Builder.CreateFNeg(
C);
4290 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4291 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4292 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4299 if (Name.back() ==
'd')
4300 IID = Intrinsic::x86_avx512_vfmadd_f64;
4302 IID = Intrinsic::x86_avx512_vfmadd_f32;
4303 Rep = Builder.CreateIntrinsic(IID,
Ops);
4305 Rep = Builder.CreateFMA(
A,
B,
C);
4314 if (NegAcc && IsMask3)
4319 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4321 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4322 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4323 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4324 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4325 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4326 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4327 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4328 bool IsMask3 = Name[11] ==
'3';
4329 bool IsMaskZ = Name[11] ==
'z';
4331 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4332 bool NegMul = Name[2] ==
'n';
4333 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4339 if (NegMul && (IsMask3 || IsMaskZ))
4340 A = Builder.CreateFNeg(
A);
4341 if (NegMul && !(IsMask3 || IsMaskZ))
4342 B = Builder.CreateFNeg(
B);
4344 C = Builder.CreateFNeg(
C);
4351 if (Name[Name.size() - 5] ==
's')
4352 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4354 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4358 Rep = Builder.CreateFMA(
A,
B,
C);
4366 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4370 if (VecWidth == 128 && EltWidth == 32)
4371 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4372 else if (VecWidth == 256 && EltWidth == 32)
4373 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4374 else if (VecWidth == 128 && EltWidth == 64)
4375 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4376 else if (VecWidth == 256 && EltWidth == 64)
4377 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4383 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4384 Rep = Builder.CreateIntrinsic(IID,
Ops);
4385 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4386 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4387 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4388 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4389 bool IsMask3 = Name[11] ==
'3';
4390 bool IsMaskZ = Name[11] ==
'z';
4392 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4393 bool IsSubAdd = Name[3] ==
's';
4397 if (Name[Name.size() - 5] ==
's')
4398 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4400 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4405 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4407 Rep = Builder.CreateIntrinsic(IID,
Ops);
4416 Value *Odd = Builder.CreateCall(FMA,
Ops);
4417 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4418 Value *Even = Builder.CreateCall(FMA,
Ops);
4424 for (
int i = 0; i != NumElts; ++i)
4425 Idxs[i] = i + (i % 2) * NumElts;
4427 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4435 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4436 Name.starts_with(
"avx512.maskz.pternlog.")) {
4437 bool ZeroMask = Name[11] ==
'z';
4441 if (VecWidth == 128 && EltWidth == 32)
4442 IID = Intrinsic::x86_avx512_pternlog_d_128;
4443 else if (VecWidth == 256 && EltWidth == 32)
4444 IID = Intrinsic::x86_avx512_pternlog_d_256;
4445 else if (VecWidth == 512 && EltWidth == 32)
4446 IID = Intrinsic::x86_avx512_pternlog_d_512;
4447 else if (VecWidth == 128 && EltWidth == 64)
4448 IID = Intrinsic::x86_avx512_pternlog_q_128;
4449 else if (VecWidth == 256 && EltWidth == 64)
4450 IID = Intrinsic::x86_avx512_pternlog_q_256;
4451 else if (VecWidth == 512 && EltWidth == 64)
4452 IID = Intrinsic::x86_avx512_pternlog_q_512;
4458 Rep = Builder.CreateIntrinsic(IID, Args);
4462 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4463 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4464 bool ZeroMask = Name[11] ==
'z';
4465 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4468 if (VecWidth == 128 && !
High)
4469 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4470 else if (VecWidth == 256 && !
High)
4471 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4472 else if (VecWidth == 512 && !
High)
4473 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4474 else if (VecWidth == 128 &&
High)
4475 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4476 else if (VecWidth == 256 &&
High)
4477 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4478 else if (VecWidth == 512 &&
High)
4479 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4485 Rep = Builder.CreateIntrinsic(IID, Args);
4489 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4490 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4491 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4492 bool ZeroMask = Name[11] ==
'z';
4493 bool IndexForm = Name[17] ==
'i';
4495 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4496 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4497 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4498 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4499 bool ZeroMask = Name[11] ==
'z';
4500 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4503 if (VecWidth == 128 && !IsSaturating)
4504 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4505 else if (VecWidth == 256 && !IsSaturating)
4506 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4507 else if (VecWidth == 512 && !IsSaturating)
4508 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4509 else if (VecWidth == 128 && IsSaturating)
4510 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4511 else if (VecWidth == 256 && IsSaturating)
4512 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4513 else if (VecWidth == 512 && IsSaturating)
4514 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4524 if (Args[1]->
getType()->isVectorTy() &&
4527 ->isIntegerTy(32) &&
4528 Args[2]->
getType()->isVectorTy() &&
4531 ->isIntegerTy(32)) {
4532 Type *NewArgType =
nullptr;
4533 if (VecWidth == 128)
4535 else if (VecWidth == 256)
4537 else if (VecWidth == 512)
4543 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4544 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4547 Rep = Builder.CreateIntrinsic(IID, Args);
4551 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4552 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4553 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4554 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4555 bool ZeroMask = Name[11] ==
'z';
4556 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4559 if (VecWidth == 128 && !IsSaturating)
4560 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4561 else if (VecWidth == 256 && !IsSaturating)
4562 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4563 else if (VecWidth == 512 && !IsSaturating)
4564 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4565 else if (VecWidth == 128 && IsSaturating)
4566 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4567 else if (VecWidth == 256 && IsSaturating)
4568 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4569 else if (VecWidth == 512 && IsSaturating)
4570 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4580 if (Args[1]->
getType()->isVectorTy() &&
4583 ->isIntegerTy(32) &&
4584 Args[2]->
getType()->isVectorTy() &&
4587 ->isIntegerTy(32)) {
4588 Type *NewArgType =
nullptr;
4589 if (VecWidth == 128)
4591 else if (VecWidth == 256)
4593 else if (VecWidth == 512)
4599 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4600 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4603 Rep = Builder.CreateIntrinsic(IID, Args);
4607 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4608 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4609 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4611 if (Name[0] ==
'a' && Name.back() ==
'2')
4612 IID = Intrinsic::x86_addcarry_32;
4613 else if (Name[0] ==
'a' && Name.back() ==
'4')
4614 IID = Intrinsic::x86_addcarry_64;
4615 else if (Name[0] ==
's' && Name.back() ==
'2')
4616 IID = Intrinsic::x86_subborrow_32;
4617 else if (Name[0] ==
's' && Name.back() ==
'4')
4618 IID = Intrinsic::x86_subborrow_64;
4625 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4628 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4631 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4635 }
else if (Name.starts_with(
"avx512.mask.") &&
4638 }
else if (Name.starts_with(
"bmi.pdep.")) {
4640 }
else if (Name.starts_with(
"bmi.pext.")) {
4650 if (Name.starts_with(
"neon.bfcvt")) {
4651 if (Name.starts_with(
"neon.bfcvtn2")) {
4653 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4655 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4656 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4659 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4660 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4662 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4666 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4667 return Builder.CreateShuffleVector(
4670 return Builder.CreateFPTrunc(CI->
getOperand(0),
4673 }
else if (Name.starts_with(
"sve.fcvt")) {
4676 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4677 .
Case(
"sve.fcvtnt.bf16f32",
4678 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4690 if (Args[1]->
getType() != BadPredTy)
4693 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4694 BadPredTy, Args[1]);
4695 Args[1] = Builder.CreateIntrinsic(
4696 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4698 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4702 if (Name ==
"neon.vcvtfp2hf")
4703 return Builder.CreateBitCast(
4704 Builder.CreateFPTrunc(
4708 if (Name ==
"neon.vcvthf2fp")
4709 return Builder.CreateFPExt(
4710 Builder.CreateBitCast(
4720 if (Name ==
"mve.vctp64.old") {
4723 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4726 Value *C1 = Builder.CreateIntrinsic(
4727 Intrinsic::arm_mve_pred_v2i,
4729 return Builder.CreateIntrinsic(
4730 Intrinsic::arm_mve_pred_i2v,
4732 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4733 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4734 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4735 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4737 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4738 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4739 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4740 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4742 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4743 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4744 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4745 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4746 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4747 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4748 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4749 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4750 std::vector<Type *> Tys;
4754 case Intrinsic::arm_mve_mull_int_predicated:
4755 case Intrinsic::arm_mve_vqdmull_predicated:
4756 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4759 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4760 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4761 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4765 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4769 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4773 case Intrinsic::arm_cde_vcx1q_predicated:
4774 case Intrinsic::arm_cde_vcx1qa_predicated:
4775 case Intrinsic::arm_cde_vcx2q_predicated:
4776 case Intrinsic::arm_cde_vcx2qa_predicated:
4777 case Intrinsic::arm_cde_vcx3q_predicated:
4778 case Intrinsic::arm_cde_vcx3qa_predicated:
4785 std::vector<Value *>
Ops;
4787 Type *Ty =
Op->getType();
4788 if (Ty->getScalarSizeInBits() == 1) {
4789 Value *C1 = Builder.CreateIntrinsic(
4790 Intrinsic::arm_mve_pred_v2i,
4792 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4797 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4812 auto UpgradeLegacyWMMAIUIntrinsicCall =
4817 Args.push_back(Builder.getFalse());
4821 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4828 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4833 NewCall->copyMetadata(*CI);
4837 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4838 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4839 "intrinsic should have 7 arguments");
4842 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4844 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4845 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4846 "intrinsic should have 8 arguments");
4851 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4854 switch (
F->getIntrinsicID()) {
4857 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4858 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4859 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4860 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4861 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4862 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4877 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4880 F->getParent(),
F->getIntrinsicID(), Overloads);
4885 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4890 NewCall->copyMetadata(*CI);
4891 NewCall->takeName(CI);
4913 if (NumOperands < 3)
4926 bool IsVolatile =
false;
4930 if (NumOperands > 3)
4935 if (NumOperands > 5) {
4937 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4951 if (VT->getElementType()->isIntegerTy(16)) {
4954 Val = Builder.CreateBitCast(Val, AsBF16);
4962 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4964 unsigned AddrSpace = PtrTy->getAddressSpace();
4967 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4969 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4974 MDNode *RangeNotPrivate =
4977 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4983 return Builder.CreateBitCast(RMW, RetTy);
5004 return MAV->getMetadata();
5013 if (Name ==
"label") {
5015 }
else if (Name ==
"assign") {
5022 }
else if (Name ==
"declare") {
5026 }
else if (Name ==
"addr") {
5036 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr);
5037 }
else if (Name ==
"value") {
5040 unsigned ExprOp = 2;
5055 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
5063 int64_t OffsetVal =
Offset->getSExtValue();
5064 return Builder.CreateIntrinsic(OffsetVal >= 0
5065 ? Intrinsic::vector_splice_left
5066 : Intrinsic::vector_splice_right,
5068 {CI->getArgOperand(0), CI->getArgOperand(1),
5069 Builder.getInt32(std::abs(OffsetVal))});
5074 if (Name.starts_with(
"to.fp16")) {
5076 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
5077 return Builder.CreateBitCast(Cast, CI->
getType());
5080 if (Name.starts_with(
"from.fp16")) {
5082 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
5083 return Builder.CreateFPExt(Cast, CI->
getType());
5108 if (!Name.consume_front(
"llvm."))
5111 bool IsX86 = Name.consume_front(
"x86.");
5112 bool IsNVVM = Name.consume_front(
"nvvm.");
5113 bool IsAArch64 = Name.consume_front(
"aarch64.");
5114 bool IsARM = Name.consume_front(
"arm.");
5115 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5116 bool IsDbg = Name.consume_front(
"dbg.");
5118 (Name.consume_front(
"experimental.vector.splice") ||
5119 Name.consume_front(
"vector.splice")) &&
5120 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5121 Value *Rep =
nullptr;
5123 if (!IsX86 && Name ==
"stackprotectorcheck") {
5125 }
else if (IsNVVM) {
5129 }
else if (IsAArch64) {
5133 }
else if (IsAMDGCN) {
5137 }
else if (IsOldSplice) {
5139 }
else if (Name.consume_front(
"convert.")) {
5141 }
else if (Name ==
"lifetime.start.i64" || Name ==
"lifetime.end.i64") {
5154 const auto &DefaultCase = [&]() ->
void {
5162 "Unknown function for CallBase upgrade and isn't just a name change");
5170 "Return type must have changed");
5171 assert(OldST->getNumElements() ==
5173 "Must have same number of elements");
5176 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5179 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5180 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5181 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5200 case Intrinsic::arm_neon_vst1:
5201 case Intrinsic::arm_neon_vst2:
5202 case Intrinsic::arm_neon_vst3:
5203 case Intrinsic::arm_neon_vst4:
5204 case Intrinsic::arm_neon_vst2lane:
5205 case Intrinsic::arm_neon_vst3lane:
5206 case Intrinsic::arm_neon_vst4lane: {
5208 NewCall = Builder.CreateCall(NewFn, Args);
5211 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5212 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5213 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5218 NewCall = Builder.CreateCall(NewFn, Args);
5221 case Intrinsic::aarch64_sve_ld3_sret:
5222 case Intrinsic::aarch64_sve_ld4_sret:
5223 case Intrinsic::aarch64_sve_ld2_sret: {
5231 Name = Name.substr(5);
5238 unsigned MinElts = RetTy->getMinNumElements() /
N;
5240 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5242 for (
unsigned I = 0;
I <
N;
I++) {
5243 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5244 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5250 case Intrinsic::coro_end: {
5253 NewCall = Builder.CreateCall(NewFn, Args);
5257 case Intrinsic::vector_extract: {
5259 Name = Name.substr(5);
5260 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5265 unsigned MinElts = RetTy->getMinNumElements();
5268 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5272 case Intrinsic::vector_insert: {
5274 Name = Name.substr(5);
5275 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5279 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5284 NewCall = Builder.CreateCall(
5288 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5294 assert(
N > 1 &&
"Create is expected to be between 2-4");
5297 unsigned MinElts = RetTy->getMinNumElements() /
N;
5298 for (
unsigned I = 0;
I <
N;
I++) {
5300 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5307 case Intrinsic::arm_neon_bfdot:
5308 case Intrinsic::arm_neon_bfmmla:
5309 case Intrinsic::arm_neon_bfmlalb:
5310 case Intrinsic::arm_neon_bfmlalt:
5311 case Intrinsic::aarch64_neon_bfdot:
5312 case Intrinsic::aarch64_neon_bfmmla:
5313 case Intrinsic::aarch64_neon_bfmlalb:
5314 case Intrinsic::aarch64_neon_bfmlalt: {
5317 "Mismatch between function args and call args");
5318 size_t OperandWidth =
5320 assert((OperandWidth == 64 || OperandWidth == 128) &&
5321 "Unexpected operand width");
5323 auto Iter = CI->
args().begin();
5324 Args.push_back(*Iter++);
5325 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5326 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5327 NewCall = Builder.CreateCall(NewFn, Args);
5331 case Intrinsic::bitreverse:
5332 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5335 case Intrinsic::ctlz:
5336 case Intrinsic::cttz: {
5343 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5347 case Intrinsic::objectsize: {
5348 Value *NullIsUnknownSize =
5352 NewCall = Builder.CreateCall(
5357 case Intrinsic::ctpop:
5358 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5360 case Intrinsic::dbg_value: {
5362 Name = Name.substr(5);
5364 if (Name.starts_with(
"dbg.addr")) {
5378 if (
Offset->isNullValue()) {
5379 NewCall = Builder.CreateCall(
5388 case Intrinsic::ptr_annotation:
5396 NewCall = Builder.CreateCall(
5405 case Intrinsic::var_annotation:
5412 NewCall = Builder.CreateCall(
5421 case Intrinsic::riscv_aes32dsi:
5422 case Intrinsic::riscv_aes32dsmi:
5423 case Intrinsic::riscv_aes32esi:
5424 case Intrinsic::riscv_aes32esmi:
5425 case Intrinsic::riscv_sm4ks:
5426 case Intrinsic::riscv_sm4ed: {
5436 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5437 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5443 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5444 Value *Res = NewCall;
5446 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5452 case Intrinsic::nvvm_mapa_shared_cluster: {
5456 Value *Res = NewCall;
5457 Res = Builder.CreateAddrSpaceCast(
5464 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5465 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5468 Args[0] = Builder.CreateAddrSpaceCast(
5471 NewCall = Builder.CreateCall(NewFn, Args);
5477 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5478 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5479 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5480 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5481 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5482 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5483 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5484 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5491 Args[0] = Builder.CreateAddrSpaceCast(
5500 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5502 NewCall = Builder.CreateCall(NewFn, Args);
5508 case Intrinsic::riscv_sha256sig0:
5509 case Intrinsic::riscv_sha256sig1:
5510 case Intrinsic::riscv_sha256sum0:
5511 case Intrinsic::riscv_sha256sum1:
5512 case Intrinsic::riscv_sm3p0:
5513 case Intrinsic::riscv_sm3p1: {
5520 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5522 NewCall = Builder.CreateCall(NewFn, Arg);
5524 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5531 case Intrinsic::x86_xop_vfrcz_ss:
5532 case Intrinsic::x86_xop_vfrcz_sd:
5533 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5536 case Intrinsic::x86_xop_vpermil2pd:
5537 case Intrinsic::x86_xop_vpermil2ps:
5538 case Intrinsic::x86_xop_vpermil2pd_256:
5539 case Intrinsic::x86_xop_vpermil2ps_256: {
5543 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5544 NewCall = Builder.CreateCall(NewFn, Args);
5548 case Intrinsic::x86_sse41_ptestc:
5549 case Intrinsic::x86_sse41_ptestz:
5550 case Intrinsic::x86_sse41_ptestnzc: {
5564 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5565 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5567 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5571 case Intrinsic::x86_rdtscp: {
5577 NewCall = Builder.CreateCall(NewFn);
5579 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5582 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5590 case Intrinsic::x86_sse41_insertps:
5591 case Intrinsic::x86_sse41_dppd:
5592 case Intrinsic::x86_sse41_dpps:
5593 case Intrinsic::x86_sse41_mpsadbw:
5594 case Intrinsic::x86_avx_dp_ps_256:
5595 case Intrinsic::x86_avx2_mpsadbw: {
5601 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5602 NewCall = Builder.CreateCall(NewFn, Args);
5606 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5607 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5608 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5609 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5610 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5611 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5617 NewCall = Builder.CreateCall(NewFn, Args);
5626 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5627 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5628 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5629 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5630 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5631 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5635 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5636 Args[1] = Builder.CreateBitCast(
5639 NewCall = Builder.CreateCall(NewFn, Args);
5640 Value *Res = Builder.CreateBitCast(
5648 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5649 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5650 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5654 Args[1] = Builder.CreateBitCast(
5656 Args[2] = Builder.CreateBitCast(
5659 NewCall = Builder.CreateCall(NewFn, Args);
5663 case Intrinsic::thread_pointer: {
5664 NewCall = Builder.CreateCall(NewFn, {});
5668 case Intrinsic::memcpy:
5669 case Intrinsic::memmove:
5670 case Intrinsic::memset: {
5686 NewCall = Builder.CreateCall(NewFn, Args);
5688 AttributeList NewAttrs = AttributeList::get(
5689 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5690 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5691 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5696 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5699 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5703 case Intrinsic::masked_load:
5704 case Intrinsic::masked_gather:
5705 case Intrinsic::masked_store:
5706 case Intrinsic::masked_scatter: {
5712 auto GetMaybeAlign = [](
Value *
Op) {
5722 auto GetAlign = [&](
Value *
Op) {
5731 case Intrinsic::masked_load:
5732 NewCall = Builder.CreateMaskedLoad(
5736 case Intrinsic::masked_gather:
5737 NewCall = Builder.CreateMaskedGather(
5743 case Intrinsic::masked_store:
5744 NewCall = Builder.CreateMaskedStore(
5748 case Intrinsic::masked_scatter:
5749 NewCall = Builder.CreateMaskedScatter(
5751 DL.getValueOrABITypeAlignment(
5765 case Intrinsic::lifetime_start:
5766 case Intrinsic::lifetime_end: {
5778 NewCall = Builder.CreateLifetimeStart(Ptr);
5780 NewCall = Builder.CreateLifetimeEnd(Ptr);
5789 case Intrinsic::x86_avx512_vpdpbusd_128:
5790 case Intrinsic::x86_avx512_vpdpbusd_256:
5791 case Intrinsic::x86_avx512_vpdpbusd_512:
5792 case Intrinsic::x86_avx512_vpdpbusds_128:
5793 case Intrinsic::x86_avx512_vpdpbusds_256:
5794 case Intrinsic::x86_avx512_vpdpbusds_512:
5795 case Intrinsic::x86_avx2_vpdpbssd_128:
5796 case Intrinsic::x86_avx2_vpdpbssd_256:
5797 case Intrinsic::x86_avx10_vpdpbssd_512:
5798 case Intrinsic::x86_avx2_vpdpbssds_128:
5799 case Intrinsic::x86_avx2_vpdpbssds_256:
5800 case Intrinsic::x86_avx10_vpdpbssds_512:
5801 case Intrinsic::x86_avx2_vpdpbsud_128:
5802 case Intrinsic::x86_avx2_vpdpbsud_256:
5803 case Intrinsic::x86_avx10_vpdpbsud_512:
5804 case Intrinsic::x86_avx2_vpdpbsuds_128:
5805 case Intrinsic::x86_avx2_vpdpbsuds_256:
5806 case Intrinsic::x86_avx10_vpdpbsuds_512:
5807 case Intrinsic::x86_avx2_vpdpbuud_128:
5808 case Intrinsic::x86_avx2_vpdpbuud_256:
5809 case Intrinsic::x86_avx10_vpdpbuud_512:
5810 case Intrinsic::x86_avx2_vpdpbuuds_128:
5811 case Intrinsic::x86_avx2_vpdpbuuds_256:
5812 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5817 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5818 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5820 NewCall = Builder.CreateCall(NewFn, Args);
5823 case Intrinsic::x86_avx512_vpdpwssd_128:
5824 case Intrinsic::x86_avx512_vpdpwssd_256:
5825 case Intrinsic::x86_avx512_vpdpwssd_512:
5826 case Intrinsic::x86_avx512_vpdpwssds_128:
5827 case Intrinsic::x86_avx512_vpdpwssds_256:
5828 case Intrinsic::x86_avx512_vpdpwssds_512:
5829 case Intrinsic::x86_avx2_vpdpwsud_128:
5830 case Intrinsic::x86_avx2_vpdpwsud_256:
5831 case Intrinsic::x86_avx10_vpdpwsud_512:
5832 case Intrinsic::x86_avx2_vpdpwsuds_128:
5833 case Intrinsic::x86_avx2_vpdpwsuds_256:
5834 case Intrinsic::x86_avx10_vpdpwsuds_512:
5835 case Intrinsic::x86_avx2_vpdpwusd_128:
5836 case Intrinsic::x86_avx2_vpdpwusd_256:
5837 case Intrinsic::x86_avx10_vpdpwusd_512:
5838 case Intrinsic::x86_avx2_vpdpwusds_128:
5839 case Intrinsic::x86_avx2_vpdpwusds_256:
5840 case Intrinsic::x86_avx10_vpdpwusds_512:
5841 case Intrinsic::x86_avx2_vpdpwuud_128:
5842 case Intrinsic::x86_avx2_vpdpwuud_256:
5843 case Intrinsic::x86_avx10_vpdpwuud_512:
5844 case Intrinsic::x86_avx2_vpdpwuuds_128:
5845 case Intrinsic::x86_avx2_vpdpwuuds_256:
5846 case Intrinsic::x86_avx10_vpdpwuuds_512:
5851 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5852 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5854 NewCall = Builder.CreateCall(NewFn, Args);
5857 assert(NewCall &&
"Should have either set this variable or returned through "
5858 "the default case");
5865 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5879 F->eraseFromParent();
5885 if (NumOperands == 0)
5893 if (NumOperands == 3) {
5897 Metadata *Elts2[] = {ScalarType, ScalarType,
5911 if (
Opc != Instruction::BitCast)
5915 Type *SrcTy = V->getType();
5932 if (
Opc != Instruction::BitCast)
5935 Type *SrcTy =
C->getType();
5962 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5963 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5964 if (Flag->getNumOperands() < 3)
5966 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5967 return K->getString() ==
"Debug Info Version";
5970 if (OpIt != ModFlags->op_end()) {
5971 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5978 bool BrokenDebugInfo =
false;
5981 if (!BrokenDebugInfo)
5987 M.getContext().diagnose(Diag);
5994 M.getContext().diagnose(DiagVersion);
6004 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
6007 if (
F->hasFnAttribute(Attr)) {
6010 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
6012 auto [Part, Rest] = S.
split(
',');
6018 const unsigned Dim = DimC -
'x';
6019 assert(Dim < 3 &&
"Unexpected dim char");
6029 F->addFnAttr(Attr, NewAttr);
6033 return S ==
"x" || S ==
"y" || S ==
"z";
6038 if (K ==
"kernel") {
6050 const unsigned Idx = (AlignIdxValuePair >> 16);
6051 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
6056 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
6061 if (K ==
"minctasm") {
6066 if (K ==
"maxnreg") {
6071 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
6075 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
6079 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
6083 if (K ==
"grid_constant") {
6098 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
6105 if (!SeenNodes.
insert(MD).second)
6112 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6119 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6121 const MDOperand &V = MD->getOperand(j + 1);
6124 NewOperands.
append({K, V});
6127 if (NewOperands.
size() > 1)
6140 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6141 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6142 if (ModRetainReleaseMarker) {
6148 ID->getString().split(ValueComp,
"#");
6149 if (ValueComp.
size() == 2) {
6150 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6154 M.eraseNamedMetadata(ModRetainReleaseMarker);
6165 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6191 bool InvalidCast =
false;
6193 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6206 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6208 Args.push_back(Arg);
6215 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6220 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6233 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6241 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6242 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6243 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6244 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6245 {
"objc_autoreleaseReturnValue",
6246 llvm::Intrinsic::objc_autoreleaseReturnValue},
6247 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6248 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6249 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6250 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6251 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6252 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6253 {
"objc_release", llvm::Intrinsic::objc_release},
6254 {
"objc_retain", llvm::Intrinsic::objc_retain},
6255 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6256 {
"objc_retainAutoreleaseReturnValue",
6257 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6258 {
"objc_retainAutoreleasedReturnValue",
6259 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6260 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6261 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6262 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6263 {
"objc_unsafeClaimAutoreleasedReturnValue",
6264 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6265 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6266 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6267 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6268 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6269 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6270 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6271 {
"objc_arc_annotation_topdown_bbstart",
6272 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6273 {
"objc_arc_annotation_topdown_bbend",
6274 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6275 {
"objc_arc_annotation_bottomup_bbstart",
6276 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6277 {
"objc_arc_annotation_bottomup_bbend",
6278 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6280 for (
auto &
I : RuntimeFuncs)
6281 UpgradeToIntrinsic(
I.first,
I.second);
6285 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6289 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6290 bool HasSwiftVersionFlag =
false;
6291 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6298 if (
Op->getNumOperands() != 3)
6312 if (
ID->getString() ==
"Objective-C Image Info Version")
6314 if (
ID->getString() ==
"Objective-C Class Properties")
6315 HasClassProperties =
true;
6317 if (
ID->getString() ==
"PIC Level") {
6318 if (
auto *Behavior =
6320 uint64_t V = Behavior->getLimitedValue();
6326 if (
ID->getString() ==
"PIE Level")
6327 if (
auto *Behavior =
6334 if (
ID->getString() ==
"branch-target-enforcement" ||
6335 ID->getString().starts_with(
"sign-return-address")) {
6336 if (
auto *Behavior =
6342 Op->getOperand(1),
Op->getOperand(2)};
6352 if (
ID->getString() ==
"Objective-C Image Info Section") {
6355 Value->getString().split(ValueComp,
" ");
6356 if (ValueComp.
size() != 1) {
6357 std::string NewValue;
6358 for (
auto &S : ValueComp)
6359 NewValue += S.str();
6370 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6373 assert(Md->getValue() &&
"Expected non-empty metadata");
6374 auto Type = Md->getValue()->getType();
6377 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6378 if ((Val & 0xff) != Val) {
6379 HasSwiftVersionFlag =
true;
6380 SwiftABIVersion = (Val & 0xff00) >> 8;
6381 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6382 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6393 if (
ID->getString() ==
"amdgpu_code_object_version") {
6396 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6408 if (HasObjCFlag && !HasClassProperties) {
6414 if (HasSwiftVersionFlag) {
6418 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6420 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6428 NamedMDNode *CFIConsts = M.getNamedMetadata(
"cfi.functions");
6432 auto MatchesVersion = [](
const MDNode *
Op) {
6433 return Op->getNumOperands() >= 3 &&
6447 assert(!MatchesVersion(
Op) &&
"Unexpected mix of CFIConstant formats");
6448 assert(
Op->getNumOperands() >= 2 &&
6449 "Expected at least 2 operands - name and linkage type");
6461 for (
unsigned J = 2, EJ =
Op->getNumOperands(); J != EJ; ++J)
6472 auto TrimSpaces = [](
StringRef Section) -> std::string {
6474 Section.split(Components,
',');
6479 for (
auto Component : Components)
6480 OS <<
',' << Component.trim();
6485 for (
auto &GV : M.globals()) {
6486 if (!GV.hasSection())
6491 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6496 GV.setSection(TrimSpaces(Section));
6512struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6513 StrictFPUpgradeVisitor() =
default;
6516 if (!
Call.isStrictFP())
6522 Call.removeFnAttr(Attribute::StrictFP);
6523 Call.addFnAttr(Attribute::NoBuiltin);
6528struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6529 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6530 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6532 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6547 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6548 StrictFPUpgradeVisitor SFPV;
6553 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6554 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6555 for (
auto &Arg :
F.args())
6557 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6559 bool AddingAttrs =
false, RemovingAttrs =
false;
6560 AttrBuilder AttrsToAdd(
F.getContext());
6565 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6566 A.isValid() &&
A.isStringAttribute()) {
6567 F.setSection(
A.getValueAsString());
6569 RemovingAttrs =
true;
6573 A.isValid() &&
A.isStringAttribute()) {
6576 AddingAttrs = RemovingAttrs =
true;
6579 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6580 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6582 RemovingAttrs =
true;
6583 if (
A.getValueAsString() ==
"true") {
6584 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6593 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6596 if (
A.getValueAsBool()) {
6597 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6603 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6604 RemovingAttrs =
true;
6611 bool HandleDenormalMode =
false;
6613 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6616 DenormalFPMath = ParsedMode;
6618 AddingAttrs = RemovingAttrs =
true;
6619 HandleDenormalMode =
true;
6623 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6627 DenormalFPMathF32 = ParsedMode;
6629 AddingAttrs = RemovingAttrs =
true;
6630 HandleDenormalMode =
true;
6634 if (HandleDenormalMode)
6635 AttrsToAdd.addDenormalFPEnvAttr(
6639 F.removeFnAttrs(AttrsToRemove);
6642 F.addFnAttrs(AttrsToAdd);
6648 if (!
F.hasFnAttribute(FnAttrName))
6649 F.addFnAttr(FnAttrName,
Value);
6656 if (!
F.hasFnAttribute(FnAttrName)) {
6658 F.addFnAttr(FnAttrName);
6660 auto A =
F.getFnAttribute(FnAttrName);
6661 if (
"false" ==
A.getValueAsString())
6662 F.removeFnAttr(FnAttrName);
6663 else if (
"true" ==
A.getValueAsString()) {
6664 F.removeFnAttr(FnAttrName);
6665 F.addFnAttr(FnAttrName);
6671 Triple T(M.getTargetTriple());
6672 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6682 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6686 if (
Op->getNumOperands() != 3)
6695 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6696 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6697 : IDStr ==
"guarded-control-stack" ? &GCSValue
6698 : IDStr ==
"sign-return-address" ? &SRAValue
6699 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6700 : IDStr ==
"sign-return-address-with-bkey"
6706 *ValPtr = CI->getZExtValue();
6712 bool BTE = BTEValue == 1;
6713 bool BPPLR = BPPLRValue == 1;
6714 bool GCS = GCSValue == 1;
6715 bool SRA = SRAValue == 1;
6718 if (SRA && SRAALLValue == 1)
6719 SignTypeValue =
"all";
6722 if (SRA && SRABKeyValue == 1)
6723 SignKeyValue =
"b_key";
6725 for (
Function &
F : M.getFunctionList()) {
6726 if (
F.isDeclaration())
6733 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6734 A.isValid() &&
"none" ==
A.getValueAsString()) {
6735 F.removeFnAttr(
"sign-return-address");
6736 F.removeFnAttr(
"sign-return-address-key");
6752 if (SRAALLValue == 1)
6754 if (SRABKeyValue == 1)
6763 if (
T->getNumOperands() < 1)
6768 return S->getString().starts_with(
"llvm.vectorizer.");
6772 StringRef OldPrefix =
"llvm.vectorizer.";
6775 if (OldTag ==
"llvm.vectorizer.unroll")
6787 if (
T->getNumOperands() < 1)
6792 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6797 Ops.reserve(
T->getNumOperands());
6799 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6800 Ops.push_back(
T->getOperand(
I));
6814 Ops.reserve(
T->getNumOperands());
6825 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6826 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6827 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6830 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6832 auto I =
DL.find(
"-n64-");
6834 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6839 std::string Res =
DL.str();
6842 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6843 Res.append(Res.empty() ?
"G1" :
"-G1");
6851 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6852 Res.append(
"-ni:7:8:9");
6854 if (
DL.ends_with(
"ni:7"))
6856 if (
DL.ends_with(
"ni:7:8"))
6861 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6862 Res.append(
"-p7:160:256:256:32");
6863 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6864 Res.append(
"-p8:128:128:128:48");
6865 constexpr StringRef OldP8(
"-p8:128:128-");
6866 if (
DL.contains(OldP8))
6867 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6868 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6869 Res.append(
"-p9:192:256:256:32");
6873 if (!
DL.contains(
"m:e"))
6874 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6879 if (
T.isSystemZ() && !
DL.empty()) {
6881 if (!
DL.contains(
"-S64"))
6882 return "E-S64" +
DL.drop_front(1).str();
6886 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6889 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6890 if (!
DL.contains(AddrSpaces)) {
6892 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6893 if (R.match(Res, &
Groups))
6899 if (
T.isAArch64()) {
6901 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6902 Res.append(
"-Fn32");
6903 AddPtr32Ptr64AddrSpaces();
6907 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6911 std::string I64 =
"-i64:64";
6912 std::string I128 =
"-i128:128";
6914 size_t Pos = Res.find(I64);
6915 if (Pos !=
size_t(-1))
6916 Res.insert(Pos + I64.size(), I128);
6920 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6921 size_t Pos = Res.find(
"-S128");
6924 Res.insert(Pos,
"-f64:32:64");
6930 AddPtr32Ptr64AddrSpaces();
6938 if (!
T.isOSIAMCU()) {
6939 std::string I128 =
"-i128:128";
6942 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6943 if (R.match(Res, &
Groups))
6951 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6953 auto I =
Ref.find(
"-f80:32-");
6955 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6963 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6966 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6967 B.removeAttribute(
"no-frame-pointer-elim");
6969 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6971 if (FramePointer !=
"all")
6972 FramePointer =
"non-leaf";
6973 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6975 if (!FramePointer.
empty())
6976 B.addAttribute(
"frame-pointer", FramePointer);
6978 A =
B.getAttribute(
"null-pointer-is-valid");
6981 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6982 B.removeAttribute(
"null-pointer-is-valid");
6983 if (NullPointerIsValid)
6984 B.addAttribute(Attribute::NullPointerIsValid);
6987 A =
B.getAttribute(
"uniform-work-group-size");
6991 bool IsTrue = Val ==
"true";
6992 B.removeAttribute(
"uniform-work-group-size");
6994 B.addAttribute(
"uniform-work-group-size");
7005 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
LLVM_ABI void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.