34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
62 cl::desc(
"Disable autoupgrade of debug info"));
81 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
96 Type *LastArgType =
F->getFunctionType()->getParamType(
97 F->getFunctionType()->getNumParams() - 1);
112 if (
F->getReturnType()->isVectorTy())
125 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
126 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
143 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
144 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
158 if (
F->getReturnType()->getScalarType()->isBFloatTy())
168 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
182 if (Name.consume_front(
"avx."))
183 return (Name.starts_with(
"blend.p") ||
184 Name ==
"cvt.ps2.pd.256" ||
185 Name ==
"cvtdq2.pd.256" ||
186 Name ==
"cvtdq2.ps.256" ||
187 Name.starts_with(
"movnt.") ||
188 Name.starts_with(
"sqrt.p") ||
189 Name.starts_with(
"storeu.") ||
190 Name.starts_with(
"vbroadcast.s") ||
191 Name.starts_with(
"vbroadcastf128") ||
192 Name.starts_with(
"vextractf128.") ||
193 Name.starts_with(
"vinsertf128.") ||
194 Name.starts_with(
"vperm2f128.") ||
195 Name.starts_with(
"vpermil."));
197 if (Name.consume_front(
"avx2."))
198 return (Name ==
"movntdqa" ||
199 Name.starts_with(
"pabs.") ||
200 Name.starts_with(
"padds.") ||
201 Name.starts_with(
"paddus.") ||
202 Name.starts_with(
"pblendd.") ||
204 Name.starts_with(
"pbroadcast") ||
205 Name.starts_with(
"pcmpeq.") ||
206 Name.starts_with(
"pcmpgt.") ||
207 Name.starts_with(
"pmax") ||
208 Name.starts_with(
"pmin") ||
209 Name.starts_with(
"pmovsx") ||
210 Name.starts_with(
"pmovzx") ||
212 Name ==
"pmulu.dq" ||
213 Name.starts_with(
"psll.dq") ||
214 Name.starts_with(
"psrl.dq") ||
215 Name.starts_with(
"psubs.") ||
216 Name.starts_with(
"psubus.") ||
217 Name.starts_with(
"vbroadcast") ||
218 Name ==
"vbroadcasti128" ||
219 Name ==
"vextracti128" ||
220 Name ==
"vinserti128" ||
221 Name ==
"vperm2i128");
223 if (Name.consume_front(
"avx512.")) {
224 if (Name.consume_front(
"mask."))
226 return (Name.starts_with(
"add.p") ||
227 Name.starts_with(
"and.") ||
228 Name.starts_with(
"andn.") ||
229 Name.starts_with(
"broadcast.s") ||
230 Name.starts_with(
"broadcastf32x4.") ||
231 Name.starts_with(
"broadcastf32x8.") ||
232 Name.starts_with(
"broadcastf64x2.") ||
233 Name.starts_with(
"broadcastf64x4.") ||
234 Name.starts_with(
"broadcasti32x4.") ||
235 Name.starts_with(
"broadcasti32x8.") ||
236 Name.starts_with(
"broadcasti64x2.") ||
237 Name.starts_with(
"broadcasti64x4.") ||
238 Name.starts_with(
"cmp.b") ||
239 Name.starts_with(
"cmp.d") ||
240 Name.starts_with(
"cmp.q") ||
241 Name.starts_with(
"cmp.w") ||
242 Name.starts_with(
"compress.b") ||
243 Name.starts_with(
"compress.d") ||
244 Name.starts_with(
"compress.p") ||
245 Name.starts_with(
"compress.q") ||
246 Name.starts_with(
"compress.store.") ||
247 Name.starts_with(
"compress.w") ||
248 Name.starts_with(
"conflict.") ||
249 Name.starts_with(
"cvtdq2pd.") ||
250 Name.starts_with(
"cvtdq2ps.") ||
251 Name ==
"cvtpd2dq.256" ||
252 Name ==
"cvtpd2ps.256" ||
253 Name ==
"cvtps2pd.128" ||
254 Name ==
"cvtps2pd.256" ||
255 Name.starts_with(
"cvtqq2pd.") ||
256 Name ==
"cvtqq2ps.256" ||
257 Name ==
"cvtqq2ps.512" ||
258 Name ==
"cvttpd2dq.256" ||
259 Name ==
"cvttps2dq.128" ||
260 Name ==
"cvttps2dq.256" ||
261 Name.starts_with(
"cvtudq2pd.") ||
262 Name.starts_with(
"cvtudq2ps.") ||
263 Name.starts_with(
"cvtuqq2pd.") ||
264 Name ==
"cvtuqq2ps.256" ||
265 Name ==
"cvtuqq2ps.512" ||
266 Name.starts_with(
"dbpsadbw.") ||
267 Name.starts_with(
"div.p") ||
268 Name.starts_with(
"expand.b") ||
269 Name.starts_with(
"expand.d") ||
270 Name.starts_with(
"expand.load.") ||
271 Name.starts_with(
"expand.p") ||
272 Name.starts_with(
"expand.q") ||
273 Name.starts_with(
"expand.w") ||
274 Name.starts_with(
"fpclass.p") ||
275 Name.starts_with(
"insert") ||
276 Name.starts_with(
"load.") ||
277 Name.starts_with(
"loadu.") ||
278 Name.starts_with(
"lzcnt.") ||
279 Name.starts_with(
"max.p") ||
280 Name.starts_with(
"min.p") ||
281 Name.starts_with(
"movddup") ||
282 Name.starts_with(
"move.s") ||
283 Name.starts_with(
"movshdup") ||
284 Name.starts_with(
"movsldup") ||
285 Name.starts_with(
"mul.p") ||
286 Name.starts_with(
"or.") ||
287 Name.starts_with(
"pabs.") ||
288 Name.starts_with(
"packssdw.") ||
289 Name.starts_with(
"packsswb.") ||
290 Name.starts_with(
"packusdw.") ||
291 Name.starts_with(
"packuswb.") ||
292 Name.starts_with(
"padd.") ||
293 Name.starts_with(
"padds.") ||
294 Name.starts_with(
"paddus.") ||
295 Name.starts_with(
"palignr.") ||
296 Name.starts_with(
"pand.") ||
297 Name.starts_with(
"pandn.") ||
298 Name.starts_with(
"pavg") ||
299 Name.starts_with(
"pbroadcast") ||
300 Name.starts_with(
"pcmpeq.") ||
301 Name.starts_with(
"pcmpgt.") ||
302 Name.starts_with(
"perm.df.") ||
303 Name.starts_with(
"perm.di.") ||
304 Name.starts_with(
"permvar.") ||
305 Name.starts_with(
"pmaddubs.w.") ||
306 Name.starts_with(
"pmaddw.d.") ||
307 Name.starts_with(
"pmax") ||
308 Name.starts_with(
"pmin") ||
309 Name ==
"pmov.qd.256" ||
310 Name ==
"pmov.qd.512" ||
311 Name ==
"pmov.wb.256" ||
312 Name ==
"pmov.wb.512" ||
313 Name.starts_with(
"pmovsx") ||
314 Name.starts_with(
"pmovzx") ||
315 Name.starts_with(
"pmul.dq.") ||
316 Name.starts_with(
"pmul.hr.sw.") ||
317 Name.starts_with(
"pmulh.w.") ||
318 Name.starts_with(
"pmulhu.w.") ||
319 Name.starts_with(
"pmull.") ||
320 Name.starts_with(
"pmultishift.qb.") ||
321 Name.starts_with(
"pmulu.dq.") ||
322 Name.starts_with(
"por.") ||
323 Name.starts_with(
"prol.") ||
324 Name.starts_with(
"prolv.") ||
325 Name.starts_with(
"pror.") ||
326 Name.starts_with(
"prorv.") ||
327 Name.starts_with(
"pshuf.b.") ||
328 Name.starts_with(
"pshuf.d.") ||
329 Name.starts_with(
"pshufh.w.") ||
330 Name.starts_with(
"pshufl.w.") ||
331 Name.starts_with(
"psll.d") ||
332 Name.starts_with(
"psll.q") ||
333 Name.starts_with(
"psll.w") ||
334 Name.starts_with(
"pslli") ||
335 Name.starts_with(
"psllv") ||
336 Name.starts_with(
"psra.d") ||
337 Name.starts_with(
"psra.q") ||
338 Name.starts_with(
"psra.w") ||
339 Name.starts_with(
"psrai") ||
340 Name.starts_with(
"psrav") ||
341 Name.starts_with(
"psrl.d") ||
342 Name.starts_with(
"psrl.q") ||
343 Name.starts_with(
"psrl.w") ||
344 Name.starts_with(
"psrli") ||
345 Name.starts_with(
"psrlv") ||
346 Name.starts_with(
"psub.") ||
347 Name.starts_with(
"psubs.") ||
348 Name.starts_with(
"psubus.") ||
349 Name.starts_with(
"pternlog.") ||
350 Name.starts_with(
"punpckh") ||
351 Name.starts_with(
"punpckl") ||
352 Name.starts_with(
"pxor.") ||
353 Name.starts_with(
"shuf.f") ||
354 Name.starts_with(
"shuf.i") ||
355 Name.starts_with(
"shuf.p") ||
356 Name.starts_with(
"sqrt.p") ||
357 Name.starts_with(
"store.b.") ||
358 Name.starts_with(
"store.d.") ||
359 Name.starts_with(
"store.p") ||
360 Name.starts_with(
"store.q.") ||
361 Name.starts_with(
"store.w.") ||
362 Name ==
"store.ss" ||
363 Name.starts_with(
"storeu.") ||
364 Name.starts_with(
"sub.p") ||
365 Name.starts_with(
"ucmp.") ||
366 Name.starts_with(
"unpckh.") ||
367 Name.starts_with(
"unpckl.") ||
368 Name.starts_with(
"valign.") ||
369 Name ==
"vcvtph2ps.128" ||
370 Name ==
"vcvtph2ps.256" ||
371 Name.starts_with(
"vextract") ||
372 Name.starts_with(
"vfmadd.") ||
373 Name.starts_with(
"vfmaddsub.") ||
374 Name.starts_with(
"vfnmadd.") ||
375 Name.starts_with(
"vfnmsub.") ||
376 Name.starts_with(
"vpdpbusd.") ||
377 Name.starts_with(
"vpdpbusds.") ||
378 Name.starts_with(
"vpdpwssd.") ||
379 Name.starts_with(
"vpdpwssds.") ||
380 Name.starts_with(
"vpermi2var.") ||
381 Name.starts_with(
"vpermil.p") ||
382 Name.starts_with(
"vpermilvar.") ||
383 Name.starts_with(
"vpermt2var.") ||
384 Name.starts_with(
"vpmadd52") ||
385 Name.starts_with(
"vpshld.") ||
386 Name.starts_with(
"vpshldv.") ||
387 Name.starts_with(
"vpshrd.") ||
388 Name.starts_with(
"vpshrdv.") ||
389 Name.starts_with(
"vpshufbitqmb.") ||
390 Name.starts_with(
"xor."));
392 if (Name.consume_front(
"mask3."))
394 return (Name.starts_with(
"vfmadd.") ||
395 Name.starts_with(
"vfmaddsub.") ||
396 Name.starts_with(
"vfmsub.") ||
397 Name.starts_with(
"vfmsubadd.") ||
398 Name.starts_with(
"vfnmsub."));
400 if (Name.consume_front(
"maskz."))
402 return (Name.starts_with(
"pternlog.") ||
403 Name.starts_with(
"vfmadd.") ||
404 Name.starts_with(
"vfmaddsub.") ||
405 Name.starts_with(
"vpdpbusd.") ||
406 Name.starts_with(
"vpdpbusds.") ||
407 Name.starts_with(
"vpdpwssd.") ||
408 Name.starts_with(
"vpdpwssds.") ||
409 Name.starts_with(
"vpermt2var.") ||
410 Name.starts_with(
"vpmadd52") ||
411 Name.starts_with(
"vpshldv.") ||
412 Name.starts_with(
"vpshrdv."));
415 return (Name ==
"movntdqa" ||
416 Name ==
"pmul.dq.512" ||
417 Name ==
"pmulu.dq.512" ||
418 Name.starts_with(
"broadcastm") ||
419 Name.starts_with(
"cmp.p") ||
420 Name.starts_with(
"cvtb2mask.") ||
421 Name.starts_with(
"cvtd2mask.") ||
422 Name.starts_with(
"cvtmask2") ||
423 Name.starts_with(
"cvtq2mask.") ||
424 Name ==
"cvtusi2sd" ||
425 Name.starts_with(
"cvtw2mask.") ||
430 Name ==
"kortestc.w" ||
431 Name ==
"kortestz.w" ||
432 Name.starts_with(
"kunpck") ||
435 Name.starts_with(
"padds.") ||
436 Name.starts_with(
"pbroadcast") ||
437 Name.starts_with(
"prol") ||
438 Name.starts_with(
"pror") ||
439 Name.starts_with(
"psll.dq") ||
440 Name.starts_with(
"psrl.dq") ||
441 Name.starts_with(
"psubs.") ||
442 Name.starts_with(
"ptestm") ||
443 Name.starts_with(
"ptestnm") ||
444 Name.starts_with(
"storent.") ||
445 Name.starts_with(
"vbroadcast.s") ||
446 Name.starts_with(
"vpshld.") ||
447 Name.starts_with(
"vpshrd."));
450 if (Name.consume_front(
"fma."))
451 return (Name.starts_with(
"vfmadd.") ||
452 Name.starts_with(
"vfmsub.") ||
453 Name.starts_with(
"vfmsubadd.") ||
454 Name.starts_with(
"vfnmadd.") ||
455 Name.starts_with(
"vfnmsub."));
457 if (Name.consume_front(
"fma4."))
458 return Name.starts_with(
"vfmadd.s");
460 if (Name.consume_front(
"sse."))
461 return (Name ==
"add.ss" ||
462 Name ==
"cvtsi2ss" ||
463 Name ==
"cvtsi642ss" ||
466 Name.starts_with(
"sqrt.p") ||
468 Name.starts_with(
"storeu.") ||
471 if (Name.consume_front(
"sse2."))
472 return (Name ==
"add.sd" ||
473 Name ==
"cvtdq2pd" ||
474 Name ==
"cvtdq2ps" ||
475 Name ==
"cvtps2pd" ||
476 Name ==
"cvtsi2sd" ||
477 Name ==
"cvtsi642sd" ||
478 Name ==
"cvtss2sd" ||
481 Name.starts_with(
"padds.") ||
482 Name.starts_with(
"paddus.") ||
483 Name.starts_with(
"pcmpeq.") ||
484 Name.starts_with(
"pcmpgt.") ||
489 Name ==
"pmulu.dq" ||
490 Name.starts_with(
"pshuf") ||
491 Name.starts_with(
"psll.dq") ||
492 Name.starts_with(
"psrl.dq") ||
493 Name.starts_with(
"psubs.") ||
494 Name.starts_with(
"psubus.") ||
495 Name.starts_with(
"sqrt.p") ||
497 Name ==
"storel.dq" ||
498 Name.starts_with(
"storeu.") ||
501 if (Name.consume_front(
"sse41."))
502 return (Name.starts_with(
"blendp") ||
503 Name ==
"movntdqa" ||
513 Name.starts_with(
"pmovsx") ||
514 Name.starts_with(
"pmovzx") ||
517 if (Name.consume_front(
"sse42."))
518 return Name ==
"crc32.64.8";
520 if (Name.consume_front(
"sse4a."))
521 return Name.starts_with(
"movnt.");
523 if (Name.consume_front(
"ssse3."))
524 return (Name ==
"pabs.b.128" ||
525 Name ==
"pabs.d.128" ||
526 Name ==
"pabs.w.128");
528 if (Name.consume_front(
"xop."))
529 return (Name ==
"vpcmov" ||
530 Name ==
"vpcmov.256" ||
531 Name.starts_with(
"vpcom") ||
532 Name.starts_with(
"vprot"));
534 return (Name ==
"addcarry.u32" ||
535 Name ==
"addcarry.u64" ||
536 Name ==
"addcarryx.u32" ||
537 Name ==
"addcarryx.u64" ||
538 Name ==
"subborrow.u32" ||
539 Name ==
"subborrow.u64" ||
540 Name.starts_with(
"vcvtph2ps."));
546 if (!Name.consume_front(
"x86."))
554 if (Name ==
"rdtscp") {
556 if (
F->getFunctionType()->getNumParams() == 0)
561 Intrinsic::x86_rdtscp);
568 if (Name.consume_front(
"sse41.ptest")) {
570 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
571 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
572 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
585 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
586 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
587 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
588 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
589 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
590 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
595 if (Name.consume_front(
"avx512.")) {
596 if (Name.consume_front(
"mask.cmp.")) {
599 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
600 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
601 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
602 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
603 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
604 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 }
else if (Name.starts_with(
"vpdpbusd.") ||
609 Name.starts_with(
"vpdpbusds.")) {
612 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
613 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
614 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
615 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
616 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
617 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 }
else if (Name.starts_with(
"vpdpwssd.") ||
622 Name.starts_with(
"vpdpwssds.")) {
625 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
626 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
627 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
628 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
629 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
630 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
638 if (Name.consume_front(
"avx2.")) {
639 if (Name.consume_front(
"vpdpb")) {
642 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
643 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
644 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
645 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
646 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
647 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
648 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
649 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
650 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
651 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
652 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
653 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 }
else if (Name.consume_front(
"vpdpw")) {
660 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
661 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
662 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
663 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
664 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
665 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
666 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
667 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
668 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
669 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
670 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
671 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
679 if (Name.consume_front(
"avx10.")) {
680 if (Name.consume_front(
"vpdpb")) {
683 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
684 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
685 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
686 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
687 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
688 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 }
else if (Name.consume_front(
"vpdpw")) {
694 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
695 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
696 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
697 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
698 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
699 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
707 if (Name.consume_front(
"avx512bf16.")) {
710 .
Case(
"cvtne2ps2bf16.128",
711 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
712 .
Case(
"cvtne2ps2bf16.256",
713 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
714 .
Case(
"cvtne2ps2bf16.512",
715 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
716 .
Case(
"mask.cvtneps2bf16.128",
717 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
718 .
Case(
"cvtneps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
720 .
Case(
"cvtneps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
728 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
729 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
730 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
737 if (Name.consume_front(
"xop.")) {
739 if (Name.starts_with(
"vpermil2")) {
742 auto Idx =
F->getFunctionType()->getParamType(2);
743 if (Idx->isFPOrFPVectorTy()) {
744 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
745 unsigned EltSize = Idx->getScalarSizeInBits();
746 if (EltSize == 64 && IdxSize == 128)
747 ID = Intrinsic::x86_xop_vpermil2pd;
748 else if (EltSize == 32 && IdxSize == 128)
749 ID = Intrinsic::x86_xop_vpermil2ps;
750 else if (EltSize == 64 && IdxSize == 256)
751 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
else if (
F->arg_size() == 2)
758 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
759 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
770 if (Name ==
"seh.recoverfp") {
772 Intrinsic::eh_recoverfp);
784 if (Name.starts_with(
"rbit")) {
787 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
791 if (Name ==
"thread.pointer") {
794 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
798 bool Neon = Name.consume_front(
"neon.");
803 if (Name.consume_front(
"bfdot.")) {
807 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
812 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
813 assert((OperandWidth == 64 || OperandWidth == 128) &&
814 "Unexpected operand width");
816 std::array<Type *, 2> Tys{
827 if (Name.consume_front(
"bfm")) {
829 if (Name.consume_back(
".v4f32.v16i8")) {
875 F->arg_begin()->getType());
879 if (Name.consume_front(
"vst")) {
881 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
885 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
886 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
889 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
890 Intrinsic::arm_neon_vst4lane};
892 auto fArgs =
F->getFunctionType()->params();
893 Type *Tys[] = {fArgs[0], fArgs[1]};
896 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
899 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
908 if (Name.consume_front(
"mve.")) {
910 if (Name ==
"vctp64") {
920 if (Name.starts_with(
"vrintn.v")) {
922 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
927 if (Name.consume_back(
".v4i1")) {
929 if (Name.consume_back(
".predicated.v2i64.v4i32"))
931 return Name ==
"mull.int" || Name ==
"vqdmull";
933 if (Name.consume_back(
".v2i64")) {
935 bool IsGather = Name.consume_front(
"vldr.gather.");
936 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
937 if (Name.consume_front(
"base.")) {
939 Name.consume_front(
"wb.");
942 return Name ==
"predicated.v2i64";
945 if (Name.consume_front(
"offset.predicated."))
946 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
947 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
960 if (Name.consume_front(
"cde.vcx")) {
962 if (Name.consume_back(
".predicated.v2i64.v4i1"))
964 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
965 Name ==
"3q" || Name ==
"3qa";
979 F->arg_begin()->getType());
983 if (Name.starts_with(
"addp")) {
985 if (
F->arg_size() != 2)
988 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
990 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
996 if (Name.starts_with(
"bfcvt")) {
1003 if (Name.consume_front(
"sve.")) {
1005 if (Name.consume_front(
"bf")) {
1006 if (Name.consume_back(
".lane")) {
1010 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1011 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1012 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1024 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1029 if (Name.consume_front(
"addqv")) {
1031 if (!
F->getReturnType()->isFPOrFPVectorTy())
1034 auto Args =
F->getFunctionType()->params();
1035 Type *Tys[] = {
F->getReturnType(), Args[1]};
1037 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1041 if (Name.consume_front(
"ld")) {
1043 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1044 if (LdRegex.
match(Name)) {
1051 Intrinsic::aarch64_sve_ld2_sret,
1052 Intrinsic::aarch64_sve_ld3_sret,
1053 Intrinsic::aarch64_sve_ld4_sret,
1056 LoadIDs[Name[0] -
'2'], Ty);
1062 if (Name.consume_front(
"tuple.")) {
1064 if (Name.starts_with(
"get")) {
1066 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1068 F->getParent(), Intrinsic::vector_extract, Tys);
1072 if (Name.starts_with(
"set")) {
1074 auto Args =
F->getFunctionType()->params();
1075 Type *Tys[] = {Args[0], Args[2], Args[1]};
1077 F->getParent(), Intrinsic::vector_insert, Tys);
1081 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1082 if (CreateTupleRegex.
match(Name)) {
1084 auto Args =
F->getFunctionType()->params();
1085 Type *Tys[] = {
F->getReturnType(), Args[1]};
1087 F->getParent(), Intrinsic::vector_insert, Tys);
1093 if (Name.starts_with(
"rev.nxv")) {
1096 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1108 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1112 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1114 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1116 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1117 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1118 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1119 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1120 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1121 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1130 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1144 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1145 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1155 if (Name.consume_front(
"mapa.shared.cluster"))
1156 if (
F->getReturnType()->getPointerAddressSpace() ==
1158 return Intrinsic::nvvm_mapa_shared_cluster;
1160 if (Name.consume_front(
"cp.async.bulk.")) {
1163 .
Case(
"global.to.shared.cluster",
1164 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1165 .
Case(
"shared.cta.to.cluster",
1166 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1170 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1179 if (Name.consume_front(
"fma.rn."))
1181 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1182 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1183 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1184 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1187 if (Name.consume_front(
"fmax."))
1189 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1190 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1191 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1192 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1193 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1194 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1195 .
Case(
"ftz.nan.xorsign.abs.bf16",
1196 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1197 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1198 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1199 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1200 .
Case(
"ftz.xorsign.abs.bf16x2",
1201 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1202 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1203 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1204 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1205 .
Case(
"nan.xorsign.abs.bf16x2",
1206 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1207 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1208 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1211 if (Name.consume_front(
"fmin."))
1213 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1214 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1215 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1216 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1217 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1218 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1219 .
Case(
"ftz.nan.xorsign.abs.bf16",
1220 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1221 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1222 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1223 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1224 .
Case(
"ftz.xorsign.abs.bf16x2",
1225 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1226 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1227 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1228 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1229 .
Case(
"nan.xorsign.abs.bf16x2",
1230 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1231 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1232 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1235 if (Name.consume_front(
"neg."))
1237 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1238 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1245 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1246 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1247 Name.consume_front(
"param");
1253 if (Name.starts_with(
"to.fp16")) {
1257 FuncTy->getReturnType());
1260 if (Name.starts_with(
"from.fp16")) {
1264 FuncTy->getReturnType());
1271 bool CanUpgradeDebugIntrinsicsToRecords) {
1272 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1277 if (!Name.consume_front(
"llvm.") || Name.empty())
1283 bool IsArm = Name.consume_front(
"arm.");
1284 if (IsArm || Name.consume_front(
"aarch64.")) {
1290 if (Name.consume_front(
"amdgcn.")) {
1291 if (Name ==
"alignbit") {
1294 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1298 if (Name.consume_front(
"atomic.")) {
1299 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1300 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1310 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1311 F->arg_size() == 7) {
1315 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1316 F->arg_size() == 8) {
1321 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1322 Name.consume_front(
"flat.atomic.")) {
1323 if (Name.starts_with(
"fadd") ||
1325 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1326 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1334 if (Name.starts_with(
"ldexp.")) {
1337 F->getParent(), Intrinsic::ldexp,
1338 {F->getReturnType(), F->getArg(1)->getType()});
1347 if (
F->arg_size() == 1) {
1348 if (Name.consume_front(
"convert.")) {
1362 F->arg_begin()->getType());
1367 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1370 Intrinsic::coro_end);
1377 if (Name.consume_front(
"dbg.")) {
1379 if (CanUpgradeDebugIntrinsicsToRecords) {
1380 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1381 Name ==
"declare" || Name ==
"label") {
1390 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1393 Intrinsic::dbg_value);
1400 if (Name.consume_front(
"experimental.vector.")) {
1406 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1407 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1408 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1409 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1410 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1412 Intrinsic::vector_partial_reduce_add)
1415 const auto *FT =
F->getFunctionType();
1417 if (
ID == Intrinsic::vector_extract ||
1418 ID == Intrinsic::vector_interleave2)
1421 if (
ID != Intrinsic::vector_interleave2)
1423 if (
ID == Intrinsic::vector_insert ||
1424 ID == Intrinsic::vector_partial_reduce_add)
1432 if (Name.consume_front(
"reduce.")) {
1434 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1435 if (R.match(Name, &
Groups))
1437 .
Case(
"add", Intrinsic::vector_reduce_add)
1438 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1439 .
Case(
"and", Intrinsic::vector_reduce_and)
1440 .
Case(
"or", Intrinsic::vector_reduce_or)
1441 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1442 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1443 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1444 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1445 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1446 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1447 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1452 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1457 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1458 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1463 auto Args =
F->getFunctionType()->params();
1465 {Args[V2 ? 1 : 0]});
1471 if (Name.consume_front(
"splice"))
1475 if (Name.consume_front(
"experimental.stepvector.")) {
1479 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1484 if (Name.starts_with(
"flt.rounds")) {
1487 Intrinsic::get_rounding);
1492 if (Name.starts_with(
"invariant.group.barrier")) {
1494 auto Args =
F->getFunctionType()->params();
1495 Type* ObjectPtr[1] = {Args[0]};
1498 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1503 if ((Name.starts_with(
"lifetime.start") ||
1504 Name.starts_with(
"lifetime.end")) &&
1505 F->arg_size() == 2) {
1507 ? Intrinsic::lifetime_start
1508 : Intrinsic::lifetime_end;
1511 F->getArg(0)->getType());
1520 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1521 .StartsWith(
"memmove.", Intrinsic::memmove)
1523 if (
F->arg_size() == 5) {
1527 F->getFunctionType()->params().slice(0, 3);
1533 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1536 const auto *FT =
F->getFunctionType();
1537 Type *ParamTypes[2] = {
1538 FT->getParamType(0),
1542 Intrinsic::memset, ParamTypes);
1548 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1549 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1550 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1551 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1553 if (MaskedID &&
F->arg_size() == 4) {
1555 if (MaskedID == Intrinsic::masked_load ||
1556 MaskedID == Intrinsic::masked_gather) {
1558 F->getParent(), MaskedID,
1559 {F->getReturnType(), F->getArg(0)->getType()});
1563 F->getParent(), MaskedID,
1564 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1570 if (Name.consume_front(
"nvvm.")) {
1572 if (
F->arg_size() == 1) {
1575 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1576 .Case(
"clz.i", Intrinsic::ctlz)
1577 .
Case(
"popc.i", Intrinsic::ctpop)
1581 {F->getReturnType()});
1584 }
else if (
F->arg_size() == 2) {
1587 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1588 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1589 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1590 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1594 {F->getReturnType()});
1600 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1628 bool Expand =
false;
1629 if (Name.consume_front(
"abs."))
1632 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1633 else if (Name.consume_front(
"fabs."))
1635 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1636 else if (Name.consume_front(
"ex2.approx."))
1639 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1640 else if (Name.consume_front(
"atomic.load."))
1649 else if (Name.consume_front(
"bitcast."))
1652 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1653 else if (Name.consume_front(
"rotate."))
1655 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1656 else if (Name.consume_front(
"ptr.gen.to."))
1659 else if (Name.consume_front(
"ptr."))
1662 else if (Name.consume_front(
"ldg.global."))
1664 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1665 Name.starts_with(
"p."));
1668 .
Case(
"barrier0",
true)
1669 .
Case(
"barrier.n",
true)
1670 .
Case(
"barrier.sync.cnt",
true)
1671 .
Case(
"barrier.sync",
true)
1672 .
Case(
"barrier",
true)
1673 .
Case(
"bar.sync",
true)
1674 .
Case(
"barrier0.popc",
true)
1675 .
Case(
"barrier0.and",
true)
1676 .
Case(
"barrier0.or",
true)
1677 .
Case(
"clz.ll",
true)
1678 .
Case(
"popc.ll",
true)
1680 .
Case(
"swap.lo.hi.b64",
true)
1681 .
Case(
"tanh.approx.f32",
true)
1693 if (Name.starts_with(
"objectsize.")) {
1694 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1695 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1698 Intrinsic::objectsize, Tys);
1705 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1708 F->getParent(), Intrinsic::ptr_annotation,
1709 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1715 if (Name.consume_front(
"riscv.")) {
1718 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1719 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1720 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1721 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1724 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1737 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1738 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1747 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1748 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1749 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1750 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1755 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1764 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1766 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1775 if (Name ==
"stackprotectorcheck") {
1782 if (Name ==
"thread.pointer") {
1784 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1790 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1793 F->getParent(), Intrinsic::var_annotation,
1794 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1797 if (Name.consume_front(
"vector.splice")) {
1798 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1806 if (Name.consume_front(
"wasm.")) {
1809 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1810 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1811 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1816 F->getReturnType());
1820 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1822 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1824 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1843 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1852 auto *FT =
F->getFunctionType();
1855 std::string
Name =
F->getName().str();
1858 Name,
F->getParent());
1869 if (Result != std::nullopt) {
1882 bool CanUpgradeDebugIntrinsicsToRecords) {
1902 GV->
getName() ==
"llvm.global_dtors")) ||
1917 unsigned N =
Init->getNumOperands();
1918 std::vector<Constant *> NewCtors(
N);
1919 for (
unsigned i = 0; i !=
N; ++i) {
1922 Ctor->getAggregateElement(1),
1936 unsigned NumElts = ResultTy->getNumElements() * 8;
1940 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1950 for (
unsigned l = 0; l != NumElts; l += 16)
1951 for (
unsigned i = 0; i != 16; ++i) {
1952 unsigned Idx = NumElts + i - Shift;
1954 Idx -= NumElts - 16;
1955 Idxs[l + i] = Idx + l;
1958 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1962 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1970 unsigned NumElts = ResultTy->getNumElements() * 8;
1974 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1984 for (
unsigned l = 0; l != NumElts; l += 16)
1985 for (
unsigned i = 0; i != 16; ++i) {
1986 unsigned Idx = i + Shift;
1988 Idx += NumElts - 16;
1989 Idxs[l + i] = Idx + l;
1992 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
1996 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2004 Mask = Builder.CreateBitCast(Mask, MaskTy);
2010 for (
unsigned i = 0; i != NumElts; ++i)
2012 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2023 if (
C->isAllOnesValue())
2028 return Builder.CreateSelect(Mask, Op0, Op1);
2035 if (
C->isAllOnesValue())
2039 Mask->getType()->getIntegerBitWidth());
2040 Mask = Builder.CreateBitCast(Mask, MaskTy);
2041 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2042 return Builder.CreateSelect(Mask, Op0, Op1);
2055 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2056 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2061 ShiftVal &= (NumElts - 1);
2070 if (ShiftVal > 16) {
2078 for (
unsigned l = 0; l < NumElts; l += 16) {
2079 for (
unsigned i = 0; i != 16; ++i) {
2080 unsigned Idx = ShiftVal + i;
2081 if (!IsVALIGN && Idx >= 16)
2082 Idx += NumElts - 16;
2083 Indices[l + i] = Idx + l;
2088 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2094 bool ZeroMask,
bool IndexForm) {
2097 unsigned EltWidth = Ty->getScalarSizeInBits();
2098 bool IsFloat = Ty->isFPOrFPVectorTy();
2100 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2101 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2102 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2103 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2104 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2105 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2106 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2107 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2108 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2109 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2110 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2111 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2112 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2113 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2114 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2115 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2116 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2117 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2118 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2119 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2120 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2121 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2122 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2123 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2124 else if (VecWidth == 128 && EltWidth == 16)
2125 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2126 else if (VecWidth == 256 && EltWidth == 16)
2127 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2128 else if (VecWidth == 512 && EltWidth == 16)
2129 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2130 else if (VecWidth == 128 && EltWidth == 8)
2131 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2132 else if (VecWidth == 256 && EltWidth == 8)
2133 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2134 else if (VecWidth == 512 && EltWidth == 8)
2135 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2146 Value *V = Builder.CreateIntrinsic(IID, Args);
2158 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2169 bool IsRotateRight) {
2179 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2180 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2183 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2184 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2229 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2234 bool IsShiftRight,
bool ZeroMask) {
2248 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2249 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2252 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2253 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2268 const Align Alignment =
2270 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2275 if (
C->isAllOnesValue())
2276 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2281 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2287 const Align Alignment =
2296 if (
C->isAllOnesValue())
2297 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2302 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2308 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2309 {Op0, Builder.getInt1(
false)});
2324 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2325 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2326 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2327 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2328 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2331 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2332 LHS = Builder.CreateAnd(
LHS, Mask);
2333 RHS = Builder.CreateAnd(
RHS, Mask);
2350 if (!
C || !
C->isAllOnesValue())
2351 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2356 for (
unsigned i = 0; i != NumElts; ++i)
2358 for (
unsigned i = NumElts; i != 8; ++i)
2359 Indices[i] = NumElts + i % NumElts;
2360 Vec = Builder.CreateShuffleVector(Vec,
2364 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2368 unsigned CC,
bool Signed) {
2376 }
else if (CC == 7) {
2412 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2413 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2415 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2416 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2425 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2431 Name = Name.substr(12);
2436 if (Name.starts_with(
"max.p")) {
2437 if (VecWidth == 128 && EltWidth == 32)
2438 IID = Intrinsic::x86_sse_max_ps;
2439 else if (VecWidth == 128 && EltWidth == 64)
2440 IID = Intrinsic::x86_sse2_max_pd;
2441 else if (VecWidth == 256 && EltWidth == 32)
2442 IID = Intrinsic::x86_avx_max_ps_256;
2443 else if (VecWidth == 256 && EltWidth == 64)
2444 IID = Intrinsic::x86_avx_max_pd_256;
2447 }
else if (Name.starts_with(
"min.p")) {
2448 if (VecWidth == 128 && EltWidth == 32)
2449 IID = Intrinsic::x86_sse_min_ps;
2450 else if (VecWidth == 128 && EltWidth == 64)
2451 IID = Intrinsic::x86_sse2_min_pd;
2452 else if (VecWidth == 256 && EltWidth == 32)
2453 IID = Intrinsic::x86_avx_min_ps_256;
2454 else if (VecWidth == 256 && EltWidth == 64)
2455 IID = Intrinsic::x86_avx_min_pd_256;
2458 }
else if (Name.starts_with(
"pshuf.b.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pshuf_b;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pshuf_b_512;
2467 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2476 }
else if (Name.starts_with(
"pmulh.w.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_pmulh_w;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_pmulh_w;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_pmulh_w_512;
2485 }
else if (Name.starts_with(
"pmulhu.w.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_pmulhu_w;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_pmulhu_w;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2494 }
else if (Name.starts_with(
"pmaddw.d.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_sse2_pmadd_wd;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_pmadd_wd;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2503 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2504 if (VecWidth == 128)
2505 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2506 else if (VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2508 else if (VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2512 }
else if (Name.starts_with(
"packsswb.")) {
2513 if (VecWidth == 128)
2514 IID = Intrinsic::x86_sse2_packsswb_128;
2515 else if (VecWidth == 256)
2516 IID = Intrinsic::x86_avx2_packsswb;
2517 else if (VecWidth == 512)
2518 IID = Intrinsic::x86_avx512_packsswb_512;
2521 }
else if (Name.starts_with(
"packssdw.")) {
2522 if (VecWidth == 128)
2523 IID = Intrinsic::x86_sse2_packssdw_128;
2524 else if (VecWidth == 256)
2525 IID = Intrinsic::x86_avx2_packssdw;
2526 else if (VecWidth == 512)
2527 IID = Intrinsic::x86_avx512_packssdw_512;
2530 }
else if (Name.starts_with(
"packuswb.")) {
2531 if (VecWidth == 128)
2532 IID = Intrinsic::x86_sse2_packuswb_128;
2533 else if (VecWidth == 256)
2534 IID = Intrinsic::x86_avx2_packuswb;
2535 else if (VecWidth == 512)
2536 IID = Intrinsic::x86_avx512_packuswb_512;
2539 }
else if (Name.starts_with(
"packusdw.")) {
2540 if (VecWidth == 128)
2541 IID = Intrinsic::x86_sse41_packusdw;
2542 else if (VecWidth == 256)
2543 IID = Intrinsic::x86_avx2_packusdw;
2544 else if (VecWidth == 512)
2545 IID = Intrinsic::x86_avx512_packusdw_512;
2548 }
else if (Name.starts_with(
"vpermilvar.")) {
2549 if (VecWidth == 128 && EltWidth == 32)
2550 IID = Intrinsic::x86_avx_vpermilvar_ps;
2551 else if (VecWidth == 128 && EltWidth == 64)
2552 IID = Intrinsic::x86_avx_vpermilvar_pd;
2553 else if (VecWidth == 256 && EltWidth == 32)
2554 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2555 else if (VecWidth == 256 && EltWidth == 64)
2556 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2557 else if (VecWidth == 512 && EltWidth == 32)
2558 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2559 else if (VecWidth == 512 && EltWidth == 64)
2560 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2563 }
else if (Name ==
"cvtpd2dq.256") {
2564 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2565 }
else if (Name ==
"cvtpd2ps.256") {
2566 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2567 }
else if (Name ==
"cvttpd2dq.256") {
2568 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2569 }
else if (Name ==
"cvttps2dq.128") {
2570 IID = Intrinsic::x86_sse2_cvttps2dq;
2571 }
else if (Name ==
"cvttps2dq.256") {
2572 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2573 }
else if (Name.starts_with(
"permvar.")) {
2575 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2576 IID = Intrinsic::x86_avx2_permps;
2577 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2578 IID = Intrinsic::x86_avx2_permd;
2579 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2580 IID = Intrinsic::x86_avx512_permvar_df_256;
2581 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2582 IID = Intrinsic::x86_avx512_permvar_di_256;
2583 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2584 IID = Intrinsic::x86_avx512_permvar_sf_512;
2585 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2586 IID = Intrinsic::x86_avx512_permvar_si_512;
2587 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2588 IID = Intrinsic::x86_avx512_permvar_df_512;
2589 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2590 IID = Intrinsic::x86_avx512_permvar_di_512;
2591 else if (VecWidth == 128 && EltWidth == 16)
2592 IID = Intrinsic::x86_avx512_permvar_hi_128;
2593 else if (VecWidth == 256 && EltWidth == 16)
2594 IID = Intrinsic::x86_avx512_permvar_hi_256;
2595 else if (VecWidth == 512 && EltWidth == 16)
2596 IID = Intrinsic::x86_avx512_permvar_hi_512;
2597 else if (VecWidth == 128 && EltWidth == 8)
2598 IID = Intrinsic::x86_avx512_permvar_qi_128;
2599 else if (VecWidth == 256 && EltWidth == 8)
2600 IID = Intrinsic::x86_avx512_permvar_qi_256;
2601 else if (VecWidth == 512 && EltWidth == 8)
2602 IID = Intrinsic::x86_avx512_permvar_qi_512;
2605 }
else if (Name.starts_with(
"dbpsadbw.")) {
2606 if (VecWidth == 128)
2607 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2608 else if (VecWidth == 256)
2609 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2610 else if (VecWidth == 512)
2611 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2614 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2615 if (VecWidth == 128)
2616 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2617 else if (VecWidth == 256)
2618 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2619 else if (VecWidth == 512)
2620 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2623 }
else if (Name.starts_with(
"conflict.")) {
2624 if (Name[9] ==
'd' && VecWidth == 128)
2625 IID = Intrinsic::x86_avx512_conflict_d_128;
2626 else if (Name[9] ==
'd' && VecWidth == 256)
2627 IID = Intrinsic::x86_avx512_conflict_d_256;
2628 else if (Name[9] ==
'd' && VecWidth == 512)
2629 IID = Intrinsic::x86_avx512_conflict_d_512;
2630 else if (Name[9] ==
'q' && VecWidth == 128)
2631 IID = Intrinsic::x86_avx512_conflict_q_128;
2632 else if (Name[9] ==
'q' && VecWidth == 256)
2633 IID = Intrinsic::x86_avx512_conflict_q_256;
2634 else if (Name[9] ==
'q' && VecWidth == 512)
2635 IID = Intrinsic::x86_avx512_conflict_q_512;
2638 }
else if (Name.starts_with(
"pavg.")) {
2639 if (Name[5] ==
'b' && VecWidth == 128)
2640 IID = Intrinsic::x86_sse2_pavg_b;
2641 else if (Name[5] ==
'b' && VecWidth == 256)
2642 IID = Intrinsic::x86_avx2_pavg_b;
2643 else if (Name[5] ==
'b' && VecWidth == 512)
2644 IID = Intrinsic::x86_avx512_pavg_b_512;
2645 else if (Name[5] ==
'w' && VecWidth == 128)
2646 IID = Intrinsic::x86_sse2_pavg_w;
2647 else if (Name[5] ==
'w' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx2_pavg_w;
2649 else if (Name[5] ==
'w' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_pavg_w_512;
2659 Rep = Builder.CreateIntrinsic(IID, Args);
2670 if (AsmStr->find(
"mov\tfp") == 0 &&
2671 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2672 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2673 AsmStr->replace(Pos, 1,
";");
2679 Value *Rep =
nullptr;
2681 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2683 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2684 Value *Cmp = Builder.CreateICmpSGE(
2686 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2687 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2688 Type *Ty = (Name ==
"abs.bf16")
2692 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2693 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2694 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2695 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2696 : Intrinsic::nvvm_fabs;
2697 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2698 }
else if (Name.consume_front(
"ex2.approx.")) {
2700 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2701 : Intrinsic::nvvm_ex2_approx;
2702 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2703 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2704 Name.starts_with(
"atomic.load.add.f64.p")) {
2709 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2710 Name.starts_with(
"atomic.load.dec.32.p")) {
2715 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2717 }
else if (Name ==
"clz.ll") {
2720 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2721 {Arg, Builder.getFalse()},
2723 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2724 }
else if (Name ==
"popc.ll") {
2728 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2729 Arg,
nullptr,
"ctpop");
2730 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2731 }
else if (Name ==
"h2f") {
2733 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2734 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2735 }
else if (Name.consume_front(
"bitcast.") &&
2736 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2739 }
else if (Name ==
"rotate.b32") {
2742 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2743 {Arg, Arg, ShiftAmt});
2744 }
else if (Name ==
"rotate.b64") {
2748 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2749 {Arg, Arg, ZExtShiftAmt});
2750 }
else if (Name ==
"rotate.right.b64") {
2754 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2755 {Arg, Arg, ZExtShiftAmt});
2756 }
else if (Name ==
"swap.lo.hi.b64") {
2759 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2760 {Arg, Arg, Builder.getInt64(32)});
2761 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2764 Name.starts_with(
".to.gen"))) {
2766 }
else if (Name.consume_front(
"ldg.global")) {
2770 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2773 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2775 }
else if (Name ==
"tanh.approx.f32") {
2779 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2781 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2783 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2784 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2786 }
else if (Name ==
"barrier") {
2787 Rep = Builder.CreateIntrinsic(
2788 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2790 }
else if (Name ==
"barrier.sync") {
2791 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2793 }
else if (Name ==
"barrier.sync.cnt") {
2794 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2796 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2797 Name ==
"barrier0.or") {
2799 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2803 .
Case(
"barrier0.popc",
2804 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2805 .
Case(
"barrier0.and",
2806 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2807 .
Case(
"barrier0.or",
2808 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2809 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2810 Rep = Builder.CreateZExt(Bar, CI->
getType());
2814 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2824 ? Builder.CreateBitCast(Arg, NewType)
2827 Rep = Builder.CreateCall(NewFn, Args);
2828 if (
F->getReturnType()->isIntegerTy())
2829 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2839 Value *Rep =
nullptr;
2841 if (Name.starts_with(
"sse4a.movnt.")) {
2853 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2856 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2857 }
else if (Name.starts_with(
"avx.movnt.") ||
2858 Name.starts_with(
"avx512.storent.")) {
2870 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2871 }
else if (Name ==
"sse2.storel.dq") {
2876 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2877 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2878 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2879 }
else if (Name.starts_with(
"sse.storeu.") ||
2880 Name.starts_with(
"sse2.storeu.") ||
2881 Name.starts_with(
"avx.storeu.")) {
2884 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2885 }
else if (Name ==
"avx512.mask.store.ss") {
2889 }
else if (Name.starts_with(
"avx512.mask.store")) {
2891 bool Aligned = Name[17] !=
'u';
2894 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2897 bool CmpEq = Name[9] ==
'e';
2900 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2901 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2908 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2909 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2911 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2912 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2913 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2914 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2915 Name.starts_with(
"sse2.sqrt.p") ||
2916 Name.starts_with(
"sse.sqrt.p")) {
2917 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2918 {CI->getArgOperand(0)});
2919 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2923 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2924 : Intrinsic::x86_avx512_sqrt_pd_512;
2927 Rep = Builder.CreateIntrinsic(IID, Args);
2929 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2930 {CI->getArgOperand(0)});
2934 }
else if (Name.starts_with(
"avx512.ptestm") ||
2935 Name.starts_with(
"avx512.ptestnm")) {
2939 Rep = Builder.CreateAnd(Op0, Op1);
2945 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2947 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2950 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2953 }
else if (Name.starts_with(
"avx512.kunpck")) {
2958 for (
unsigned i = 0; i != NumElts; ++i)
2967 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2968 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2969 }
else if (Name ==
"avx512.kand.w") {
2972 Rep = Builder.CreateAnd(
LHS,
RHS);
2973 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2974 }
else if (Name ==
"avx512.kandn.w") {
2977 LHS = Builder.CreateNot(
LHS);
2978 Rep = Builder.CreateAnd(
LHS,
RHS);
2979 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2980 }
else if (Name ==
"avx512.kor.w") {
2983 Rep = Builder.CreateOr(
LHS,
RHS);
2984 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2985 }
else if (Name ==
"avx512.kxor.w") {
2988 Rep = Builder.CreateXor(
LHS,
RHS);
2989 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2990 }
else if (Name ==
"avx512.kxnor.w") {
2993 LHS = Builder.CreateNot(
LHS);
2994 Rep = Builder.CreateXor(
LHS,
RHS);
2995 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2996 }
else if (Name ==
"avx512.knot.w") {
2998 Rep = Builder.CreateNot(Rep);
2999 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3000 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3003 Rep = Builder.CreateOr(
LHS,
RHS);
3004 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3006 if (Name[14] ==
'c')
3010 Rep = Builder.CreateICmpEQ(Rep,
C);
3011 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3012 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3013 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3014 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3015 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3018 ConstantInt::get(I32Ty, 0));
3020 ConstantInt::get(I32Ty, 0));
3022 if (Name.contains(
".add."))
3023 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3024 else if (Name.contains(
".sub."))
3025 EltOp = Builder.CreateFSub(Elt0, Elt1);
3026 else if (Name.contains(
".mul."))
3027 EltOp = Builder.CreateFMul(Elt0, Elt1);
3029 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3030 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3031 ConstantInt::get(I32Ty, 0));
3032 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3034 bool CmpEq = Name[16] ==
'e';
3036 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3045 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3048 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3051 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3058 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3063 if (VecWidth == 128 && EltWidth == 32)
3064 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3065 else if (VecWidth == 256 && EltWidth == 32)
3066 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3067 else if (VecWidth == 512 && EltWidth == 32)
3068 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3069 else if (VecWidth == 128 && EltWidth == 64)
3070 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3071 else if (VecWidth == 256 && EltWidth == 64)
3072 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3073 else if (VecWidth == 512 && EltWidth == 64)
3074 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3081 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3083 Type *OpTy = Args[0]->getType();
3087 if (VecWidth == 128 && EltWidth == 32)
3088 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3089 else if (VecWidth == 256 && EltWidth == 32)
3090 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3091 else if (VecWidth == 512 && EltWidth == 32)
3092 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3093 else if (VecWidth == 128 && EltWidth == 64)
3094 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3095 else if (VecWidth == 256 && EltWidth == 64)
3096 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3097 else if (VecWidth == 512 && EltWidth == 64)
3098 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3103 if (VecWidth == 512)
3105 Args.push_back(Mask);
3107 Rep = Builder.CreateIntrinsic(IID, Args);
3108 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3112 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3115 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3116 Name.starts_with(
"avx512.cvtw2mask.") ||
3117 Name.starts_with(
"avx512.cvtd2mask.") ||
3118 Name.starts_with(
"avx512.cvtq2mask.")) {
3123 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3124 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3125 Name.starts_with(
"avx512.mask.pabs")) {
3127 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3128 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3129 Name.starts_with(
"avx512.mask.pmaxs")) {
3131 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3132 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3133 Name.starts_with(
"avx512.mask.pmaxu")) {
3135 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3136 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3137 Name.starts_with(
"avx512.mask.pmins")) {
3139 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3140 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3141 Name.starts_with(
"avx512.mask.pminu")) {
3143 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3144 Name ==
"avx512.pmulu.dq.512" ||
3145 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3147 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3148 Name ==
"avx512.pmul.dq.512" ||
3149 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3151 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3152 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3157 }
else if (Name ==
"avx512.cvtusi2sd") {
3162 }
else if (Name ==
"sse2.cvtss2sd") {
3164 Rep = Builder.CreateFPExt(
3167 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3168 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3169 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3170 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3171 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3172 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3173 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3174 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3175 Name ==
"avx512.mask.cvtqq2ps.256" ||
3176 Name ==
"avx512.mask.cvtqq2ps.512" ||
3177 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3178 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3179 Name ==
"avx.cvt.ps2.pd.256" ||
3180 Name ==
"avx512.mask.cvtps2pd.128" ||
3181 Name ==
"avx512.mask.cvtps2pd.256") {
3186 unsigned NumDstElts = DstTy->getNumElements();
3188 assert(NumDstElts == 2 &&
"Unexpected vector size");
3189 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3192 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3193 bool IsUnsigned = Name.contains(
"cvtu");
3195 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3199 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3200 : Intrinsic::x86_avx512_sitofp_round;
3201 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3204 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3205 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3211 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3212 Name.starts_with(
"vcvtph2ps.")) {
3216 unsigned NumDstElts = DstTy->getNumElements();
3217 if (NumDstElts != SrcTy->getNumElements()) {
3218 assert(NumDstElts == 4 &&
"Unexpected vector size");
3219 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3221 Rep = Builder.CreateBitCast(
3223 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3227 }
else if (Name.starts_with(
"avx512.mask.load")) {
3229 bool Aligned = Name[16] !=
'u';
3232 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3235 ResultTy->getNumElements());
3237 Rep = Builder.CreateIntrinsic(
3238 Intrinsic::masked_expandload, ResultTy,
3240 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3246 Rep = Builder.CreateIntrinsic(
3247 Intrinsic::masked_compressstore, ResultTy,
3249 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3250 Name.starts_with(
"avx512.mask.expand.")) {
3254 ResultTy->getNumElements());
3256 bool IsCompress = Name[12] ==
'c';
3257 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3258 : Intrinsic::x86_avx512_mask_expand;
3259 Rep = Builder.CreateIntrinsic(
3261 }
else if (Name.starts_with(
"xop.vpcom")) {
3263 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3264 Name.ends_with(
"uq"))
3266 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3267 Name.ends_with(
"d") || Name.ends_with(
"q"))
3276 Name = Name.substr(9);
3277 if (Name.starts_with(
"lt"))
3279 else if (Name.starts_with(
"le"))
3281 else if (Name.starts_with(
"gt"))
3283 else if (Name.starts_with(
"ge"))
3285 else if (Name.starts_with(
"eq"))
3287 else if (Name.starts_with(
"ne"))
3289 else if (Name.starts_with(
"false"))
3291 else if (Name.starts_with(
"true"))
3298 }
else if (Name.starts_with(
"xop.vpcmov")) {
3300 Value *NotSel = Builder.CreateNot(Sel);
3303 Rep = Builder.CreateOr(Sel0, Sel1);
3304 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3305 Name.starts_with(
"avx512.mask.prol")) {
3307 }
else if (Name.starts_with(
"avx512.pror") ||
3308 Name.starts_with(
"avx512.mask.pror")) {
3310 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3311 Name.starts_with(
"avx512.mask.vpshld") ||
3312 Name.starts_with(
"avx512.maskz.vpshld")) {
3313 bool ZeroMask = Name[11] ==
'z';
3315 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3316 Name.starts_with(
"avx512.mask.vpshrd") ||
3317 Name.starts_with(
"avx512.maskz.vpshrd")) {
3318 bool ZeroMask = Name[11] ==
'z';
3320 }
else if (Name ==
"sse42.crc32.64.8") {
3323 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3325 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3326 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3327 Name.starts_with(
"avx512.vbroadcast.s")) {
3330 Type *EltTy = VecTy->getElementType();
3331 unsigned EltNum = VecTy->getNumElements();
3335 for (
unsigned I = 0;
I < EltNum; ++
I)
3336 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3337 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3338 Name.starts_with(
"sse41.pmovzx") ||
3339 Name.starts_with(
"avx2.pmovsx") ||
3340 Name.starts_with(
"avx2.pmovzx") ||
3341 Name.starts_with(
"avx512.mask.pmovsx") ||
3342 Name.starts_with(
"avx512.mask.pmovzx")) {
3344 unsigned NumDstElts = DstTy->getNumElements();
3348 for (
unsigned i = 0; i != NumDstElts; ++i)
3353 bool DoSext = Name.contains(
"pmovsx");
3355 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3360 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3361 Name ==
"avx512.mask.pmov.qd.512" ||
3362 Name ==
"avx512.mask.pmov.wb.256" ||
3363 Name ==
"avx512.mask.pmov.wb.512") {
3368 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3369 Name ==
"avx2.vbroadcasti128") {
3375 if (NumSrcElts == 2)
3376 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3378 Rep = Builder.CreateShuffleVector(Load,
3380 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3381 Name.starts_with(
"avx512.mask.shuf.f")) {
3386 unsigned ControlBitsMask = NumLanes - 1;
3387 unsigned NumControlBits = NumLanes / 2;
3390 for (
unsigned l = 0; l != NumLanes; ++l) {
3391 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3393 if (l >= NumLanes / 2)
3394 LaneMask += NumLanes;
3395 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3396 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3402 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3403 Name.starts_with(
"avx512.mask.broadcasti")) {
3406 unsigned NumDstElts =
3410 for (
unsigned i = 0; i != NumDstElts; ++i)
3411 ShuffleMask[i] = i % NumSrcElts;
3417 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3418 Name.starts_with(
"avx2.vbroadcast") ||
3419 Name.starts_with(
"avx512.pbroadcast") ||
3420 Name.starts_with(
"avx512.mask.broadcast.s")) {
3427 Rep = Builder.CreateShuffleVector(
Op, M);
3432 }
else if (Name.starts_with(
"sse2.padds.") ||
3433 Name.starts_with(
"avx2.padds.") ||
3434 Name.starts_with(
"avx512.padds.") ||
3435 Name.starts_with(
"avx512.mask.padds.")) {
3437 }
else if (Name.starts_with(
"sse2.psubs.") ||
3438 Name.starts_with(
"avx2.psubs.") ||
3439 Name.starts_with(
"avx512.psubs.") ||
3440 Name.starts_with(
"avx512.mask.psubs.")) {
3442 }
else if (Name.starts_with(
"sse2.paddus.") ||
3443 Name.starts_with(
"avx2.paddus.") ||
3444 Name.starts_with(
"avx512.mask.paddus.")) {
3446 }
else if (Name.starts_with(
"sse2.psubus.") ||
3447 Name.starts_with(
"avx2.psubus.") ||
3448 Name.starts_with(
"avx512.mask.psubus.")) {
3450 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3455 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3459 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3464 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3469 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3470 Name ==
"avx512.psll.dq.512") {
3474 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3475 Name ==
"avx512.psrl.dq.512") {
3479 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3480 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3481 Name.starts_with(
"avx2.pblendd.")) {
3486 unsigned NumElts = VecTy->getNumElements();
3489 for (
unsigned i = 0; i != NumElts; ++i)
3490 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3492 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3493 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3494 Name ==
"avx2.vinserti128" ||
3495 Name.starts_with(
"avx512.mask.insert")) {
3499 unsigned DstNumElts =
3501 unsigned SrcNumElts =
3503 unsigned Scale = DstNumElts / SrcNumElts;
3510 for (
unsigned i = 0; i != SrcNumElts; ++i)
3512 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3513 Idxs[i] = SrcNumElts;
3514 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3528 for (
unsigned i = 0; i != DstNumElts; ++i)
3531 for (
unsigned i = 0; i != SrcNumElts; ++i)
3532 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3533 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3539 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3540 Name ==
"avx2.vextracti128" ||
3541 Name.starts_with(
"avx512.mask.vextract")) {
3544 unsigned DstNumElts =
3546 unsigned SrcNumElts =
3548 unsigned Scale = SrcNumElts / DstNumElts;
3555 for (
unsigned i = 0; i != DstNumElts; ++i) {
3556 Idxs[i] = i + (Imm * DstNumElts);
3558 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3564 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3565 Name.starts_with(
"avx512.mask.perm.di.")) {
3569 unsigned NumElts = VecTy->getNumElements();
3572 for (
unsigned i = 0; i != NumElts; ++i)
3573 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3575 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3580 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3592 unsigned HalfSize = NumElts / 2;
3604 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3605 for (
unsigned i = 0; i < HalfSize; ++i)
3606 ShuffleMask[i] = StartIndex + i;
3609 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3610 for (
unsigned i = 0; i < HalfSize; ++i)
3611 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3613 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3615 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3616 Name.starts_with(
"avx512.mask.vpermil.p") ||
3617 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3621 unsigned NumElts = VecTy->getNumElements();
3623 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3624 unsigned IdxMask = ((1 << IdxSize) - 1);
3630 for (
unsigned i = 0; i != NumElts; ++i)
3631 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3633 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3638 }
else if (Name ==
"sse2.pshufl.w" ||
3639 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3644 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3648 for (
unsigned l = 0; l != NumElts; l += 8) {
3649 for (
unsigned i = 0; i != 4; ++i)
3650 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3651 for (
unsigned i = 4; i != 8; ++i)
3652 Idxs[i + l] = i + l;
3655 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3660 }
else if (Name ==
"sse2.pshufh.w" ||
3661 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3666 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3670 for (
unsigned l = 0; l != NumElts; l += 8) {
3671 for (
unsigned i = 0; i != 4; ++i)
3672 Idxs[i + l] = i + l;
3673 for (
unsigned i = 0; i != 4; ++i)
3674 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3677 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3682 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3689 unsigned HalfLaneElts = NumLaneElts / 2;
3692 for (
unsigned i = 0; i != NumElts; ++i) {
3694 Idxs[i] = i - (i % NumLaneElts);
3696 if ((i % NumLaneElts) >= HalfLaneElts)
3700 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3703 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3707 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3708 Name.starts_with(
"avx512.mask.movshdup") ||
3709 Name.starts_with(
"avx512.mask.movsldup")) {
3715 if (Name.starts_with(
"avx512.mask.movshdup."))
3719 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3720 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3721 Idxs[i + l + 0] = i + l +
Offset;
3722 Idxs[i + l + 1] = i + l +
Offset;
3725 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3729 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3730 Name.starts_with(
"avx512.mask.unpckl.")) {
3737 for (
int l = 0; l != NumElts; l += NumLaneElts)
3738 for (
int i = 0; i != NumLaneElts; ++i)
3739 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3741 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3745 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3746 Name.starts_with(
"avx512.mask.unpckh.")) {
3753 for (
int l = 0; l != NumElts; l += NumLaneElts)
3754 for (
int i = 0; i != NumLaneElts; ++i)
3755 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3757 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3761 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3762 Name.starts_with(
"avx512.mask.pand.")) {
3765 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3767 Rep = Builder.CreateBitCast(Rep, FTy);
3770 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3771 Name.starts_with(
"avx512.mask.pandn.")) {
3774 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3775 Rep = Builder.CreateAnd(Rep,
3777 Rep = Builder.CreateBitCast(Rep, FTy);
3780 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3781 Name.starts_with(
"avx512.mask.por.")) {
3784 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3786 Rep = Builder.CreateBitCast(Rep, FTy);
3789 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3790 Name.starts_with(
"avx512.mask.pxor.")) {
3793 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3795 Rep = Builder.CreateBitCast(Rep, FTy);
3798 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3802 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3806 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3810 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3811 if (Name.ends_with(
".512")) {
3813 if (Name[17] ==
's')
3814 IID = Intrinsic::x86_avx512_add_ps_512;
3816 IID = Intrinsic::x86_avx512_add_pd_512;
3818 Rep = Builder.CreateIntrinsic(
3826 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3827 if (Name.ends_with(
".512")) {
3829 if (Name[17] ==
's')
3830 IID = Intrinsic::x86_avx512_div_ps_512;
3832 IID = Intrinsic::x86_avx512_div_pd_512;
3834 Rep = Builder.CreateIntrinsic(
3842 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3843 if (Name.ends_with(
".512")) {
3845 if (Name[17] ==
's')
3846 IID = Intrinsic::x86_avx512_mul_ps_512;
3848 IID = Intrinsic::x86_avx512_mul_pd_512;
3850 Rep = Builder.CreateIntrinsic(
3858 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3859 if (Name.ends_with(
".512")) {
3861 if (Name[17] ==
's')
3862 IID = Intrinsic::x86_avx512_sub_ps_512;
3864 IID = Intrinsic::x86_avx512_sub_pd_512;
3866 Rep = Builder.CreateIntrinsic(
3874 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3875 Name.starts_with(
"avx512.mask.min.p")) &&
3876 Name.drop_front(18) ==
".512") {
3877 bool IsDouble = Name[17] ==
'd';
3878 bool IsMin = Name[13] ==
'i';
3880 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3881 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3884 Rep = Builder.CreateIntrinsic(
3889 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3891 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3892 {CI->getArgOperand(0), Builder.getInt1(false)});
3895 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3896 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3897 bool IsVariable = Name[16] ==
'v';
3898 char Size = Name[16] ==
'.' ? Name[17]
3899 : Name[17] ==
'.' ? Name[18]
3900 : Name[18] ==
'.' ? Name[19]
3904 if (IsVariable && Name[17] !=
'.') {
3905 if (
Size ==
'd' && Name[17] ==
'2')
3906 IID = Intrinsic::x86_avx2_psllv_q;
3907 else if (
Size ==
'd' && Name[17] ==
'4')
3908 IID = Intrinsic::x86_avx2_psllv_q_256;
3909 else if (
Size ==
's' && Name[17] ==
'4')
3910 IID = Intrinsic::x86_avx2_psllv_d;
3911 else if (
Size ==
's' && Name[17] ==
'8')
3912 IID = Intrinsic::x86_avx2_psllv_d_256;
3913 else if (
Size ==
'h' && Name[17] ==
'8')
3914 IID = Intrinsic::x86_avx512_psllv_w_128;
3915 else if (
Size ==
'h' && Name[17] ==
'1')
3916 IID = Intrinsic::x86_avx512_psllv_w_256;
3917 else if (Name[17] ==
'3' && Name[18] ==
'2')
3918 IID = Intrinsic::x86_avx512_psllv_w_512;
3921 }
else if (Name.ends_with(
".128")) {
3923 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3924 : Intrinsic::x86_sse2_psll_d;
3925 else if (
Size ==
'q')
3926 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3927 : Intrinsic::x86_sse2_psll_q;
3928 else if (
Size ==
'w')
3929 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3930 : Intrinsic::x86_sse2_psll_w;
3933 }
else if (Name.ends_with(
".256")) {
3935 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3936 : Intrinsic::x86_avx2_psll_d;
3937 else if (
Size ==
'q')
3938 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3939 : Intrinsic::x86_avx2_psll_q;
3940 else if (
Size ==
'w')
3941 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3942 : Intrinsic::x86_avx2_psll_w;
3947 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3948 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3949 : Intrinsic::x86_avx512_psll_d_512;
3950 else if (
Size ==
'q')
3951 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3952 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3953 : Intrinsic::x86_avx512_psll_q_512;
3954 else if (
Size ==
'w')
3955 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3956 : Intrinsic::x86_avx512_psll_w_512;
3962 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3963 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3964 bool IsVariable = Name[16] ==
'v';
3965 char Size = Name[16] ==
'.' ? Name[17]
3966 : Name[17] ==
'.' ? Name[18]
3967 : Name[18] ==
'.' ? Name[19]
3971 if (IsVariable && Name[17] !=
'.') {
3972 if (
Size ==
'd' && Name[17] ==
'2')
3973 IID = Intrinsic::x86_avx2_psrlv_q;
3974 else if (
Size ==
'd' && Name[17] ==
'4')
3975 IID = Intrinsic::x86_avx2_psrlv_q_256;
3976 else if (
Size ==
's' && Name[17] ==
'4')
3977 IID = Intrinsic::x86_avx2_psrlv_d;
3978 else if (
Size ==
's' && Name[17] ==
'8')
3979 IID = Intrinsic::x86_avx2_psrlv_d_256;
3980 else if (
Size ==
'h' && Name[17] ==
'8')
3981 IID = Intrinsic::x86_avx512_psrlv_w_128;
3982 else if (
Size ==
'h' && Name[17] ==
'1')
3983 IID = Intrinsic::x86_avx512_psrlv_w_256;
3984 else if (Name[17] ==
'3' && Name[18] ==
'2')
3985 IID = Intrinsic::x86_avx512_psrlv_w_512;
3988 }
else if (Name.ends_with(
".128")) {
3990 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3991 : Intrinsic::x86_sse2_psrl_d;
3992 else if (
Size ==
'q')
3993 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3994 : Intrinsic::x86_sse2_psrl_q;
3995 else if (
Size ==
'w')
3996 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3997 : Intrinsic::x86_sse2_psrl_w;
4000 }
else if (Name.ends_with(
".256")) {
4002 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4003 : Intrinsic::x86_avx2_psrl_d;
4004 else if (
Size ==
'q')
4005 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4006 : Intrinsic::x86_avx2_psrl_q;
4007 else if (
Size ==
'w')
4008 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4009 : Intrinsic::x86_avx2_psrl_w;
4014 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4015 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4016 : Intrinsic::x86_avx512_psrl_d_512;
4017 else if (
Size ==
'q')
4018 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4019 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4020 : Intrinsic::x86_avx512_psrl_q_512;
4021 else if (
Size ==
'w')
4022 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4023 : Intrinsic::x86_avx512_psrl_w_512;
4029 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4030 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4031 bool IsVariable = Name[16] ==
'v';
4032 char Size = Name[16] ==
'.' ? Name[17]
4033 : Name[17] ==
'.' ? Name[18]
4034 : Name[18] ==
'.' ? Name[19]
4038 if (IsVariable && Name[17] !=
'.') {
4039 if (
Size ==
's' && Name[17] ==
'4')
4040 IID = Intrinsic::x86_avx2_psrav_d;
4041 else if (
Size ==
's' && Name[17] ==
'8')
4042 IID = Intrinsic::x86_avx2_psrav_d_256;
4043 else if (
Size ==
'h' && Name[17] ==
'8')
4044 IID = Intrinsic::x86_avx512_psrav_w_128;
4045 else if (
Size ==
'h' && Name[17] ==
'1')
4046 IID = Intrinsic::x86_avx512_psrav_w_256;
4047 else if (Name[17] ==
'3' && Name[18] ==
'2')
4048 IID = Intrinsic::x86_avx512_psrav_w_512;
4051 }
else if (Name.ends_with(
".128")) {
4053 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4054 : Intrinsic::x86_sse2_psra_d;
4055 else if (
Size ==
'q')
4056 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4057 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4058 : Intrinsic::x86_avx512_psra_q_128;
4059 else if (
Size ==
'w')
4060 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4061 : Intrinsic::x86_sse2_psra_w;
4064 }
else if (Name.ends_with(
".256")) {
4066 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4067 : Intrinsic::x86_avx2_psra_d;
4068 else if (
Size ==
'q')
4069 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4070 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4071 : Intrinsic::x86_avx512_psra_q_256;
4072 else if (
Size ==
'w')
4073 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4074 : Intrinsic::x86_avx2_psra_w;
4079 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4080 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4081 : Intrinsic::x86_avx512_psra_d_512;
4082 else if (
Size ==
'q')
4083 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4084 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4085 : Intrinsic::x86_avx512_psra_q_512;
4086 else if (
Size ==
'w')
4087 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4088 : Intrinsic::x86_avx512_psra_w_512;
4094 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4096 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4098 }
else if (Name.ends_with(
".movntdqa")) {
4102 LoadInst *LI = Builder.CreateAlignedLoad(
4107 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4108 Name.starts_with(
"fma.vfmsub.") ||
4109 Name.starts_with(
"fma.vfnmadd.") ||
4110 Name.starts_with(
"fma.vfnmsub.")) {
4111 bool NegMul = Name[6] ==
'n';
4112 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4113 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4124 if (NegMul && !IsScalar)
4125 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4126 if (NegMul && IsScalar)
4127 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4129 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4131 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4135 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4143 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4147 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4148 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4149 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4150 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4151 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4152 bool IsMask3 = Name[11] ==
'3';
4153 bool IsMaskZ = Name[11] ==
'z';
4155 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4156 bool NegMul = Name[2] ==
'n';
4157 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4163 if (NegMul && (IsMask3 || IsMaskZ))
4164 A = Builder.CreateFNeg(
A);
4165 if (NegMul && !(IsMask3 || IsMaskZ))
4166 B = Builder.CreateFNeg(
B);
4168 C = Builder.CreateFNeg(
C);
4170 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4171 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4172 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4179 if (Name.back() ==
'd')
4180 IID = Intrinsic::x86_avx512_vfmadd_f64;
4182 IID = Intrinsic::x86_avx512_vfmadd_f32;
4183 Rep = Builder.CreateIntrinsic(IID,
Ops);
4185 Rep = Builder.CreateFMA(
A,
B,
C);
4194 if (NegAcc && IsMask3)
4199 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4201 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4202 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4203 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4204 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4205 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4206 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4207 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4208 bool IsMask3 = Name[11] ==
'3';
4209 bool IsMaskZ = Name[11] ==
'z';
4211 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4212 bool NegMul = Name[2] ==
'n';
4213 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4219 if (NegMul && (IsMask3 || IsMaskZ))
4220 A = Builder.CreateFNeg(
A);
4221 if (NegMul && !(IsMask3 || IsMaskZ))
4222 B = Builder.CreateFNeg(
B);
4224 C = Builder.CreateFNeg(
C);
4231 if (Name[Name.size() - 5] ==
's')
4232 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4234 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4238 Rep = Builder.CreateFMA(
A,
B,
C);
4246 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4250 if (VecWidth == 128 && EltWidth == 32)
4251 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4252 else if (VecWidth == 256 && EltWidth == 32)
4253 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4254 else if (VecWidth == 128 && EltWidth == 64)
4255 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4256 else if (VecWidth == 256 && EltWidth == 64)
4257 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4263 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4264 Rep = Builder.CreateIntrinsic(IID,
Ops);
4265 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4266 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4267 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4268 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4269 bool IsMask3 = Name[11] ==
'3';
4270 bool IsMaskZ = Name[11] ==
'z';
4272 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4273 bool IsSubAdd = Name[3] ==
's';
4277 if (Name[Name.size() - 5] ==
's')
4278 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4280 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4285 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4287 Rep = Builder.CreateIntrinsic(IID,
Ops);
4296 Value *Odd = Builder.CreateCall(FMA,
Ops);
4297 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4298 Value *Even = Builder.CreateCall(FMA,
Ops);
4304 for (
int i = 0; i != NumElts; ++i)
4305 Idxs[i] = i + (i % 2) * NumElts;
4307 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4315 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4316 Name.starts_with(
"avx512.maskz.pternlog.")) {
4317 bool ZeroMask = Name[11] ==
'z';
4321 if (VecWidth == 128 && EltWidth == 32)
4322 IID = Intrinsic::x86_avx512_pternlog_d_128;
4323 else if (VecWidth == 256 && EltWidth == 32)
4324 IID = Intrinsic::x86_avx512_pternlog_d_256;
4325 else if (VecWidth == 512 && EltWidth == 32)
4326 IID = Intrinsic::x86_avx512_pternlog_d_512;
4327 else if (VecWidth == 128 && EltWidth == 64)
4328 IID = Intrinsic::x86_avx512_pternlog_q_128;
4329 else if (VecWidth == 256 && EltWidth == 64)
4330 IID = Intrinsic::x86_avx512_pternlog_q_256;
4331 else if (VecWidth == 512 && EltWidth == 64)
4332 IID = Intrinsic::x86_avx512_pternlog_q_512;
4338 Rep = Builder.CreateIntrinsic(IID, Args);
4342 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4343 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4344 bool ZeroMask = Name[11] ==
'z';
4345 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4348 if (VecWidth == 128 && !
High)
4349 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4350 else if (VecWidth == 256 && !
High)
4351 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4352 else if (VecWidth == 512 && !
High)
4353 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4354 else if (VecWidth == 128 &&
High)
4355 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4356 else if (VecWidth == 256 &&
High)
4357 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4358 else if (VecWidth == 512 &&
High)
4359 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4365 Rep = Builder.CreateIntrinsic(IID, Args);
4369 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4370 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4371 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4372 bool ZeroMask = Name[11] ==
'z';
4373 bool IndexForm = Name[17] ==
'i';
4375 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4376 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4377 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4378 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4379 bool ZeroMask = Name[11] ==
'z';
4380 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4383 if (VecWidth == 128 && !IsSaturating)
4384 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4385 else if (VecWidth == 256 && !IsSaturating)
4386 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4387 else if (VecWidth == 512 && !IsSaturating)
4388 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4389 else if (VecWidth == 128 && IsSaturating)
4390 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4391 else if (VecWidth == 256 && IsSaturating)
4392 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4393 else if (VecWidth == 512 && IsSaturating)
4394 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4404 if (Args[1]->
getType()->isVectorTy() &&
4407 ->isIntegerTy(32) &&
4408 Args[2]->
getType()->isVectorTy() &&
4411 ->isIntegerTy(32)) {
4412 Type *NewArgType =
nullptr;
4413 if (VecWidth == 128)
4415 else if (VecWidth == 256)
4417 else if (VecWidth == 512)
4423 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4424 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4427 Rep = Builder.CreateIntrinsic(IID, Args);
4431 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4432 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4433 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4434 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4435 bool ZeroMask = Name[11] ==
'z';
4436 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4439 if (VecWidth == 128 && !IsSaturating)
4440 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4441 else if (VecWidth == 256 && !IsSaturating)
4442 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4443 else if (VecWidth == 512 && !IsSaturating)
4444 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4445 else if (VecWidth == 128 && IsSaturating)
4446 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4447 else if (VecWidth == 256 && IsSaturating)
4448 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4449 else if (VecWidth == 512 && IsSaturating)
4450 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4460 if (Args[1]->
getType()->isVectorTy() &&
4463 ->isIntegerTy(32) &&
4464 Args[2]->
getType()->isVectorTy() &&
4467 ->isIntegerTy(32)) {
4468 Type *NewArgType =
nullptr;
4469 if (VecWidth == 128)
4471 else if (VecWidth == 256)
4473 else if (VecWidth == 512)
4479 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4480 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4483 Rep = Builder.CreateIntrinsic(IID, Args);
4487 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4488 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4489 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4491 if (Name[0] ==
'a' && Name.back() ==
'2')
4492 IID = Intrinsic::x86_addcarry_32;
4493 else if (Name[0] ==
'a' && Name.back() ==
'4')
4494 IID = Intrinsic::x86_addcarry_64;
4495 else if (Name[0] ==
's' && Name.back() ==
'2')
4496 IID = Intrinsic::x86_subborrow_32;
4497 else if (Name[0] ==
's' && Name.back() ==
'4')
4498 IID = Intrinsic::x86_subborrow_64;
4505 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4508 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4511 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4515 }
else if (Name.starts_with(
"avx512.mask.") &&
4526 if (Name.starts_with(
"neon.bfcvt")) {
4527 if (Name.starts_with(
"neon.bfcvtn2")) {
4529 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4531 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4532 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4535 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4536 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4538 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4542 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4543 return Builder.CreateShuffleVector(
4546 return Builder.CreateFPTrunc(CI->
getOperand(0),
4549 }
else if (Name.starts_with(
"sve.fcvt")) {
4552 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4553 .
Case(
"sve.fcvtnt.bf16f32",
4554 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4566 if (Args[1]->
getType() != BadPredTy)
4569 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4570 BadPredTy, Args[1]);
4571 Args[1] = Builder.CreateIntrinsic(
4572 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4574 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4583 if (Name ==
"mve.vctp64.old") {
4586 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4589 Value *C1 = Builder.CreateIntrinsic(
4590 Intrinsic::arm_mve_pred_v2i,
4592 return Builder.CreateIntrinsic(
4593 Intrinsic::arm_mve_pred_i2v,
4595 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4596 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4597 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4598 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4600 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4601 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4602 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4603 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4605 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4606 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4607 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4608 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4609 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4610 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4611 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4612 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4613 std::vector<Type *> Tys;
4617 case Intrinsic::arm_mve_mull_int_predicated:
4618 case Intrinsic::arm_mve_vqdmull_predicated:
4619 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4622 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4623 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4624 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4628 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4632 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4636 case Intrinsic::arm_cde_vcx1q_predicated:
4637 case Intrinsic::arm_cde_vcx1qa_predicated:
4638 case Intrinsic::arm_cde_vcx2q_predicated:
4639 case Intrinsic::arm_cde_vcx2qa_predicated:
4640 case Intrinsic::arm_cde_vcx3q_predicated:
4641 case Intrinsic::arm_cde_vcx3qa_predicated:
4648 std::vector<Value *>
Ops;
4650 Type *Ty =
Op->getType();
4651 if (Ty->getScalarSizeInBits() == 1) {
4652 Value *C1 = Builder.CreateIntrinsic(
4653 Intrinsic::arm_mve_pred_v2i,
4655 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4660 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4675 auto UpgradeLegacyWMMAIUIntrinsicCall =
4680 Args.push_back(Builder.getFalse());
4684 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4691 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4696 NewCall->copyMetadata(*CI);
4700 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4701 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4702 "intrinsic should have 7 arguments");
4705 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4707 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4708 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4709 "intrinsic should have 8 arguments");
4714 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4734 if (NumOperands < 3)
4747 bool IsVolatile =
false;
4751 if (NumOperands > 3)
4756 if (NumOperands > 5) {
4758 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4772 if (VT->getElementType()->isIntegerTy(16)) {
4775 Val = Builder.CreateBitCast(Val, AsBF16);
4783 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4785 unsigned AddrSpace = PtrTy->getAddressSpace();
4788 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4790 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4795 MDNode *RangeNotPrivate =
4798 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4804 return Builder.CreateBitCast(RMW, RetTy);
4825 return MAV->getMetadata();
4832 return I->getDebugLoc().getAsMDNode();
4840 if (Name ==
"label") {
4843 }
else if (Name ==
"assign") {
4850 }
else if (Name ==
"declare") {
4855 }
else if (Name ==
"addr") {
4865 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4867 }
else if (Name ==
"value") {
4870 unsigned ExprOp = 2;
4884 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4892 int64_t OffsetVal =
Offset->getSExtValue();
4893 return Builder.CreateIntrinsic(OffsetVal >= 0
4894 ? Intrinsic::vector_splice_left
4895 : Intrinsic::vector_splice_right,
4897 {CI->getArgOperand(0), CI->getArgOperand(1),
4898 Builder.getInt32(std::abs(OffsetVal))});
4903 if (Name.starts_with(
"to.fp16")) {
4905 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4906 return Builder.CreateBitCast(Cast, CI->
getType());
4909 if (Name.starts_with(
"from.fp16")) {
4911 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4912 return Builder.CreateFPExt(Cast, CI->
getType());
4937 if (!Name.consume_front(
"llvm."))
4940 bool IsX86 = Name.consume_front(
"x86.");
4941 bool IsNVVM = Name.consume_front(
"nvvm.");
4942 bool IsAArch64 = Name.consume_front(
"aarch64.");
4943 bool IsARM = Name.consume_front(
"arm.");
4944 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
4945 bool IsDbg = Name.consume_front(
"dbg.");
4947 (Name.consume_front(
"experimental.vector.splice") ||
4948 Name.consume_front(
"vector.splice")) &&
4949 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
4950 Value *Rep =
nullptr;
4952 if (!IsX86 && Name ==
"stackprotectorcheck") {
4954 }
else if (IsNVVM) {
4958 }
else if (IsAArch64) {
4962 }
else if (IsAMDGCN) {
4966 }
else if (IsOldSplice) {
4968 }
else if (Name.consume_front(
"convert.")) {
4980 const auto &DefaultCase = [&]() ->
void {
4988 "Unknown function for CallBase upgrade and isn't just a name change");
4996 "Return type must have changed");
4997 assert(OldST->getNumElements() ==
4999 "Must have same number of elements");
5002 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5005 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5006 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5007 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5026 case Intrinsic::arm_neon_vst1:
5027 case Intrinsic::arm_neon_vst2:
5028 case Intrinsic::arm_neon_vst3:
5029 case Intrinsic::arm_neon_vst4:
5030 case Intrinsic::arm_neon_vst2lane:
5031 case Intrinsic::arm_neon_vst3lane:
5032 case Intrinsic::arm_neon_vst4lane: {
5034 NewCall = Builder.CreateCall(NewFn, Args);
5037 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5038 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5039 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5044 NewCall = Builder.CreateCall(NewFn, Args);
5047 case Intrinsic::aarch64_sve_ld3_sret:
5048 case Intrinsic::aarch64_sve_ld4_sret:
5049 case Intrinsic::aarch64_sve_ld2_sret: {
5051 Name = Name.substr(5);
5058 unsigned MinElts = RetTy->getMinNumElements() /
N;
5060 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5062 for (
unsigned I = 0;
I <
N;
I++) {
5063 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5064 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5070 case Intrinsic::coro_end: {
5073 NewCall = Builder.CreateCall(NewFn, Args);
5077 case Intrinsic::vector_extract: {
5079 Name = Name.substr(5);
5080 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5085 unsigned MinElts = RetTy->getMinNumElements();
5088 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5092 case Intrinsic::vector_insert: {
5094 Name = Name.substr(5);
5095 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5099 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5104 NewCall = Builder.CreateCall(
5108 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5114 assert(
N > 1 &&
"Create is expected to be between 2-4");
5117 unsigned MinElts = RetTy->getMinNumElements() /
N;
5118 for (
unsigned I = 0;
I <
N;
I++) {
5120 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5127 case Intrinsic::arm_neon_bfdot:
5128 case Intrinsic::arm_neon_bfmmla:
5129 case Intrinsic::arm_neon_bfmlalb:
5130 case Intrinsic::arm_neon_bfmlalt:
5131 case Intrinsic::aarch64_neon_bfdot:
5132 case Intrinsic::aarch64_neon_bfmmla:
5133 case Intrinsic::aarch64_neon_bfmlalb:
5134 case Intrinsic::aarch64_neon_bfmlalt: {
5137 "Mismatch between function args and call args");
5138 size_t OperandWidth =
5140 assert((OperandWidth == 64 || OperandWidth == 128) &&
5141 "Unexpected operand width");
5143 auto Iter = CI->
args().begin();
5144 Args.push_back(*Iter++);
5145 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5146 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5147 NewCall = Builder.CreateCall(NewFn, Args);
5151 case Intrinsic::bitreverse:
5152 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5155 case Intrinsic::ctlz:
5156 case Intrinsic::cttz: {
5163 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5167 case Intrinsic::objectsize: {
5168 Value *NullIsUnknownSize =
5172 NewCall = Builder.CreateCall(
5177 case Intrinsic::ctpop:
5178 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5180 case Intrinsic::dbg_value: {
5182 Name = Name.substr(5);
5184 if (Name.starts_with(
"dbg.addr")) {
5198 if (
Offset->isNullValue()) {
5199 NewCall = Builder.CreateCall(
5208 case Intrinsic::ptr_annotation:
5216 NewCall = Builder.CreateCall(
5225 case Intrinsic::var_annotation:
5232 NewCall = Builder.CreateCall(
5241 case Intrinsic::riscv_aes32dsi:
5242 case Intrinsic::riscv_aes32dsmi:
5243 case Intrinsic::riscv_aes32esi:
5244 case Intrinsic::riscv_aes32esmi:
5245 case Intrinsic::riscv_sm4ks:
5246 case Intrinsic::riscv_sm4ed: {
5256 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5257 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5263 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5264 Value *Res = NewCall;
5266 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5272 case Intrinsic::nvvm_mapa_shared_cluster: {
5276 Value *Res = NewCall;
5277 Res = Builder.CreateAddrSpaceCast(
5284 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5285 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5288 Args[0] = Builder.CreateAddrSpaceCast(
5291 NewCall = Builder.CreateCall(NewFn, Args);
5297 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5298 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5299 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5300 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5301 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5302 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5303 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5304 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5311 Args[0] = Builder.CreateAddrSpaceCast(
5320 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5322 NewCall = Builder.CreateCall(NewFn, Args);
5328 case Intrinsic::riscv_sha256sig0:
5329 case Intrinsic::riscv_sha256sig1:
5330 case Intrinsic::riscv_sha256sum0:
5331 case Intrinsic::riscv_sha256sum1:
5332 case Intrinsic::riscv_sm3p0:
5333 case Intrinsic::riscv_sm3p1: {
5340 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5342 NewCall = Builder.CreateCall(NewFn, Arg);
5344 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5351 case Intrinsic::x86_xop_vfrcz_ss:
5352 case Intrinsic::x86_xop_vfrcz_sd:
5353 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5356 case Intrinsic::x86_xop_vpermil2pd:
5357 case Intrinsic::x86_xop_vpermil2ps:
5358 case Intrinsic::x86_xop_vpermil2pd_256:
5359 case Intrinsic::x86_xop_vpermil2ps_256: {
5363 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5364 NewCall = Builder.CreateCall(NewFn, Args);
5368 case Intrinsic::x86_sse41_ptestc:
5369 case Intrinsic::x86_sse41_ptestz:
5370 case Intrinsic::x86_sse41_ptestnzc: {
5384 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5385 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5387 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5391 case Intrinsic::x86_rdtscp: {
5397 NewCall = Builder.CreateCall(NewFn);
5399 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5402 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5410 case Intrinsic::x86_sse41_insertps:
5411 case Intrinsic::x86_sse41_dppd:
5412 case Intrinsic::x86_sse41_dpps:
5413 case Intrinsic::x86_sse41_mpsadbw:
5414 case Intrinsic::x86_avx_dp_ps_256:
5415 case Intrinsic::x86_avx2_mpsadbw: {
5421 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5422 NewCall = Builder.CreateCall(NewFn, Args);
5426 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5427 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5428 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5429 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5430 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5431 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5437 NewCall = Builder.CreateCall(NewFn, Args);
5446 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5447 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5448 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5449 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5450 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5451 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5455 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5456 Args[1] = Builder.CreateBitCast(
5459 NewCall = Builder.CreateCall(NewFn, Args);
5460 Value *Res = Builder.CreateBitCast(
5468 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5469 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5470 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5474 Args[1] = Builder.CreateBitCast(
5476 Args[2] = Builder.CreateBitCast(
5479 NewCall = Builder.CreateCall(NewFn, Args);
5483 case Intrinsic::thread_pointer: {
5484 NewCall = Builder.CreateCall(NewFn, {});
5488 case Intrinsic::memcpy:
5489 case Intrinsic::memmove:
5490 case Intrinsic::memset: {
5506 NewCall = Builder.CreateCall(NewFn, Args);
5508 AttributeList NewAttrs = AttributeList::get(
5509 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5510 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5511 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5516 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5519 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5523 case Intrinsic::masked_load:
5524 case Intrinsic::masked_gather:
5525 case Intrinsic::masked_store:
5526 case Intrinsic::masked_scatter: {
5532 auto GetMaybeAlign = [](
Value *
Op) {
5542 auto GetAlign = [&](
Value *
Op) {
5551 case Intrinsic::masked_load:
5552 NewCall = Builder.CreateMaskedLoad(
5556 case Intrinsic::masked_gather:
5557 NewCall = Builder.CreateMaskedGather(
5563 case Intrinsic::masked_store:
5564 NewCall = Builder.CreateMaskedStore(
5568 case Intrinsic::masked_scatter:
5569 NewCall = Builder.CreateMaskedScatter(
5571 DL.getValueOrABITypeAlignment(
5585 case Intrinsic::lifetime_start:
5586 case Intrinsic::lifetime_end: {
5598 NewCall = Builder.CreateLifetimeStart(Ptr);
5600 NewCall = Builder.CreateLifetimeEnd(Ptr);
5609 case Intrinsic::x86_avx512_vpdpbusd_128:
5610 case Intrinsic::x86_avx512_vpdpbusd_256:
5611 case Intrinsic::x86_avx512_vpdpbusd_512:
5612 case Intrinsic::x86_avx512_vpdpbusds_128:
5613 case Intrinsic::x86_avx512_vpdpbusds_256:
5614 case Intrinsic::x86_avx512_vpdpbusds_512:
5615 case Intrinsic::x86_avx2_vpdpbssd_128:
5616 case Intrinsic::x86_avx2_vpdpbssd_256:
5617 case Intrinsic::x86_avx10_vpdpbssd_512:
5618 case Intrinsic::x86_avx2_vpdpbssds_128:
5619 case Intrinsic::x86_avx2_vpdpbssds_256:
5620 case Intrinsic::x86_avx10_vpdpbssds_512:
5621 case Intrinsic::x86_avx2_vpdpbsud_128:
5622 case Intrinsic::x86_avx2_vpdpbsud_256:
5623 case Intrinsic::x86_avx10_vpdpbsud_512:
5624 case Intrinsic::x86_avx2_vpdpbsuds_128:
5625 case Intrinsic::x86_avx2_vpdpbsuds_256:
5626 case Intrinsic::x86_avx10_vpdpbsuds_512:
5627 case Intrinsic::x86_avx2_vpdpbuud_128:
5628 case Intrinsic::x86_avx2_vpdpbuud_256:
5629 case Intrinsic::x86_avx10_vpdpbuud_512:
5630 case Intrinsic::x86_avx2_vpdpbuuds_128:
5631 case Intrinsic::x86_avx2_vpdpbuuds_256:
5632 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5637 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5638 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5640 NewCall = Builder.CreateCall(NewFn, Args);
5643 case Intrinsic::x86_avx512_vpdpwssd_128:
5644 case Intrinsic::x86_avx512_vpdpwssd_256:
5645 case Intrinsic::x86_avx512_vpdpwssd_512:
5646 case Intrinsic::x86_avx512_vpdpwssds_128:
5647 case Intrinsic::x86_avx512_vpdpwssds_256:
5648 case Intrinsic::x86_avx512_vpdpwssds_512:
5649 case Intrinsic::x86_avx2_vpdpwsud_128:
5650 case Intrinsic::x86_avx2_vpdpwsud_256:
5651 case Intrinsic::x86_avx10_vpdpwsud_512:
5652 case Intrinsic::x86_avx2_vpdpwsuds_128:
5653 case Intrinsic::x86_avx2_vpdpwsuds_256:
5654 case Intrinsic::x86_avx10_vpdpwsuds_512:
5655 case Intrinsic::x86_avx2_vpdpwusd_128:
5656 case Intrinsic::x86_avx2_vpdpwusd_256:
5657 case Intrinsic::x86_avx10_vpdpwusd_512:
5658 case Intrinsic::x86_avx2_vpdpwusds_128:
5659 case Intrinsic::x86_avx2_vpdpwusds_256:
5660 case Intrinsic::x86_avx10_vpdpwusds_512:
5661 case Intrinsic::x86_avx2_vpdpwuud_128:
5662 case Intrinsic::x86_avx2_vpdpwuud_256:
5663 case Intrinsic::x86_avx10_vpdpwuud_512:
5664 case Intrinsic::x86_avx2_vpdpwuuds_128:
5665 case Intrinsic::x86_avx2_vpdpwuuds_256:
5666 case Intrinsic::x86_avx10_vpdpwuuds_512:
5671 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5672 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5674 NewCall = Builder.CreateCall(NewFn, Args);
5677 assert(NewCall &&
"Should have either set this variable or returned through "
5678 "the default case");
5685 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5699 F->eraseFromParent();
5705 if (NumOperands == 0)
5713 if (NumOperands == 3) {
5717 Metadata *Elts2[] = {ScalarType, ScalarType,
5731 if (
Opc != Instruction::BitCast)
5735 Type *SrcTy = V->getType();
5752 if (
Opc != Instruction::BitCast)
5755 Type *SrcTy =
C->getType();
5782 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5783 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5784 if (Flag->getNumOperands() < 3)
5786 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5787 return K->getString() ==
"Debug Info Version";
5790 if (OpIt != ModFlags->op_end()) {
5791 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5798 bool BrokenDebugInfo =
false;
5801 if (!BrokenDebugInfo)
5807 M.getContext().diagnose(Diag);
5814 M.getContext().diagnose(DiagVersion);
5824 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5827 if (
F->hasFnAttribute(Attr)) {
5830 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5832 auto [Part, Rest] = S.
split(
',');
5838 const unsigned Dim = DimC -
'x';
5839 assert(Dim < 3 &&
"Unexpected dim char");
5849 F->addFnAttr(Attr, NewAttr);
5853 return S ==
"x" || S ==
"y" || S ==
"z";
5858 if (K ==
"kernel") {
5870 const unsigned Idx = (AlignIdxValuePair >> 16);
5871 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5876 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5881 if (K ==
"minctasm") {
5886 if (K ==
"maxnreg") {
5891 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5895 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5899 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5903 if (K ==
"grid_constant") {
5918 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5925 if (!SeenNodes.
insert(MD).second)
5932 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5939 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5941 const MDOperand &V = MD->getOperand(j + 1);
5944 NewOperands.
append({K, V});
5947 if (NewOperands.
size() > 1)
5960 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
5961 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5962 if (ModRetainReleaseMarker) {
5968 ID->getString().split(ValueComp,
"#");
5969 if (ValueComp.
size() == 2) {
5970 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
5974 M.eraseNamedMetadata(ModRetainReleaseMarker);
5985 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6011 bool InvalidCast =
false;
6013 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6026 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6028 Args.push_back(Arg);
6035 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6040 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6053 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6061 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6062 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6063 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6064 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6065 {
"objc_autoreleaseReturnValue",
6066 llvm::Intrinsic::objc_autoreleaseReturnValue},
6067 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6068 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6069 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6070 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6071 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6072 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6073 {
"objc_release", llvm::Intrinsic::objc_release},
6074 {
"objc_retain", llvm::Intrinsic::objc_retain},
6075 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6076 {
"objc_retainAutoreleaseReturnValue",
6077 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6078 {
"objc_retainAutoreleasedReturnValue",
6079 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6080 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6081 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6082 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6083 {
"objc_unsafeClaimAutoreleasedReturnValue",
6084 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6085 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6086 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6087 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6088 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6089 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6090 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6091 {
"objc_arc_annotation_topdown_bbstart",
6092 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6093 {
"objc_arc_annotation_topdown_bbend",
6094 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6095 {
"objc_arc_annotation_bottomup_bbstart",
6096 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6097 {
"objc_arc_annotation_bottomup_bbend",
6098 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6100 for (
auto &
I : RuntimeFuncs)
6101 UpgradeToIntrinsic(
I.first,
I.second);
6105 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6109 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6110 bool HasSwiftVersionFlag =
false;
6111 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6118 if (
Op->getNumOperands() != 3)
6132 if (
ID->getString() ==
"Objective-C Image Info Version")
6134 if (
ID->getString() ==
"Objective-C Class Properties")
6135 HasClassProperties =
true;
6137 if (
ID->getString() ==
"PIC Level") {
6138 if (
auto *Behavior =
6140 uint64_t V = Behavior->getLimitedValue();
6146 if (
ID->getString() ==
"PIE Level")
6147 if (
auto *Behavior =
6154 if (
ID->getString() ==
"branch-target-enforcement" ||
6155 ID->getString().starts_with(
"sign-return-address")) {
6156 if (
auto *Behavior =
6162 Op->getOperand(1),
Op->getOperand(2)};
6172 if (
ID->getString() ==
"Objective-C Image Info Section") {
6175 Value->getString().split(ValueComp,
" ");
6176 if (ValueComp.
size() != 1) {
6177 std::string NewValue;
6178 for (
auto &S : ValueComp)
6179 NewValue += S.str();
6190 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6193 assert(Md->getValue() &&
"Expected non-empty metadata");
6194 auto Type = Md->getValue()->getType();
6197 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6198 if ((Val & 0xff) != Val) {
6199 HasSwiftVersionFlag =
true;
6200 SwiftABIVersion = (Val & 0xff00) >> 8;
6201 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6202 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6213 if (
ID->getString() ==
"amdgpu_code_object_version") {
6216 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6228 if (HasObjCFlag && !HasClassProperties) {
6234 if (HasSwiftVersionFlag) {
6238 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6240 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6248 auto TrimSpaces = [](
StringRef Section) -> std::string {
6250 Section.split(Components,
',');
6255 for (
auto Component : Components)
6256 OS <<
',' << Component.trim();
6261 for (
auto &GV : M.globals()) {
6262 if (!GV.hasSection())
6267 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6272 GV.setSection(TrimSpaces(Section));
6288struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6289 StrictFPUpgradeVisitor() =
default;
6292 if (!
Call.isStrictFP())
6298 Call.removeFnAttr(Attribute::StrictFP);
6299 Call.addFnAttr(Attribute::NoBuiltin);
6304struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6305 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6306 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6308 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6323 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6324 StrictFPUpgradeVisitor SFPV;
6329 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6330 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6331 for (
auto &Arg :
F.args())
6333 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6335 bool AddingAttrs =
false, RemovingAttrs =
false;
6336 AttrBuilder AttrsToAdd(
F.getContext());
6341 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6342 A.isValid() &&
A.isStringAttribute()) {
6343 F.setSection(
A.getValueAsString());
6345 RemovingAttrs =
true;
6349 A.isValid() &&
A.isStringAttribute()) {
6352 AddingAttrs = RemovingAttrs =
true;
6355 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6356 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6358 RemovingAttrs =
true;
6359 if (
A.getValueAsString() ==
"true") {
6360 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6369 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6372 if (
A.getValueAsBool()) {
6373 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6379 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6380 RemovingAttrs =
true;
6387 bool HandleDenormalMode =
false;
6389 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6392 DenormalFPMath = ParsedMode;
6394 AddingAttrs = RemovingAttrs =
true;
6395 HandleDenormalMode =
true;
6399 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6403 DenormalFPMathF32 = ParsedMode;
6405 AddingAttrs = RemovingAttrs =
true;
6406 HandleDenormalMode =
true;
6410 if (HandleDenormalMode)
6411 AttrsToAdd.addDenormalFPEnvAttr(
6415 F.removeFnAttrs(AttrsToRemove);
6418 F.addFnAttrs(AttrsToAdd);
6424 if (!
F.hasFnAttribute(FnAttrName))
6425 F.addFnAttr(FnAttrName,
Value);
6432 if (!
F.hasFnAttribute(FnAttrName)) {
6434 F.addFnAttr(FnAttrName);
6436 auto A =
F.getFnAttribute(FnAttrName);
6437 if (
"false" ==
A.getValueAsString())
6438 F.removeFnAttr(FnAttrName);
6439 else if (
"true" ==
A.getValueAsString()) {
6440 F.removeFnAttr(FnAttrName);
6441 F.addFnAttr(FnAttrName);
6447 Triple T(M.getTargetTriple());
6448 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6458 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6462 if (
Op->getNumOperands() != 3)
6471 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6472 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6473 : IDStr ==
"guarded-control-stack" ? &GCSValue
6474 : IDStr ==
"sign-return-address" ? &SRAValue
6475 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6476 : IDStr ==
"sign-return-address-with-bkey"
6482 *ValPtr = CI->getZExtValue();
6488 bool BTE = BTEValue == 1;
6489 bool BPPLR = BPPLRValue == 1;
6490 bool GCS = GCSValue == 1;
6491 bool SRA = SRAValue == 1;
6494 if (SRA && SRAALLValue == 1)
6495 SignTypeValue =
"all";
6498 if (SRA && SRABKeyValue == 1)
6499 SignKeyValue =
"b_key";
6501 for (
Function &
F : M.getFunctionList()) {
6502 if (
F.isDeclaration())
6509 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6510 A.isValid() &&
"none" ==
A.getValueAsString()) {
6511 F.removeFnAttr(
"sign-return-address");
6512 F.removeFnAttr(
"sign-return-address-key");
6528 if (SRAALLValue == 1)
6530 if (SRABKeyValue == 1)
6539 if (
T->getNumOperands() < 1)
6544 return S->getString().starts_with(
"llvm.vectorizer.");
6548 StringRef OldPrefix =
"llvm.vectorizer.";
6551 if (OldTag ==
"llvm.vectorizer.unroll")
6563 if (
T->getNumOperands() < 1)
6568 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6573 Ops.reserve(
T->getNumOperands());
6575 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6576 Ops.push_back(
T->getOperand(
I));
6590 Ops.reserve(
T->getNumOperands());
6601 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6602 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6603 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6606 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6608 auto I =
DL.find(
"-n64-");
6610 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6615 std::string Res =
DL.str();
6618 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6619 Res.append(Res.empty() ?
"G1" :
"-G1");
6627 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6628 Res.append(
"-ni:7:8:9");
6630 if (
DL.ends_with(
"ni:7"))
6632 if (
DL.ends_with(
"ni:7:8"))
6637 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6638 Res.append(
"-p7:160:256:256:32");
6639 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6640 Res.append(
"-p8:128:128:128:48");
6641 constexpr StringRef OldP8(
"-p8:128:128-");
6642 if (
DL.contains(OldP8))
6643 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6644 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6645 Res.append(
"-p9:192:256:256:32");
6649 if (!
DL.contains(
"m:e"))
6650 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6655 if (
T.isSystemZ() && !
DL.empty()) {
6657 if (!
DL.contains(
"-S64"))
6658 return "E-S64" +
DL.drop_front(1).str();
6662 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6665 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6666 if (!
DL.contains(AddrSpaces)) {
6668 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6669 if (R.match(Res, &
Groups))
6675 if (
T.isAArch64()) {
6677 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6678 Res.append(
"-Fn32");
6679 AddPtr32Ptr64AddrSpaces();
6683 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6687 std::string I64 =
"-i64:64";
6688 std::string I128 =
"-i128:128";
6690 size_t Pos = Res.find(I64);
6691 if (Pos !=
size_t(-1))
6692 Res.insert(Pos + I64.size(), I128);
6696 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6697 size_t Pos = Res.find(
"-S128");
6700 Res.insert(Pos,
"-f64:32:64");
6706 AddPtr32Ptr64AddrSpaces();
6714 if (!
T.isOSIAMCU()) {
6715 std::string I128 =
"-i128:128";
6718 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6719 if (R.match(Res, &
Groups))
6727 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6729 auto I =
Ref.find(
"-f80:32-");
6731 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6739 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6742 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6743 B.removeAttribute(
"no-frame-pointer-elim");
6745 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6747 if (FramePointer !=
"all")
6748 FramePointer =
"non-leaf";
6749 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6751 if (!FramePointer.
empty())
6752 B.addAttribute(
"frame-pointer", FramePointer);
6754 A =
B.getAttribute(
"null-pointer-is-valid");
6757 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6758 B.removeAttribute(
"null-pointer-is-valid");
6759 if (NullPointerIsValid)
6760 B.addAttribute(Attribute::NullPointerIsValid);
6763 A =
B.getAttribute(
"uniform-work-group-size");
6767 bool IsTrue = Val ==
"true";
6768 B.removeAttribute(
"uniform-work-group-size");
6770 B.addAttribute(
"uniform-work-group-size");
6781 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuilder, AttributeSet or AttributeList.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be treated by optimizations and code generation.
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to calling a function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function is not an intrinsic, or if this is an indirect call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresolved.
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intrinsic if the function is not an intrinsic.
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not have a module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the .td file.
@ ADDRESS_SPACE_SHARED_CLUSTER
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.