#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
    cl::desc("Disable autoupgrade of debug info"));
Type *Arg0Type = F->getFunctionType()->getParamType(0);

Type *LastArgType = F->getFunctionType()->getParamType(
    F->getFunctionType()->getNumParams() - 1);

if (F->getReturnType()->isVectorTy())

Type *Arg1Type = F->getFunctionType()->getParamType(1);
Type *Arg2Type = F->getFunctionType()->getParamType(2);

Type *Arg1Type = F->getFunctionType()->getParamType(1);
Type *Arg2Type = F->getFunctionType()->getParamType(2);

if (F->getReturnType()->getScalarType()->isBFloatTy())

if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
if (Name.consume_front("avx."))
  return (Name.starts_with("blend.p") ||
          Name == "cvt.ps2.pd.256" ||
          Name == "cvtdq2.pd.256" ||
          Name == "cvtdq2.ps.256" ||
          Name.starts_with("movnt.") ||
          Name.starts_with("sqrt.p") ||
          Name.starts_with("storeu.") ||
          Name.starts_with("vbroadcast.s") ||
          Name.starts_with("vbroadcastf128") ||
          Name.starts_with("vextractf128.") ||
          Name.starts_with("vinsertf128.") ||
          Name.starts_with("vperm2f128.") ||
          Name.starts_with("vpermil."));
if (Name.consume_front("avx2."))
  return (Name == "movntdqa" ||
          Name.starts_with("pabs.") ||
          Name.starts_with("padds.") ||
          Name.starts_with("paddus.") ||
          Name.starts_with("pblendd.") ||
          Name.starts_with("pbroadcast") ||
          Name.starts_with("pcmpeq.") ||
          Name.starts_with("pcmpgt.") ||
          Name.starts_with("pmax") ||
          Name.starts_with("pmin") ||
          Name.starts_with("pmovsx") ||
          Name.starts_with("pmovzx") ||
          Name == "pmulu.dq" ||
          Name.starts_with("psll.dq") ||
          Name.starts_with("psrl.dq") ||
          Name.starts_with("psubs.") ||
          Name.starts_with("psubus.") ||
          Name.starts_with("vbroadcast") ||
          Name == "vbroadcasti128" ||
          Name == "vextracti128" ||
          Name == "vinserti128" ||
          Name == "vperm2i128");
if (Name.consume_front("avx512.")) {
  if (Name.consume_front("mask."))
    return (Name.starts_with("add.p") ||
            Name.starts_with("and.") ||
            Name.starts_with("andn.") ||
            Name.starts_with("broadcast.s") ||
            Name.starts_with("broadcastf32x4.") ||
            Name.starts_with("broadcastf32x8.") ||
            Name.starts_with("broadcastf64x2.") ||
            Name.starts_with("broadcastf64x4.") ||
            Name.starts_with("broadcasti32x4.") ||
            Name.starts_with("broadcasti32x8.") ||
            Name.starts_with("broadcasti64x2.") ||
            Name.starts_with("broadcasti64x4.") ||
            Name.starts_with("cmp.b") ||
            Name.starts_with("cmp.d") ||
            Name.starts_with("cmp.q") ||
            Name.starts_with("cmp.w") ||
            Name.starts_with("compress.b") ||
            Name.starts_with("compress.d") ||
            Name.starts_with("compress.p") ||
            Name.starts_with("compress.q") ||
            Name.starts_with("compress.store.") ||
            Name.starts_with("compress.w") ||
            Name.starts_with("conflict.") ||
            Name.starts_with("cvtdq2pd.") ||
            Name.starts_with("cvtdq2ps.") ||
            Name == "cvtpd2dq.256" ||
            Name == "cvtpd2ps.256" ||
            Name == "cvtps2pd.128" ||
            Name == "cvtps2pd.256" ||
            Name.starts_with("cvtqq2pd.") ||
            Name == "cvtqq2ps.256" ||
            Name == "cvtqq2ps.512" ||
            Name == "cvttpd2dq.256" ||
            Name == "cvttps2dq.128" ||
            Name == "cvttps2dq.256" ||
            Name.starts_with("cvtudq2pd.") ||
            Name.starts_with("cvtudq2ps.") ||
            Name.starts_with("cvtuqq2pd.") ||
            Name == "cvtuqq2ps.256" ||
            Name == "cvtuqq2ps.512" ||
            Name.starts_with("dbpsadbw.") ||
            Name.starts_with("div.p") ||
            Name.starts_with("expand.b") ||
            Name.starts_with("expand.d") ||
            Name.starts_with("expand.load.") ||
            Name.starts_with("expand.p") ||
            Name.starts_with("expand.q") ||
            Name.starts_with("expand.w") ||
            Name.starts_with("fpclass.p") ||
            Name.starts_with("insert") ||
            Name.starts_with("load.") ||
            Name.starts_with("loadu.") ||
            Name.starts_with("lzcnt.") ||
            Name.starts_with("max.p") ||
            Name.starts_with("min.p") ||
            Name.starts_with("movddup") ||
            Name.starts_with("move.s") ||
            Name.starts_with("movshdup") ||
            Name.starts_with("movsldup") ||
            Name.starts_with("mul.p") ||
            Name.starts_with("or.") ||
            Name.starts_with("pabs.") ||
            Name.starts_with("packssdw.") ||
            Name.starts_with("packsswb.") ||
            Name.starts_with("packusdw.") ||
            Name.starts_with("packuswb.") ||
            Name.starts_with("padd.") ||
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("palignr.") ||
            Name.starts_with("pand.") ||
            Name.starts_with("pandn.") ||
            Name.starts_with("pavg") ||
            Name.starts_with("pbroadcast") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            Name.starts_with("perm.df.") ||
            Name.starts_with("perm.di.") ||
            Name.starts_with("permvar.") ||
            Name.starts_with("pmaddubs.w.") ||
            Name.starts_with("pmaddw.d.") ||
            Name.starts_with("pmax") ||
            Name.starts_with("pmin") ||
            Name == "pmov.qd.256" ||
            Name == "pmov.qd.512" ||
            Name == "pmov.wb.256" ||
            Name == "pmov.wb.512" ||
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx") ||
            Name.starts_with("pmul.dq.") ||
            Name.starts_with("pmul.hr.sw.") ||
            Name.starts_with("pmulh.w.") ||
            Name.starts_with("pmulhu.w.") ||
            Name.starts_with("pmull.") ||
            Name.starts_with("pmultishift.qb.") ||
            Name.starts_with("pmulu.dq.") ||
            Name.starts_with("por.") ||
            Name.starts_with("prol.") ||
            Name.starts_with("prolv.") ||
            Name.starts_with("pror.") ||
            Name.starts_with("prorv.") ||
            Name.starts_with("pshuf.b.") ||
            Name.starts_with("pshuf.d.") ||
            Name.starts_with("pshufh.w.") ||
            Name.starts_with("pshufl.w.") ||
            Name.starts_with("psll.d") ||
            Name.starts_with("psll.q") ||
            Name.starts_with("psll.w") ||
            Name.starts_with("pslli") ||
            Name.starts_with("psllv") ||
            Name.starts_with("psra.d") ||
            Name.starts_with("psra.q") ||
            Name.starts_with("psra.w") ||
            Name.starts_with("psrai") ||
            Name.starts_with("psrav") ||
            Name.starts_with("psrl.d") ||
            Name.starts_with("psrl.q") ||
            Name.starts_with("psrl.w") ||
            Name.starts_with("psrli") ||
            Name.starts_with("psrlv") ||
            Name.starts_with("psub.") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("pternlog.") ||
            Name.starts_with("punpckh") ||
            Name.starts_with("punpckl") ||
            Name.starts_with("pxor.") ||
            Name.starts_with("shuf.f") ||
            Name.starts_with("shuf.i") ||
            Name.starts_with("shuf.p") ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("store.b.") ||
            Name.starts_with("store.d.") ||
            Name.starts_with("store.p") ||
            Name.starts_with("store.q.") ||
            Name.starts_with("store.w.") ||
            Name == "store.ss" ||
            Name.starts_with("storeu.") ||
            Name.starts_with("sub.p") ||
            Name.starts_with("ucmp.") ||
            Name.starts_with("unpckh.") ||
            Name.starts_with("unpckl.") ||
            Name.starts_with("valign.") ||
            Name == "vcvtph2ps.128" ||
            Name == "vcvtph2ps.256" ||
            Name.starts_with("vextract") ||
            Name.starts_with("vfmadd.") ||
            Name.starts_with("vfmaddsub.") ||
            Name.starts_with("vfnmadd.") ||
            Name.starts_with("vfnmsub.") ||
            Name.starts_with("vpdpbusd.") ||
            Name.starts_with("vpdpbusds.") ||
            Name.starts_with("vpdpwssd.") ||
            Name.starts_with("vpdpwssds.") ||
            Name.starts_with("vpermi2var.") ||
            Name.starts_with("vpermil.p") ||
            Name.starts_with("vpermilvar.") ||
            Name.starts_with("vpermt2var.") ||
            Name.starts_with("vpmadd52") ||
            Name.starts_with("vpshld.") ||
            Name.starts_with("vpshldv.") ||
            Name.starts_with("vpshrd.") ||
            Name.starts_with("vpshrdv.") ||
            Name.starts_with("vpshufbitqmb.") ||
            Name.starts_with("xor."));
  if (Name.consume_front("mask3."))
    return (Name.starts_with("vfmadd.") ||
            Name.starts_with("vfmaddsub.") ||
            Name.starts_with("vfmsub.") ||
            Name.starts_with("vfmsubadd.") ||
            Name.starts_with("vfnmsub."));
  if (Name.consume_front("maskz."))
    return (Name.starts_with("pternlog.") ||
            Name.starts_with("vfmadd.") ||
            Name.starts_with("vfmaddsub.") ||
            Name.starts_with("vpdpbusd.") ||
            Name.starts_with("vpdpbusds.") ||
            Name.starts_with("vpdpwssd.") ||
            Name.starts_with("vpdpwssds.") ||
            Name.starts_with("vpermt2var.") ||
            Name.starts_with("vpmadd52") ||
            Name.starts_with("vpshldv.") ||
            Name.starts_with("vpshrdv."));
  return (Name == "movntdqa" ||
          Name == "pmul.dq.512" ||
          Name == "pmulu.dq.512" ||
          Name.starts_with("broadcastm") ||
          Name.starts_with("cmp.p") ||
          Name.starts_with("cvtb2mask.") ||
          Name.starts_with("cvtd2mask.") ||
          Name.starts_with("cvtmask2") ||
          Name.starts_with("cvtq2mask.") ||
          Name == "cvtusi2sd" ||
          Name.starts_with("cvtw2mask.") ||
          Name == "kortestc.w" ||
          Name == "kortestz.w" ||
          Name.starts_with("kunpck") ||
          Name.starts_with("padds.") ||
          Name.starts_with("pbroadcast") ||
          Name.starts_with("prol") ||
          Name.starts_with("pror") ||
          Name.starts_with("psll.dq") ||
          Name.starts_with("psrl.dq") ||
          Name.starts_with("psubs.") ||
          Name.starts_with("ptestm") ||
          Name.starts_with("ptestnm") ||
          Name.starts_with("storent.") ||
          Name.starts_with("vbroadcast.s") ||
          Name.starts_with("vpshld.") ||
          Name.starts_with("vpshrd."));
if (Name.consume_front("fma."))
  return (Name.starts_with("vfmadd.") ||
          Name.starts_with("vfmsub.") ||
          Name.starts_with("vfmsubadd.") ||
          Name.starts_with("vfnmadd.") ||
          Name.starts_with("vfnmsub."));
if (Name.consume_front("fma4."))
  return Name.starts_with("vfmadd.s");
if (Name.consume_front("sse."))
  return (Name == "add.ss" ||
          Name == "cvtsi2ss" ||
          Name == "cvtsi642ss" ||
          Name.starts_with("sqrt.p") ||
          Name.starts_with("storeu.") ||
if (Name.consume_front("sse2."))
  return (Name == "add.sd" ||
          Name == "cvtdq2pd" ||
          Name == "cvtdq2ps" ||
          Name == "cvtps2pd" ||
          Name == "cvtsi2sd" ||
          Name == "cvtsi642sd" ||
          Name == "cvtss2sd" ||
          Name.starts_with("padds.") ||
          Name.starts_with("paddus.") ||
          Name.starts_with("pcmpeq.") ||
          Name.starts_with("pcmpgt.") ||
          Name == "pmulu.dq" ||
          Name.starts_with("pshuf") ||
          Name.starts_with("psll.dq") ||
          Name.starts_with("psrl.dq") ||
          Name.starts_with("psubs.") ||
          Name.starts_with("psubus.") ||
          Name.starts_with("sqrt.p") ||
          Name == "storel.dq" ||
          Name.starts_with("storeu.") ||
if (Name.consume_front("sse41."))
  return (Name.starts_with("blendp") ||
          Name == "movntdqa" ||
          Name.starts_with("pmovsx") ||
          Name.starts_with("pmovzx") ||
if (Name.consume_front("sse42."))
  return Name == "crc32.64.8";

if (Name.consume_front("sse4a."))
  return Name.starts_with("movnt.");
if (Name.consume_front("ssse3."))
  return (Name == "pabs.b.128" ||
          Name == "pabs.d.128" ||
          Name == "pabs.w.128");
if (Name.consume_front("xop."))
  return (Name == "vpcmov" ||
          Name == "vpcmov.256" ||
          Name.starts_with("vpcom") ||
          Name.starts_with("vprot"));
return (Name == "addcarry.u32" ||
        Name == "addcarry.u64" ||
        Name == "addcarryx.u32" ||
        Name == "addcarryx.u64" ||
        Name == "subborrow.u32" ||
        Name == "subborrow.u64" ||
        Name.starts_with("vcvtph2ps."));
if (!Name.consume_front("x86."))

if (Name == "rdtscp") {
  if (F->getFunctionType()->getNumParams() == 0)

                                  Intrinsic::x86_rdtscp);

if (Name.consume_front("sse41.ptest")) {
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
             .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
             .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
             .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
             .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
             .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
if (Name.consume_front("avx512.")) {
  if (Name.consume_front("mask.cmp.")) {
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
  } else if (Name.starts_with("vpdpbusd.") ||
             Name.starts_with("vpdpbusds.")) {
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
  } else if (Name.starts_with("vpdpwssd.") ||
             Name.starts_with("vpdpwssds.")) {
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
if (Name.consume_front("avx2.")) {
  if (Name.consume_front("vpdpb")) {
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
  } else if (Name.consume_front("vpdpw")) {
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
if (Name.consume_front("avx10.")) {
  if (Name.consume_front("vpdpb")) {
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
  } else if (Name.consume_front("vpdpw")) {
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
if (Name.consume_front("avx512bf16.")) {
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)

             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
if (Name.consume_front("xop.")) {
  if (Name.starts_with("vpermil2")) {
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      if (EltSize == 64 && IdxSize == 128)
        ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        ID = Intrinsic::x86_xop_vpermil2ps_256;
  } else if (F->arg_size() == 2)
             .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
             .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
if (Name == "seh.recoverfp") {
                                Intrinsic::eh_recoverfp);

if (Name.starts_with("rbit")) {
      F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());

if (Name == "thread.pointer") {
      F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
bool Neon = Name.consume_front("neon.");

if (Name.consume_front("bfdot.")) {
             .Cases({"v2f32.v8i8", "v4f32.v16i8"},
  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
  assert((OperandWidth == 64 || OperandWidth == 128) &&
         "Unexpected operand width");
  std::array<Type *, 2> Tys{

if (Name.consume_front("bfm")) {
  if (Name.consume_back(".v4f32.v16i8")) {

      F->arg_begin()->getType());

if (Name.consume_front("vst")) {
  static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
      Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
      Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
      Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
      Intrinsic::arm_neon_vst4lane};
  auto fArgs = F->getFunctionType()->params();
  Type *Tys[] = {fArgs[0], fArgs[1]};
        F->getParent(), StoreInts[fArgs.size() - 3], Tys);
        F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
if (Name.consume_front("mve.")) {
  if (Name == "vctp64") {

  if (Name.starts_with("vrintn.v")) {
        F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());

  if (Name.consume_back(".v4i1")) {
    if (Name.consume_back(".predicated.v2i64.v4i32"))
      return Name == "mull.int" || Name == "vqdmull";

    if (Name.consume_back(".v2i64")) {
      bool IsGather = Name.consume_front("vldr.gather.");
      if (IsGather || Name.consume_front("vstr.scatter.")) {
        if (Name.consume_front("base.")) {
          Name.consume_front("wb.");
          return Name == "predicated.v2i64";

        if (Name.consume_front("offset.predicated."))
          return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

if (Name.consume_front("cde.vcx")) {
  if (Name.consume_back(".predicated.v2i64.v4i1"))
    return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
           Name == "3q" || Name == "3qa";

      F->arg_begin()->getType());
if (Name.starts_with("addp")) {
  if (F->arg_size() != 2)
  if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);

if (Name.starts_with("bfcvt")) {

if (Name.consume_front("sve.")) {
  if (Name.consume_front("bf")) {
    if (Name == "mmla") {
      Type *Tys[] = {F->getReturnType(),
                     std::next(F->arg_begin())->getType()};
          F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);

    if (Name.consume_back(".lane")) {
               .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
               .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
               .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)

  if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {

  if (Name.consume_front("addqv")) {
    if (!F->getReturnType()->isFPOrFPVectorTy())
    auto Args = F->getFunctionType()->params();
    Type *Tys[] = {F->getReturnType(), Args[1]};
        F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
  if (Name.consume_front("ld")) {
    static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
    if (LdRegex.match(Name)) {
             "Expected 2 arguments for ld* intrinsic.");
      Type *PtrTy = F->getArg(1)->getType();
          Intrinsic::aarch64_sve_ld2_sret,
          Intrinsic::aarch64_sve_ld3_sret,
          Intrinsic::aarch64_sve_ld4_sret,
          F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
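      // LoadIDs is indexed as LoadIDs[Name[0] - '2']: LdRegex guarantees
      // that Name begins with '2', '3', or '4', so the subtraction selects
      // the matching aarch64_sve_ld{2,3,4}_sret entry (index 0, 1, or 2).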
  if (Name.consume_front("tuple.")) {
    if (Name.starts_with("get")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          F->getParent(), Intrinsic::vector_extract, Tys);

    if (Name.starts_with("set")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[2], Args[1]};
          F->getParent(), Intrinsic::vector_insert, Tys);

    static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
    if (CreateTupleRegex.match(Name)) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
          F->getParent(), Intrinsic::vector_insert, Tys);

  if (Name.starts_with("rev.nxv")) {
        F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
             .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
             .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
             .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
             .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
             .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)

  if (F->getArg(0)->getType()->getPointerAddressSpace() ==

  size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
  Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);

if (Name.consume_front("mapa.shared.cluster"))
  if (F->getReturnType()->getPointerAddressSpace() ==
    return Intrinsic::nvvm_mapa_shared_cluster;
if (Name.consume_front("cp.async.bulk.")) {
             .Case("global.to.shared.cluster",
                   Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
             .Case("shared.cta.to.cluster",
                   Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)

  if (F->getArg(0)->getType()->getPointerAddressSpace() ==

if (Name.consume_front("fma.rn."))
             .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
             .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
             .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
             .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
if (Name.consume_front("fmax."))
             .Case("bf16", Intrinsic::nvvm_fmax_bf16)
             .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
             .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
             .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
             .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
             .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
             .Case("ftz.nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
             .Case("ftz.nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
             .Case("ftz.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
             .Case("ftz.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
             .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
             .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
             .Case("nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
             .Case("nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
             .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
             .Case("xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
if (Name.consume_front("fmin."))
             .Case("bf16", Intrinsic::nvvm_fmin_bf16)
             .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
             .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
             .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
             .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
             .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
             .Case("ftz.nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
             .Case("ftz.nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
             .Case("ftz.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
             .Case("ftz.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
             .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
             .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
             .Case("nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
             .Case("nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
             .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
             .Case("xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
if (Name.consume_front("neg."))
             .Case("bf16", Intrinsic::nvvm_neg_bf16)
             .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)

return Name.consume_front("local") || Name.consume_front("shared") ||
       Name.consume_front("global") || Name.consume_front("constant") ||
       Name.consume_front("param");

if (Name.starts_with("to.fp16")) {
      FuncTy->getReturnType());

if (Name.starts_with("from.fp16")) {
      FuncTy->getReturnType());
                               bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  if (!Name.consume_front("llvm.") || Name.empty())

  bool IsArm = Name.consume_front("arm.");
  if (IsArm || Name.consume_front("aarch64.")) {
if (Name.consume_front("amdgcn.")) {
  if (Name == "alignbit") {
        F->getParent(), Intrinsic::fshr, {F->getReturnType()});

  if (Name.consume_front("atomic.")) {
    if (Name.starts_with("inc") || Name.starts_with("dec") ||
        Name.starts_with("cond.sub") || Name.starts_with("csub")) {

  switch (F->getIntrinsicID()) {
  case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
    if (F->arg_size() == 7) {
  case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
  case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
    if (F->arg_size() == 8) {

  if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
      Name.consume_front("flat.atomic.")) {
    if (Name.starts_with("fadd") ||
        (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
        (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {

  if (Name.starts_with("ldexp.")) {
        F->getParent(), Intrinsic::ldexp,
        {F->getReturnType(), F->getArg(1)->getType()});
if (F->arg_size() == 1) {
  if (Name.consume_front("convert.")) {

      F->arg_begin()->getType());

if (F->arg_size() == 2 && Name == "coro.end") {
                                Intrinsic::coro_end);

if (Name.consume_front("dbg.")) {
  if (CanUpgradeDebugIntrinsicsToRecords) {
    if (Name == "addr" || Name == "value" || Name == "assign" ||
        Name == "declare" || Name == "label") {

  if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
                                  Intrinsic::dbg_value);
if (Name.consume_front("experimental.vector.")) {
             .StartsWith("extract.", Intrinsic::vector_extract)
             .StartsWith("insert.", Intrinsic::vector_insert)
             .StartsWith("reverse.", Intrinsic::vector_reverse)
             .StartsWith("interleave2.", Intrinsic::vector_interleave2)
             .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
                         Intrinsic::vector_partial_reduce_add)

  const auto *FT = F->getFunctionType();

  if (ID == Intrinsic::vector_extract ||
      ID == Intrinsic::vector_interleave2)
  if (ID != Intrinsic::vector_interleave2)
  if (ID == Intrinsic::vector_insert ||
      ID == Intrinsic::vector_partial_reduce_add)
if (Name.consume_front("reduce.")) {
  static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
  if (R.match(Name, &Groups))
             .Case("add", Intrinsic::vector_reduce_add)
             .Case("mul", Intrinsic::vector_reduce_mul)
             .Case("and", Intrinsic::vector_reduce_and)
             .Case("or", Intrinsic::vector_reduce_or)
             .Case("xor", Intrinsic::vector_reduce_xor)
             .Case("smax", Intrinsic::vector_reduce_smax)
             .Case("smin", Intrinsic::vector_reduce_smin)
             .Case("umax", Intrinsic::vector_reduce_umax)
             .Case("umin", Intrinsic::vector_reduce_umin)
             .Case("fmax", Intrinsic::vector_reduce_fmax)
             .Case("fmin", Intrinsic::vector_reduce_fmin)

  static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
             .Case("fadd", Intrinsic::vector_reduce_fadd)
             .Case("fmul", Intrinsic::vector_reduce_fmul)

  auto Args = F->getFunctionType()->params();
      {Args[V2 ? 1 : 0]});
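// For example, an old "llvm.experimental.vector.reduce.add.v4i32" reaches
// this point with Name == "add.v4i32"; R captures "add" and the StringSwitch
// maps it to Intrinsic::vector_reduce_add. The "v2." spellings are the
// fadd/fmul variants whose first operand is the scalar accumulator, which is
// why the overload type is taken from Args[V2 ? 1 : 0], the vector operand.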
if (Name.consume_front("splice"))

if (Name.consume_front("experimental.stepvector.")) {
      F->getParent(), ID, F->getFunctionType()->getReturnType());

if (Name.starts_with("flt.rounds")) {
                                Intrinsic::get_rounding);

if (Name.starts_with("invariant.group.barrier")) {
  auto Args = F->getFunctionType()->params();
  Type *ObjectPtr[1] = {Args[0]};
      F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
if ((Name.starts_with("lifetime.start") ||
     Name.starts_with("lifetime.end")) &&
    F->arg_size() == 2) {
                      ? Intrinsic::lifetime_start
                      : Intrinsic::lifetime_end;
      F->getArg(0)->getType());

             .StartsWith("memcpy.", Intrinsic::memcpy)
             .StartsWith("memmove.", Intrinsic::memmove)
  if (F->arg_size() == 5) {
        F->getFunctionType()->params().slice(0, 3);

if (Name.starts_with("memset.") && F->arg_size() == 5) {
  const auto *FT = F->getFunctionType();
  Type *ParamTypes[2] = {
      FT->getParamType(0),
                                  Intrinsic::memset, ParamTypes);
             .StartsWith("masked.load", Intrinsic::masked_load)
             .StartsWith("masked.gather", Intrinsic::masked_gather)
             .StartsWith("masked.store", Intrinsic::masked_store)
             .StartsWith("masked.scatter", Intrinsic::masked_scatter)

if (MaskedID && F->arg_size() == 4) {
  if (MaskedID == Intrinsic::masked_load ||
      MaskedID == Intrinsic::masked_gather) {
        F->getParent(), MaskedID,
        {F->getReturnType(), F->getArg(0)->getType()});
        F->getParent(), MaskedID,
        {F->getArg(0)->getType(), F->getArg(1)->getType()});
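// The arg_size() == 4 test identifies the legacy masked.load/store forms
// that still carried an explicit alignment operand. Loads and gathers are
// re-declared with {result type, pointer type} as the overload types, while
// stores and scatters use {data type, pointer type}, matching the operands
// each variant is overloaded on.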
if (Name.consume_front("nvvm.")) {
  if (F->arg_size() == 1) {
               .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
               .Case("clz.i", Intrinsic::ctlz)
               .Case("popc.i", Intrinsic::ctpop)
        {F->getReturnType()});
  } else if (F->arg_size() == 2) {
               .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
               .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
               .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
               .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
        {F->getReturnType()});

  if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
bool Expand = false;
if (Name.consume_front("abs."))
  Expand =
      Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
else if (Name.consume_front("fabs."))
  Expand = Name == "f" || Name == "ftz.f" || Name == "d";
else if (Name.consume_front("ex2.approx."))
  Expand =
      Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
else if (Name.consume_front("atomic.load."))

else if (Name.consume_front("bitcast."))
  Expand =
      Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
else if (Name.consume_front("rotate."))
  Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
else if (Name.consume_front("ptr.gen.to."))

else if (Name.consume_front("ptr."))

else if (Name.consume_front("ldg.global."))
  Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
            Name.starts_with("p."));

             .Case("barrier0", true)
             .Case("barrier.n", true)
             .Case("barrier.sync.cnt", true)
             .Case("barrier.sync", true)
             .Case("barrier", true)
             .Case("bar.sync", true)
             .Case("barrier0.popc", true)
             .Case("barrier0.and", true)
             .Case("barrier0.or", true)
             .Case("clz.ll", true)
             .Case("popc.ll", true)
             .Case("swap.lo.hi.b64", true)
             .Case("tanh.approx.f32", true)
if (Name.starts_with("objectsize.")) {
  Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
  if (F->arg_size() == 2 || F->arg_size() == 3) {
                                    Intrinsic::objectsize, Tys);

if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      F->getParent(), Intrinsic::ptr_annotation,
      {F->arg_begin()->getType(), F->getArg(1)->getType()});
if (Name.consume_front("riscv.")) {
             .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
             .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
             .Case("aes32esi", Intrinsic::riscv_aes32esi)
             .Case("aes32esmi", Intrinsic::riscv_aes32esmi)

  if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {

  if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
      F->getFunctionType()->getReturnType()->isIntegerTy(64)) {

             .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
             .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
             .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
             .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)

  if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {

  if (Name == "clmul.i32" || Name == "clmul.i64") {
        F->getParent(), Intrinsic::clmul, {F->getReturnType()});
if (Name == "stackprotectorcheck") {

if (Name == "thread.pointer") {
      F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

if (Name == "var.annotation" && F->arg_size() == 4) {
      F->getParent(), Intrinsic::var_annotation,
      {{F->arg_begin()->getType(), F->getArg(1)->getType()}});

if (Name.consume_front("vector.splice")) {
  if (Name.starts_with(".left") || Name.starts_with(".right"))
if (Name.consume_front("wasm.")) {
             .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
             .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
             .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
        F->getReturnType());

  if (Name.consume_front("dot.i8x16.i7x16.")) {
               .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                     Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
if (ST && (!ST->isLiteral() || ST->isPacked()) &&

auto *FT = F->getFunctionType();

std::string Name = F->getName().str();
                      Name, F->getParent());

if (Result != std::nullopt) {

                          bool CanUpgradeDebugIntrinsicsToRecords) {

     GV->getName() == "llvm.global_dtors")) ||

unsigned N = Init->getNumOperands();
std::vector<Constant *> NewCtors(N);
for (unsigned i = 0; i != N; ++i) {
    Ctor->getAggregateElement(1),
unsigned NumElts = ResultTy->getNumElements() * 8;

Op = Builder.CreateBitCast(Op, VecTy, "cast");

for (unsigned l = 0; l != NumElts; l += 16)
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = NumElts + i - Shift;
    if (Idx < NumElts)
      Idx -= NumElts - 16;
    Idxs[l + i] = Idx + l;
  }

Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));

return Builder.CreateBitCast(Res, ResultTy, "cast");
unsigned NumElts = ResultTy->getNumElements() * 8;

Op = Builder.CreateBitCast(Op, VecTy, "cast");

for (unsigned l = 0; l != NumElts; l += 16)
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = i + Shift;
    if (Idx >= 16)
      Idx += NumElts - 16;
    Idxs[l + i] = Idx + l;
  }

Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));

return Builder.CreateBitCast(Res, ResultTy, "cast");
Mask = Builder.CreateBitCast(Mask, MaskTy);

for (unsigned i = 0; i != NumElts; ++i)
Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),

if (C->isAllOnesValue())
return Builder.CreateSelect(Mask, Op0, Op1);

if (C->isAllOnesValue())
    Mask->getType()->getIntegerBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
return Builder.CreateSelect(Mask, Op0, Op1);
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

ShiftVal &= (NumElts - 1);

if (ShiftVal > 16) {

for (unsigned l = 0; l < NumElts; l += 16) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = ShiftVal + i;
    if (!IsVALIGN && Idx >= 16)
      Idx += NumElts - 16;
    Indices[l + i] = Idx + l;
  }
}

    Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
                               bool ZeroMask, bool IndexForm) {
unsigned EltWidth = Ty->getScalarSizeInBits();
bool IsFloat = Ty->isFPOrFPVectorTy();
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
else if (VecWidth == 128 && EltWidth == 16)
  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;

Value *V = Builder.CreateIntrinsic(IID, Args);
Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

                             bool IsRotateRight) {
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);

Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

Value *Ext = Builder.CreateSExt(Cmp, Ty);

                            bool IsShiftRight, bool ZeroMask) {
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);

Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
const Align Alignment =
        ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)

if (C->isAllOnesValue())
  return Builder.CreateAlignedStore(Data, Ptr, Alignment);

return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);

const Align Alignment =

if (C->isAllOnesValue())
  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);

Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                     {Op0, Builder.getInt1(false)});
Constant *ShiftAmt = ConstantInt::get(Ty, 32);
LHS = Builder.CreateShl(LHS, ShiftAmt);
LHS = Builder.CreateAShr(LHS, ShiftAmt);
RHS = Builder.CreateShl(RHS, ShiftAmt);
RHS = Builder.CreateAShr(RHS, ShiftAmt);

Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
LHS = Builder.CreateAnd(LHS, Mask);
RHS = Builder.CreateAnd(RHS, Mask);
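// This emulates PMULDQ/PMULUDQ, which multiply the even 32-bit elements
// into 64-bit products. Both inputs are reinterpreted as <N x i64>; the
// signed form sign-extends the low 32 bits of each element in place
// (shl 32 then ashr 32), the unsigned form zero-extends by masking with
// 0xffffffff, and an ordinary i64 multiply then produces the results.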
if (!C || !C->isAllOnesValue())
  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));

for (unsigned i = 0; i != NumElts; ++i)
for (unsigned i = NumElts; i != 8; ++i)
  Indices[i] = NumElts + i % NumElts;
Vec = Builder.CreateShuffleVector(Vec,

return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
                          unsigned CC, bool Signed) {
} else if (CC == 7) {

Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
Value *Cmp = Builder.CreateIsNotNull(AndNode);
Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);

return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
Name = Name.substr(12);

if (Name.starts_with("max.p")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_sse_max_ps;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_sse2_max_pd;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx_max_ps_256;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx_max_pd_256;
} else if (Name.starts_with("min.p")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_sse_min_ps;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_sse2_min_pd;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx_min_ps_256;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx_min_pd_256;
} else if (Name.starts_with("pshuf.b.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_ssse3_pshuf_b_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pshuf_b;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pshuf_b_512;
} else if (Name.starts_with("pmul.hr.sw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmul_hr_sw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
} else if (Name.starts_with("pmulh.w.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_pmulh_w;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmulh_w;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmulh_w_512;
} else if (Name.starts_with("pmulhu.w.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_pmulhu_w;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmulhu_w;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmulhu_w_512;
} else if (Name.starts_with("pmaddw.d.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_pmadd_wd;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmadd_wd;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmaddw_d_512;
} else if (Name.starts_with("pmaddubs.w.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_pmadd_ub_sw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmaddubs_w_512;
} else if (Name.starts_with("packsswb.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_packsswb_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packsswb;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packsswb_512;
} else if (Name.starts_with("packssdw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_packssdw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packssdw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packssdw_512;
} else if (Name.starts_with("packuswb.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse2_packuswb_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packuswb;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packuswb_512;
} else if (Name.starts_with("packusdw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_sse41_packusdw;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx2_packusdw;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_packusdw_512;
} else if (Name.starts_with("vpermilvar.")) {
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx_vpermilvar_ps;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx_vpermilvar_pd;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx_vpermilvar_ps_256;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx_vpermilvar_pd_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
} else if (Name == "cvtpd2dq.256") {
  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
} else if (Name == "cvtpd2ps.256") {
  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
} else if (Name == "cvttpd2dq.256") {
  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
} else if (Name == "cvttps2dq.128") {
  IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
} else if (Name.starts_with("permvar.")) {
  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx2_permps;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx2_permd;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_permvar_df_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_permvar_di_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_permvar_sf_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_permvar_si_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_permvar_df_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_permvar_di_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_permvar_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_permvar_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_permvar_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_permvar_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_permvar_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_permvar_qi_512;
} else if (Name.starts_with("dbpsadbw.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_avx512_dbpsadbw_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx512_dbpsadbw_256;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_dbpsadbw_512;
} else if (Name.starts_with("pmultishift.qb.")) {
  if (VecWidth == 128)
    IID = Intrinsic::x86_avx512_pmultishift_qb_128;
  else if (VecWidth == 256)
    IID = Intrinsic::x86_avx512_pmultishift_qb_256;
  else if (VecWidth == 512)
    IID = Intrinsic::x86_avx512_pmultishift_qb_512;
} else if (Name.starts_with("conflict.")) {
  if (Name[9] == 'd' && VecWidth == 128)
    IID = Intrinsic::x86_avx512_conflict_d_128;
  else if (Name[9] == 'd' && VecWidth == 256)
    IID = Intrinsic::x86_avx512_conflict_d_256;
  else if (Name[9] == 'd' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_conflict_d_512;
  else if (Name[9] == 'q' && VecWidth == 128)
    IID = Intrinsic::x86_avx512_conflict_q_128;
  else if (Name[9] == 'q' && VecWidth == 256)
    IID = Intrinsic::x86_avx512_conflict_q_256;
  else if (Name[9] == 'q' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_conflict_q_512;
} else if (Name.starts_with("pavg.")) {
  if (Name[5] == 'b' && VecWidth == 128)
    IID = Intrinsic::x86_sse2_pavg_b;
  else if (Name[5] == 'b' && VecWidth == 256)
    IID = Intrinsic::x86_avx2_pavg_b;
  else if (Name[5] == 'b' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_pavg_b_512;
  else if (Name[5] == 'w' && VecWidth == 128)
    IID = Intrinsic::x86_sse2_pavg_w;
  else if (Name[5] == 'w' && VecWidth == 256)
    IID = Intrinsic::x86_avx2_pavg_w;
  else if (Name[5] == 'w' && VecWidth == 512)
    IID = Intrinsic::x86_avx512_pavg_w_512;

Rep = Builder.CreateIntrinsic(IID, Args);
if (AsmStr->find("mov\tfp") == 0 &&
    AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
    (Pos = AsmStr->find("# marker")) != std::string::npos) {
  AsmStr->replace(Pos, 1, ";");
Value *Rep = nullptr;

if (Name == "abs.i" || Name == "abs.ll") {
  Value *Neg = Builder.CreateNeg(Arg, "neg");
  Value *Cmp = Builder.CreateICmpSGE(
  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
} else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
  Type *Ty = (Name == "abs.bf16")

  Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
  Rep = Builder.CreateBitCast(Abs, CI->getType());
} else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
  Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                             : Intrinsic::nvvm_fabs;
  Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
} else if (Name.consume_front("ex2.approx.")) {
  Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
                                              : Intrinsic::nvvm_ex2_approx;
  Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
} else if (Name.starts_with("atomic.load.add.f32.p") ||
           Name.starts_with("atomic.load.add.f64.p")) {
} else if (Name.starts_with("atomic.load.inc.32.p") ||
           Name.starts_with("atomic.load.dec.32.p")) {
  Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
} else if (Name == "clz.ll") {
  Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                        {Arg, Builder.getFalse()},
  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
} else if (Name == "popc.ll") {
  Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                        Arg, nullptr, "ctpop");
  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
} else if (Name == "h2f") {
  Value *Cast =
      Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
  Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
} else if (Name.consume_front("bitcast.") &&
           (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||

} else if (Name == "rotate.b32") {
  Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                {Arg, Arg, ShiftAmt});
} else if (Name == "rotate.b64") {
  Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                {Arg, Arg, ZExtShiftAmt});
} else if (Name == "rotate.right.b64") {
  Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                {Arg, Arg, ZExtShiftAmt});
} else if (Name == "swap.lo.hi.b64") {
  Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                {Arg, Arg, Builder.getInt64(32)});
}
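// The legacy nvvm rotate family maps directly onto the generic funnel-shift
// intrinsics: rotate.b32/b64 become fshl(x, x, amt), rotate.right.b64
// becomes fshr(x, x, amt), and swap.lo.hi.b64 is simply a rotate by 32,
// i.e. fshl(x, x, 32).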
else if ((Name.consume_front("ptr.gen.to.") &&

          Name.starts_with(".to.gen"))) {
} else if (Name.consume_front("ldg.global")) {
  Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
  LD->setMetadata(LLVMContext::MD_invariant_load, MD);
} else if (Name == "tanh.approx.f32") {
  Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
} else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
      Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
  Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
} else if (Name == "barrier") {
  Rep = Builder.CreateIntrinsic(
      Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
} else if (Name == "barrier.sync") {
  Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
} else if (Name == "barrier.sync.cnt") {
  Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
} else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
           Name == "barrier0.or") {
  C = Builder.CreateICmpNE(C, Builder.getInt32(0));
             .Case("barrier0.popc",
                   Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
             .Case("barrier0.and",
                   Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
             .Case("barrier0.or",
                   Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
  Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
  Rep = Builder.CreateZExt(Bar, CI->getType());

    !F->getReturnType()->getScalarType()->isBFloatTy()) {
      ? Builder.CreateBitCast(Arg, NewType)
  Rep = Builder.CreateCall(NewFn, Args);
  if (F->getReturnType()->isIntegerTy())
    Rep = Builder.CreateBitCast(Rep, F->getReturnType());
Value *Rep = nullptr;

if (Name.starts_with("sse4a.movnt.")) {
      Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
  SI->setMetadata(LLVMContext::MD_nontemporal, Node);
} else if (Name.starts_with("avx.movnt.") ||
           Name.starts_with("avx512.storent.")) {
  SI->setMetadata(LLVMContext::MD_nontemporal, Node);
} else if (Name == "sse2.storel.dq") {
  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
  Builder.CreateAlignedStore(Elt, Arg0, Align(1));
} else if (Name.starts_with("sse.storeu.") ||
           Name.starts_with("sse2.storeu.") ||
           Name.starts_with("avx.storeu.")) {
  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
} else if (Name == "avx512.mask.store.ss") {
} else if (Name.starts_with("avx512.mask.store")) {
  bool Aligned = Name[17] != 'u';
} else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
  bool CmpEq = Name[9] == 'e';
  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (Name.starts_with("avx512.broadcastm")) {
  Rep = Builder.CreateVectorSplat(NumElts, Rep);
} else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
  Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
} else if (Name.starts_with("avx.sqrt.p") ||
           Name.starts_with("sse2.sqrt.p") ||
           Name.starts_with("sse.sqrt.p")) {
  Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                {CI->getArgOperand(0)});
} else if (Name.starts_with("avx512.mask.sqrt.p")) {
  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                      : Intrinsic::x86_avx512_sqrt_pd_512;
  Rep = Builder.CreateIntrinsic(IID, Args);

    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
2957 }
else if (Name.starts_with(
"avx512.ptestm") ||
2958 Name.starts_with(
"avx512.ptestnm")) {
2962 Rep = Builder.CreateAnd(Op0, Op1);
2968 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2970 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2973 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2976 }
else if (Name.starts_with(
"avx512.kunpck")) {
2981 for (
unsigned i = 0; i != NumElts; ++i)
2990 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2991 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2992 }
  } else if (Name == "avx512.kand.w") {
    // ...
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {
    // ...
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {
    // ...
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {
    // ...
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {
    // ...
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {
    // ...
    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
    // ...
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    Value *C;
    if (Name[14] == 'c')
      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    else
      C = ConstantInt::getNullValue(Builder.getInt16Ty());
    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
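    // The avx512.k*.w branches above share one shape: the old opaque mask
    // intrinsics become plain IR logic (and/or/xor/not) on the mask value,
    // followed by a bitcast back to the call's original type. kortestz/c
    // additionally compares the combined mask against all-zeros or all-ones
    // and widens the i1 result to i32.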
  } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
             Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
             Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
             Name == "sse.div.ss" || Name == "sse2.div.sd") {
    // ...
    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  } else if (Name.starts_with("avx512.mask.pcmp")) {
    bool CmpEq = Name[16] == 'e';
    // ...
  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
    // ...
    switch (VecWidth) {
    default:
      llvm_unreachable("Unexpected intrinsic");
    case 128:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }
    // ...
  } else if (Name.starts_with("avx512.mask.fpclass.p")) {
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
    // ...
  } else if (Name.starts_with("avx512.cmp.p")) {
    // ...
    Type *OpTy = Args[0]->getType();
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
    // ...
    if (VecWidth == 512)
      std::swap(Mask, Args.back());
    Args.push_back(Mask);

    Rep = Builder.CreateIntrinsic(IID, Args);
  } else if (Name.starts_with("avx512.mask.cmp.")) {
    // ...
  } else if (Name.starts_with("avx512.mask.ucmp.")) {
    // ...
  } else if (Name.starts_with("avx512.cvtb2mask.") ||
             Name.starts_with("avx512.cvtw2mask.") ||
             Name.starts_with("avx512.cvtd2mask.") ||
             Name.starts_with("avx512.cvtq2mask.")) {
    // ...
  } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
             Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
             Name.starts_with("avx512.mask.pabs")) {
    // ...
  } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
             Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
             Name.starts_with("avx512.mask.pmaxs")) {
    // ...
  } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
             Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
             Name.starts_with("avx512.mask.pmaxu")) {
    // ...
  } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
             Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
             Name.starts_with("avx512.mask.pmins")) {
    // ...
  } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
             Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
             Name.starts_with("avx512.mask.pminu")) {
    // ...
  } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
             Name == "avx512.pmulu.dq.512" ||
             Name.starts_with("avx512.mask.pmulu.dq.")) {
    // ...
  } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
             Name == "avx512.pmul.dq.512" ||
             Name.starts_with("avx512.mask.pmul.dq.")) {
    // ...
  } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
             Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
    // ...
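    // Each of the abs/min/max/pmuldq families above is a straight rename onto
    // a target-independent intrinsic or simple IR. A hedged sketch of the
    // signed-min case (assuming the usual mapping to the generic intrinsic):
    //   %r = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a, <16 x i8> %b)
    // becomes
    //   %r = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)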
  } else if (Name == "avx512.cvtusi2sd") {
    // ...
  } else if (Name == "sse2.cvtss2sd") {
    // ...
    Rep = Builder.CreateFPExt(
        Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0),
        CI->getType()->getScalarType());
    // ...
  } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
             Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
             Name.starts_with("avx512.mask.cvtdq2pd.") ||
             Name.starts_with("avx512.mask.cvtudq2pd.") ||
             Name.starts_with("avx512.mask.cvtdq2ps.") ||
             Name.starts_with("avx512.mask.cvtudq2ps.") ||
             Name.starts_with("avx512.mask.cvtqq2pd.") ||
             Name.starts_with("avx512.mask.cvtuqq2pd.") ||
             Name == "avx512.mask.cvtqq2ps.256" ||
             Name == "avx512.mask.cvtqq2ps.512" ||
             Name == "avx512.mask.cvtuqq2ps.256" ||
             Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
             Name == "avx.cvt.ps2.pd.256" ||
             Name == "avx512.mask.cvtps2pd.128" ||
             Name == "avx512.mask.cvtps2pd.256") {
    // ...
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }

    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = Name.contains("cvtu");
    if (IsPS2PD)
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    else if (/* ... */) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
                                    {Rep, CI->getArgOperand(3)});
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }
    // ...
  } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
             Name.starts_with("vcvtph2ps.")) {
    // ...
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(
        Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
    // ...
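    // Both conversion families above first shrink the source with a
    // shufflevector when the destination has fewer lanes (the 128-bit
    // cvtps2pd/cvtph2ps forms only read the low half of their input), then
    // widen with fpext or [us]itofp as appropriate.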
  } else if (Name.starts_with("avx512.mask.load")) {
    bool Aligned = Name[16] != 'u';
    // ...
  } else if (Name.starts_with("avx512.mask.expand.load.")) {
    // ...
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());
    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_expandload, ResultTy,
        {CI->getOperand(0), MaskVec, CI->getOperand(1)});
  } else if (Name.starts_with("avx512.mask.compress.store.")) {
    // ...
    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_compressstore, ResultTy,
        {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
  } else if (Name.starts_with("avx512.mask.compress.") ||
             Name.starts_with("avx512.mask.expand.")) {
    // ...
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());
    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Rep = Builder.CreateIntrinsic(
        IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
  } else if (Name.starts_with("xop.vpcom")) {
    bool IsSigned;
    if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
        Name.ends_with("uq"))
      IsSigned = false;
    else if (Name.ends_with("b") || Name.ends_with("w") ||
             Name.ends_with("d") || Name.ends_with("q"))
      IsSigned = true;
    else
      llvm_unreachable("Unknown suffix");

    unsigned Imm;
    // ...
    Name = Name.substr(9); // strip off "xop.vpcom"
    if (Name.starts_with("lt"))
      Imm = 0;
    else if (Name.starts_with("le"))
      Imm = 1;
    else if (Name.starts_with("gt"))
      Imm = 2;
    else if (Name.starts_with("ge"))
      Imm = 3;
    else if (Name.starts_with("eq"))
      Imm = 4;
    else if (Name.starts_with("ne"))
      Imm = 5;
    else if (Name.starts_with("false"))
      Imm = 6;
    else if (Name.starts_with("true"))
      Imm = 7;
    else
      llvm_unreachable("Unknown condition");
    // ...
  } else if (Name.starts_with("xop.vpcmov")) {
    // ...
    Value *NotSel = Builder.CreateNot(Sel);
    // ...
    Rep = Builder.CreateOr(Sel0, Sel1);
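    // xop.vpcmov is just a bitwise select: the branch above builds
    //   (Op0 & Sel) | (Op1 & ~Sel)
    // as two ands (the elided Sel0/Sel1) and the or, rather than keeping a
    // target-specific intrinsic around.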
  } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
             Name.starts_with("avx512.mask.prol")) {
    Rep = upgradeX86Rotate(Builder, *CI, false);
  } else if (Name.starts_with("avx512.pror") ||
             Name.starts_with("avx512.mask.pror")) {
    Rep = upgradeX86Rotate(Builder, *CI, true);
  } else if (Name.starts_with("avx512.vpshld.") ||
             Name.starts_with("avx512.mask.vpshld") ||
             Name.starts_with("avx512.maskz.vpshld")) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
  } else if (Name.starts_with("avx512.vpshrd.") ||
             Name.starts_with("avx512.mask.vpshrd") ||
             Name.starts_with("avx512.maskz.vpshrd")) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
  } else if (Name == "sse42.crc32.64.8") {
    Value *Trunc0 =
        Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
                                  {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx.vbroadcast.s") ||
             Name.starts_with("avx512.vbroadcast.s")) {
    // ...
    Type *EltTy = VecTy->getElementType();
    unsigned EltNum = VecTy->getNumElements();
    Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = PoisonValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
  } else if (Name.starts_with("sse41.pmovsx") ||
             Name.starts_with("sse41.pmovzx") ||
             Name.starts_with("avx2.pmovsx") ||
             Name.starts_with("avx2.pmovzx") ||
             Name.starts_with("avx512.mask.pmovsx") ||
             Name.starts_with("avx512.mask.pmovzx")) {
    // ...
    unsigned NumDstElts = DstTy->getNumElements();
    // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;
    Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

    bool DoSext = Name.contains("pmovsx");
    Rep =
        DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
    // ...
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    // ...
  } else if (Name.starts_with("avx.vbroadcastf128") ||
             Name == "avx2.vbroadcasti128") {
    // ...
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(Load,
                                        ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  } else if (Name.starts_with("avx512.mask.shuf.i") ||
             Name.starts_with("avx512.mask.shuf.f")) {
    // ...
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    SmallVector<int, 8> ShuffleMask(0);

    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      // We actually need the other source.
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
    // ...
  } else if (Name.starts_with("avx512.mask.broadcastf") ||
             Name.starts_with("avx512.mask.broadcasti")) {
    // ...
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;
    // ...
  } else if (Name.starts_with("avx2.pbroadcast") ||
             Name.starts_with("avx2.vbroadcast") ||
             Name.starts_with("avx512.pbroadcast") ||
             Name.starts_with("avx512.mask.broadcast.s")) {
    // ...
    Rep = Builder.CreateShuffleVector(Op, M);
    // ...
  } else if (Name.starts_with("sse2.padds.") ||
             Name.starts_with("avx2.padds.") ||
             Name.starts_with("avx512.padds.") ||
             Name.starts_with("avx512.mask.padds.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
  } else if (Name.starts_with("sse2.psubs.") ||
             Name.starts_with("avx2.psubs.") ||
             Name.starts_with("avx512.psubs.") ||
             Name.starts_with("avx512.mask.psubs.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
  } else if (Name.starts_with("sse2.paddus.") ||
             Name.starts_with("avx2.paddus.") ||
             Name.starts_with("avx512.mask.paddus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
  } else if (Name.starts_with("sse2.psubus.") ||
             Name.starts_with("avx2.psubus.") ||
             Name.starts_with("avx512.mask.psubus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
  } else if (Name.starts_with("avx512.mask.palignr.")) {
    // ...
  } else if (Name.starts_with("avx512.mask.valign.")) {
    // ...
  } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
    // ...
  } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
    // ...
  } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
             Name == "avx512.psll.dq.512") {
    // ...
  } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
             Name == "avx512.psrl.dq.512") {
    // ...
  } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
             Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
             Name.starts_with("avx2.pblendd.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
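    // The blend upgrade above decodes the immediate into a shuffle mask: bit
    // (i % 8) of Imm selects between element i of Op0 (bit clear) and element
    // i of Op1 (bit set, hence the `i + NumElts` index into the concatenated
    // shuffle operands).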
  } else if (Name.starts_with("avx.vinsertf128.") ||
             Name == "avx2.vinserti128" ||
             Name.starts_with("avx512.mask.insert")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    unsigned Scale = DstNumElts / SrcNumElts;
    // ...
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);
    // ...
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    // ...
  } else if (Name.starts_with("avx.vextractf128.") ||
             Name == "avx2.vextracti128" ||
             Name.starts_with("avx512.mask.vextract")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned Scale = SrcNumElts / DstNumElts;
    // ...
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i + (Imm * DstNumElts);
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // ...
  } else if (Name.starts_with("avx512.mask.perm.df.") ||
             Name.starts_with("avx512.mask.perm.di.")) {
    // ...
    unsigned NumElts = VecTy->getNumElements();
    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // ...
  } else if (Name.starts_with("avx.vperm2f128.") ||
             Name == "avx2.vperm2i128") {
    // ...
    unsigned HalfSize = NumElts / 2;
    // ...
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
    // ...
  } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
             Name.starts_with("avx512.mask.vpermil.p") ||
             Name.starts_with("avx512.mask.pshuf.d.")) {
    // ...
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);
    // ...
    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // ...
  } else if (Name == "sse2.pshufl.w" ||
             Name.starts_with("avx512.mask.pshufl.w.")) {
    // ...
    if (Name == "sse2.pshufl.w" && NumElts % 8 != 0) {
      // ...
    }
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // ...
  } else if (Name == "sse2.pshufh.w" ||
             Name.starts_with("avx512.mask.pshufh.w.")) {
    // ...
    if (Name == "sse2.pshufh.w" && NumElts % 8 != 0) {
      // ...
    }
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // ...
  } else if (Name.starts_with("avx512.mask.shuf.p")) {
    // ...
    unsigned HalfLaneElts = NumLaneElts / 2;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Select the element from the immediate, wrapping every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    // ...
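    // All of the vpermil/pshuf/shuf branches above follow one recipe: decode
    // the 8-bit immediate into a constant shufflevector mask, so the x86
    // shuffle semantics survive as plain, optimizer-friendly IR instead of an
    // opaque intrinsic.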
  } else if (Name.starts_with("avx512.mask.movddup") ||
             Name.starts_with("avx512.mask.movshdup") ||
             Name.starts_with("avx512.mask.movsldup")) {
    // ...
    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // ...
  } else if (Name.starts_with("avx512.mask.punpckl") ||
             Name.starts_with("avx512.mask.unpckl.")) {
    // ...
    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    // ...
  } else if (Name.starts_with("avx512.mask.punpckh") ||
             Name.starts_with("avx512.mask.unpckh.")) {
    // ...
    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    // ...
  } else if (Name.starts_with("avx512.mask.and.") ||
             Name.starts_with("avx512.mask.pand.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    // ...
  } else if (Name.starts_with("avx512.mask.andn.") ||
             Name.starts_with("avx512.mask.pandn.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    // ...
  } else if (Name.starts_with("avx512.mask.or.") ||
             Name.starts_with("avx512.mask.por.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    // ...
  } else if (Name.starts_with("avx512.mask.xor.") ||
             Name.starts_with("avx512.mask.pxor.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    // ...
  } else if (Name.starts_with("avx512.mask.padd.")) {
    Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    // ...
  } else if (Name.starts_with("avx512.mask.psub.")) {
    Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    // ...
  } else if (Name.starts_with("avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    // ...
  } else if (Name.starts_with("avx512.mask.add.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    // ...
  } else if (Name.starts_with("avx512.mask.div.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    // ...
  } else if (Name.starts_with("avx512.mask.mul.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    // ...
  } else if (Name.starts_with("avx512.mask.sub.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    // ...
  } else if ((Name.starts_with("avx512.mask.max.p") ||
              Name.starts_with("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
        {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

    Rep = Builder.CreateIntrinsic(
        IID,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    // ...
  } else if (Name.starts_with("avx512.mask.lzcnt.")) {
    Rep =
        Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
                                {CI->getArgOperand(0), Builder.getInt1(false)});
    // ...
  } else if (Name.starts_with("avx512.mask.psll")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2')
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psllv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd')
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
                           : Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
                           : Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
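    // The psrl and psra blocks that follow repeat the exact decoding used
    // above: Name[16] distinguishes the immediate ('i') and per-element
    // variable ('v') shift forms, and Size (the character before the '.')
    // picks the element width, so the three chains differ only in which
    // concrete shift intrinsic they select for each 128/256/512-bit width.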
  } else if (Name.starts_with("avx512.mask.psrl")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2')
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
                           : Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
                           : Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (Name.starts_with("avx512.cvtmask2")) {
    Rep = upgradeMaskToInt(Builder, *CI);
  } else if (Name.ends_with(".movntdqa")) {
    // ...
    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), CI->getArgOperand(0),
        Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  } else if (Name.starts_with("fma.vfmadd.") ||
             Name.starts_with("fma.vfmsub.") ||
             Name.starts_with("fma.vfnmadd.") ||
             Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }

    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
    // ...
  } else if (Name.starts_with("fma4.vfmadd.s")) {
    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
    // ...
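    // The fma branches above reduce every vfmadd/vfmsub/vfnmadd/vfnmsub
    // variant to a single llvm.fma call by negating the right operands first,
    // e.g. (sketch) vfmsub(a, b, c) becomes fma(a, b, fneg(c)) and
    // vfnmadd(a, b, c) becomes fma(fneg(a), b, c).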
  } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
             Name.starts_with("avx512.maskz.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmsub.s") ||
             Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." prefix to make the offsets easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

    if (/* ... */) {
      Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }

    // ...
    if (NegAcc && IsMask3)
      PassThru =
          Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
    // ...
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
             Name.starts_with("avx512.mask.vfnmadd.p") ||
             Name.starts_with("avx512.mask.vfnmsub.p") ||
             Name.starts_with("avx512.mask3.vfmadd.p") ||
             Name.starts_with("avx512.mask3.vfmsub.p") ||
             Name.starts_with("avx512.mask3.vfnmsub.p") ||
             Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." prefix to make the offsets easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (/* ... */) {
      Intrinsic::ID IID;
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;
      Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }
    // ...
  } else if (Name.starts_with("fma.vfmsubadd.p")) {
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateIntrinsic(IID, Ops);
  } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmaddsub.p") ||
             Name.starts_with("avx512.maskz.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." prefix to make the offsets easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (/* ... */) {
      Intrinsic::ID IID;
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
      // ...
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);
      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      // Lower to two fma calls (accumulator negated for the even lanes) and
      // interleave the results.
      // ...
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);
      // ...
      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;
      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }
    // ...
  } else if (Name.starts_with("avx512.mask.pternlog.") ||
             Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), CI->getArgOperand(3)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    // ...
  } else if (Name.starts_with("avx512.mask.vpmadd52") ||
             Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    // ...
  } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
             Name.starts_with("avx512.mask.vpermt2var.") ||
             Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    // ...
  } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
             Name.starts_with("avx512.maskz.vpdpbusd.") ||
             Name.starts_with("avx512.mask.vpdpbusds.") ||
             Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    // The new intrinsics take byte vectors; bitcast old i32-vector operands.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 16);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 32);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 64);
      else
        llvm_unreachable("Unexpected intrinsic");

      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    // ...
  } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
             Name.starts_with("avx512.maskz.vpdpwssd.") ||
             Name.starts_with("avx512.mask.vpdpwssds.") ||
             Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    // ...
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    // The new intrinsics take i16 vectors; bitcast old i32-vector operands.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 8);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 16);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 32);
      else
        llvm_unreachable("Unexpected intrinsic");

      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    // ...
  } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
             Name == "addcarry.u32" || Name == "addcarry.u64" ||
             Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    else
      llvm_unreachable("Unexpected intrinsic");

    // Make a call with 3 operands.
    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Value *NewCall = Builder.CreateIntrinsic(IID, Args);

    // Extract the second result and store it through the out-pointer.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
    // The first result (the carry flag) replaces the original call result.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);
    // ...
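    // The addcarry/subborrow upgrade above splits the new { carry, result }
    // aggregate: extractvalue 1 (the arithmetic result) is stored through the
    // old out-pointer argument, while extractvalue 0 (the carry flag) takes
    // the place of the original call's return value.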
  } else if (Name.starts_with("avx512.mask.") &&
             upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep will be updated by the call in the condition.
  }
  // ...

  if (Name.starts_with("neon.bfcvt")) {
    if (Name.starts_with("neon.bfcvtn2")) {
      SmallVector<int, 4> LoMask(4);
      std::iota(LoMask.begin(), LoMask.end(), 0);
      SmallVector<int, 8> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
      // ...
      return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
    } else if (Name.starts_with("neon.bfcvtn")) {
      SmallVector<int, 8> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      // ...
      dbgs() << "Trunc: " << *Trunc << "\n";
      return Builder.CreateShuffleVector(
          Trunc, /* ... */, ConcatMask);
    } else {
      return Builder.CreateFPTrunc(CI->getOperand(0),
                                   Builder.getBFloatTy());
    }
  } else if (Name.starts_with("sve.fcvt")) {
    Intrinsic::ID NewID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
            .Case("sve.fcvtnt.bf16f32",
                  Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
            .Default(Intrinsic::not_intrinsic);
    // ...
    if (Args[1]->getType() != BadPredTy)
      llvm_unreachable("Unexpected predicate type!");

    // The old intrinsics took an svbool predicate; convert it to the
    // element-count-matched predicate the new overload expects.
    Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
                                      BadPredTy, Args[1]);
    Args[1] = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

    return Builder.CreateIntrinsic(NewID, Args, nullptr, CI->getName());
  }
  // ...
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp plus predicate casts.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    // ...
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      // ...
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      // ...
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      // ...
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      // ...
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      // ...
      break;
    }
    // ...
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        // Old v4i1 predicates become v2i1 via the pred_v2i/pred_i2v pair.
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }
    // ...
    return Builder.CreateIntrinsic(ID, Tys, Ops, nullptr, CI->getName());
  }
  // ...
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    // ...
    Args.push_back(Builder.getFalse());
    // ...
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        F->getParent(), F->getIntrinsicID(), OverloadTys);
    // ...
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    // ...
    NewCall->copyMetadata(*CI);
    return NewCall;
  };

  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    // ...
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    // ...
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }

  switch (F->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
    // ...
    if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16) {
      // ...
    }
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        F->getParent(), F->getIntrinsicID(), Overloads);
    // ...
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    // ...
    NewCall->copyMetadata(*CI);
    NewCall->takeName(CI);
    // ...
  }
  }
  // ...
  if (NumOperands < 3)
    return nullptr;
  // ...
  bool IsVolatile = false;
  // ...
  if (NumOperands > 3) {
    // ...
  }
  if (NumOperands > 5) {
    // ...
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }
  // ...
  if (VT->getElementType()->isIntegerTy(16)) {
    // ...
    Val = Builder.CreateBitCast(Val, AsBF16);
  }
  // ...
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
  unsigned AddrSpace = PtrTy->getAddressSpace();
  // ...
  RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
  // ...
  RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  // ...
  MDNode *RangeNotPrivate = /* ... */;
  RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  // ...
  return Builder.CreateBitCast(RMW, RetTy);
// ...
  return MAV->getMetadata();
// ...
  return I->getDebugLoc().getAsMDNode();
// ...
  if (Name == "label") {
    // ...
  } else if (Name == "assign") {
    // ...
  } else if (Name == "declare") {
    // ...
  } else if (Name == "addr") {
    // ...
    //     unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr, ...
    // ...
  } else if (Name == "value") {
    // ...
    unsigned ExprOp = 2;
    // ...
  }
  // ...
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
// ...
  int64_t OffsetVal = Offset->getSExtValue();
  return Builder.CreateIntrinsic(OffsetVal >= 0
                                     ? Intrinsic::vector_splice_left
                                     : Intrinsic::vector_splice_right,
                                 CI->getType(),
                                 {CI->getArgOperand(0), CI->getArgOperand(1),
                                  Builder.getInt32(std::abs(OffsetVal))});
  if (Name.starts_with("to.fp16")) {
    Value *Cast =
        Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
    return Builder.CreateBitCast(Cast, CI->getType());
  }
  if (Name.starts_with("from.fp16")) {
    Value *Cast =
        Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
    return Builder.CreateFPExt(Cast, CI->getType());
  }
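  // Sketch of the rewrite above: llvm.convert.to.fp16 carried half values as
  // i16, so (IR names assumed for illustration)
  //   %h = call i16 @llvm.convert.to.fp16.f32(float %x)
  // becomes
  //   %t = fptrunc float %x to half
  //   %h = bitcast half %t to i16
  // and the from.fp16 direction is the mirror image (bitcast, then fpext).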
// ...
  if (!Name.consume_front("llvm."))
    return nullptr;
  // ...
  bool IsX86 = Name.consume_front("x86.");
  bool IsNVVM = Name.consume_front("nvvm.");
  bool IsAArch64 = Name.consume_front("aarch64.");
  bool IsARM = Name.consume_front("arm.");
  bool IsAMDGCN = Name.consume_front("amdgcn.");
  bool IsDbg = Name.consume_front("dbg.");
  bool IsOldSplice =
      (Name.consume_front("experimental.vector.splice") ||
       Name.consume_front("vector.splice")) &&
      !(Name.starts_with(".left") || Name.starts_with(".right"));
  Value *Rep = nullptr;

  if (!IsX86 && Name == "stackprotectorcheck") {
    Rep = nullptr;
  } else if (IsNVVM) {
    // ...
  } else if (IsX86) {
    // ...
  } else if (IsAArch64) {
    // ...
  } else if (IsAMDGCN) {
    // ...
  } else if (IsDbg) {
    // ...
  } else if (IsOldSplice) {
    // ...
  } else if (Name.consume_front("convert.")) {
    // ...
  }
// ...
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle a generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }
    // This must be an upgrade from a named to a literal struct return.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");
      // ...
      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
      // ...
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      // ...
      return;
    }
    // ...
  };
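
  // From here the upgrade is keyed on the *new* intrinsic's ID rather than on
  // the textual name: each case below massages the old call's operands into
  // the shape the new declaration expects, leaving NewCall set (or returning
  // early once the old call has been fully replaced).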
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // ...
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    // ...
    unsigned MinElts = RetTy->getMinNumElements() / N;
    // ...
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    // ...
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
    }
    // ...
  }
  case Intrinsic::coro_end: {
    // ...
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip "llvm."
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    // ...
    unsigned MinElts = RetTy->getMinNumElements();
    // ...
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }
  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip "llvm."
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      // ...
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      // ...
      assert(N > 1 && "Create is expected to be between 2-4");
      // ...
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        // ...
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
      }
      // ...
    }
    break;
  }
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    // ...
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz: {
    // ...
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
  }
  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize = /* ... */;
    // ...
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                /* ... */});
    break;
  }
  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip "llvm."
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    if (Name.starts_with("dbg.addr")) {
      // ...
    }
    // ...
    if (Offset->isNullValue()) {
      NewCall = Builder.CreateCall(NewFn, /* ... */);
      break;
    }
    // ...
  }
  case Intrinsic::ptr_annotation:
    // ...
    NewCall = Builder.CreateCall(NewFn, /* ... */);
    // ...
    break;

  case Intrinsic::var_annotation:
    // ...
    NewCall = Builder.CreateCall(NewFn, /* ... */);
    // ...
    break;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The i64 RV64 forms became i32-only; truncate and re-extend as needed.
    // ...
    Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
    Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    // ...
    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    // ...
    Res = Builder.CreateIntCast(NewCall, CI->getType(), true);
    // ...
  }
  case Intrinsic::nvvm_mapa_shared_cluster: {
    // The return type changed address space; cast the result back to the old
    // shared address space for existing users.
    // ...
    Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
    // ...
  }
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    // ...
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
    // ...
    NewCall = Builder.CreateCall(NewFn, Args);
    // ...
  }
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    // ...
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
    // ...
    Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));

    NewCall = Builder.CreateCall(NewFn, Args);
    // ...
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // ...
    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), true);
    // ...
  }
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    // ...
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments changed from v4f32 to v2i64; since the intrinsics are
    // bitwise, a bitcast of both operands suffices.
    // ...
    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }
  case Intrinsic::x86_rdtscp: {
    // ...
    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it through the old out-pointer.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // ...
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
    // ...
  }
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    SmallVector<Value *, 4> Args(CI->args());
    // The immediate operand narrowed to i8.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    // ...
    NewCall = Builder.CreateCall(NewFn, Args);
    // ...
  }
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // These used to return i16 vectors; the new forms use bfloat vectors.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
    // ...
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // The old signature carried alignment as an explicit operand; the new one
    // uses parameter attributes instead.
    // ...
    NewCall = Builder.CreateCall(NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    // ...
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
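    // Roughly, the rewrite above turns (sketch, old-style IR assumed):
    //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n,
    //                                    i32 16, i1 false)
    // into the modern form where alignment lives in attributes:
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 16 %d, ptr align 16 %s,
    //                                    i64 %n, i1 false)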
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_store:
  case Intrinsic::masked_scatter: {
    // The explicit alignment operand was dropped; recover it from the old
    // constant operand (or the ABI type alignment).
    // ...
    auto GetMaybeAlign = [](Value *Op) {
      // ...
    };
    // ...
    auto GetAlign = [&](Value *Op) {
      // ...
    };
    switch (NewFn->getIntrinsicID()) {
    case Intrinsic::masked_load:
      NewCall = Builder.CreateMaskedLoad(/* ... */);
      break;
    case Intrinsic::masked_gather:
      NewCall = Builder.CreateMaskedGather(/* ... */);
      break;
    case Intrinsic::masked_store:
      NewCall = Builder.CreateMaskedStore(/* ... */);
      break;
    case Intrinsic::masked_scatter:
      NewCall = Builder.CreateMaskedScatter(
          /* ... */ DL.getValueOrABITypeAlignment(/* ... */) /* ... */);
      break;
    }
    // ...
  }
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    // ...
    if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
      NewCall = Builder.CreateLifetimeStart(Ptr);
    else
      NewCall = Builder.CreateLifetimeEnd(Ptr);
    // ...
  }
  case Intrinsic::x86_avx512_vpdpbusd_128:
  case Intrinsic::x86_avx512_vpdpbusd_256:
  case Intrinsic::x86_avx512_vpdpbusd_512:
  case Intrinsic::x86_avx512_vpdpbusds_128:
  case Intrinsic::x86_avx512_vpdpbusds_256:
  case Intrinsic::x86_avx512_vpdpbusds_512:
  case Intrinsic::x86_avx2_vpdpbssd_128:
  case Intrinsic::x86_avx2_vpdpbssd_256:
  case Intrinsic::x86_avx10_vpdpbssd_512:
  case Intrinsic::x86_avx2_vpdpbssds_128:
  case Intrinsic::x86_avx2_vpdpbssds_256:
  case Intrinsic::x86_avx10_vpdpbssds_512:
  case Intrinsic::x86_avx2_vpdpbsud_128:
  case Intrinsic::x86_avx2_vpdpbsud_256:
  case Intrinsic::x86_avx10_vpdpbsud_512:
  case Intrinsic::x86_avx2_vpdpbsuds_128:
  case Intrinsic::x86_avx2_vpdpbsuds_256:
  case Intrinsic::x86_avx10_vpdpbsuds_512:
  case Intrinsic::x86_avx2_vpdpbuud_128:
  case Intrinsic::x86_avx2_vpdpbuud_256:
  case Intrinsic::x86_avx10_vpdpbuud_512:
  case Intrinsic::x86_avx2_vpdpbuuds_128:
  case Intrinsic::x86_avx2_vpdpbuuds_256:
  case Intrinsic::x86_avx10_vpdpbuuds_512: {
    // ...
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_vpdpwssd_128:
  case Intrinsic::x86_avx512_vpdpwssd_256:
  case Intrinsic::x86_avx512_vpdpwssd_512:
  case Intrinsic::x86_avx512_vpdpwssds_128:
  case Intrinsic::x86_avx512_vpdpwssds_256:
  case Intrinsic::x86_avx512_vpdpwssds_512:
  case Intrinsic::x86_avx2_vpdpwsud_128:
  case Intrinsic::x86_avx2_vpdpwsud_256:
  case Intrinsic::x86_avx10_vpdpwsud_512:
  case Intrinsic::x86_avx2_vpdpwsuds_128:
  case Intrinsic::x86_avx2_vpdpwsuds_256:
  case Intrinsic::x86_avx10_vpdpwsuds_512:
  case Intrinsic::x86_avx2_vpdpwusd_128:
  case Intrinsic::x86_avx2_vpdpwusd_256:
  case Intrinsic::x86_avx10_vpdpwusd_512:
  case Intrinsic::x86_avx2_vpdpwusds_128:
  case Intrinsic::x86_avx2_vpdpwusds_256:
  case Intrinsic::x86_avx10_vpdpwusds_512:
  case Intrinsic::x86_avx2_vpdpwuud_128:
  case Intrinsic::x86_avx2_vpdpwuud_256:
  case Intrinsic::x86_avx10_vpdpwuud_512:
  case Intrinsic::x86_avx2_vpdpwuuds_128:
  case Intrinsic::x86_avx2_vpdpwuuds_256:
  case Intrinsic::x86_avx10_vpdpwuuds_512: {
    // ...
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  // ...

// ...
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  // ...
  F->eraseFromParent();
5776 if (NumOperands == 0)
5784 if (NumOperands == 3) {
5788 Metadata *Elts2[] = {ScalarType, ScalarType,
5802 if (Opc != Instruction::BitCast)
5806 Type *SrcTy = V->getType();
5823 if (Opc != Instruction::BitCast)
5826 Type *SrcTy = C->getType();
5853 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5854 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5855 if (Flag->getNumOperands() < 3)
5857 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5858 return K->getString() == "Debug Info Version";
5861 if (OpIt != ModFlags->op_end()) {
5862 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5869 bool BrokenDebugInfo = false;
5872 if (!BrokenDebugInfo)
5878 M.getContext().diagnose(Diag);
5885 M.getContext().diagnose(DiagVersion);
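// Editor's note: a minimal usage sketch (hypothetical caller, assuming only
// the public llvm/IR/AutoUpgrade.h API shown in this file) -- bitcode and IR
// parsers run this check so stale or broken debug info is dropped with a
// diagnostic instead of poisoning later passes:
//
//   #include "llvm/IR/AutoUpgrade.h"
//   #include "llvm/IR/Module.h"
//   static bool upgradeParsedModule(llvm::Module &M) {
//     // Returns true if the module was changed (debug info stripped/updated).
//     return llvm::UpgradeDebugInfo(M);
//   }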
5895 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5898 if (F->hasFnAttribute(Attr)) {
5901 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5903 auto [Part, Rest] = S.split(',');
5909 const unsigned Dim = DimC - 'x';
5910 assert(Dim < 3 && "Unexpected dim char");
5920 F->addFnAttr(Attr, NewAttr);
5924 return S == "x" || S == "y" || S == "z";
5929 if (K == "kernel") {
5941 const unsigned Idx = (AlignIdxValuePair >> 16);
5942 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5947 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5952 if (K == "minctasm") {
5957 if (K == "maxnreg") {
5962 if (K.consume_front("maxntid") && isXYZ(K)) {
5966 if (K.consume_front("reqntid") && isXYZ(K)) {
5970 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5974 if (K == "grid_constant") {
5989 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5996 if (!SeenNodes.insert(MD).second)
6003 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6010 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6012 const MDOperand &V = MD->getOperand(j + 1);
6015 NewOperands.append({K, V});
6018 if (NewOperands.size() > 1)
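// Editor's note (illustrative): this loop folds legacy !nvvm.annotations
// entries into function attributes and keeps only the operands it could not
// upgrade. A plausible before/after:
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @kern, !"maxntid", i32 128}
//   -> @kern gains the attribute "nvvm.maxntid"="128"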
6031 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6032 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6033 if (ModRetainReleaseMarker) {
6039 ID->getString().split(ValueComp, "#");
6040 if (ValueComp.size() == 2) {
6041 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6045 M.eraseNamedMetadata(ModRetainReleaseMarker);
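// Editor's note (illustrative): the old retain/release marker metadata used
// '#' to separate the instruction text from its comment; the upgrade rejoins
// the two halves with ';', i.e. roughly "<insn> # <comment>" becomes
// "<insn>;<comment>", and the old named metadata node is then erased.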
6056 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6082 bool InvalidCast = false;
6084 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6097 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6099 Args.push_back(Arg);
6106 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6111 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6124 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6132 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6133     {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6134     {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6135     {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6136     {"objc_autoreleaseReturnValue",
6137      llvm::Intrinsic::objc_autoreleaseReturnValue},
6138     {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6139     {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6140     {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6141     {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6142     {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6143     {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6144     {"objc_release", llvm::Intrinsic::objc_release},
6145     {"objc_retain", llvm::Intrinsic::objc_retain},
6146     {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6147     {"objc_retainAutoreleaseReturnValue",
6148      llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6149     {"objc_retainAutoreleasedReturnValue",
6150      llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6151     {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6152     {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6153     {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6154     {"objc_unsafeClaimAutoreleasedReturnValue",
6155      llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6156     {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6157     {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6158     {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6159     {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6160     {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6161     {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6162     {"objc_arc_annotation_topdown_bbstart",
6163      llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6164     {"objc_arc_annotation_topdown_bbend",
6165      llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6166     {"objc_arc_annotation_bottomup_bbstart",
6167      llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6168     {"objc_arc_annotation_bottomup_bbend",
6169      llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6171 for (auto &I : RuntimeFuncs)
6172 UpgradeToIntrinsic(I.first, I.second);
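// Editor's note (illustrative): after this table-driven rewrite, direct calls
// to ARC runtime entry points become calls to the matching intrinsics, e.g.:
//   %0 = call ptr @objc_retain(ptr %obj)
//   -> %0 = call ptr @llvm.objc.retain(ptr %obj)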
6176 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6180 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6181 bool HasSwiftVersionFlag = false;
6182 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6189 if (Op->getNumOperands() != 3)
6203 if (ID->getString() == "Objective-C Image Info Version")
6205 if (ID->getString() == "Objective-C Class Properties")
6206 HasClassProperties = true;
6208 if (ID->getString() == "PIC Level") {
6209 if (auto *Behavior =
6211 uint64_t V = Behavior->getLimitedValue();
6217 if (ID->getString() == "PIE Level")
6218 if (auto *Behavior =
6225 if (ID->getString() == "branch-target-enforcement" ||
6226     ID->getString().starts_with("sign-return-address")) {
6227 if (auto *Behavior =
6233 Op->getOperand(1), Op->getOperand(2)};
6243 if (ID->getString() == "Objective-C Image Info Section") {
6246 Value->getString().split(ValueComp, " ");
6247 if (ValueComp.size() != 1) {
6248 std::string NewValue;
6249 for (auto &S : ValueComp)
6250 NewValue += S.str();
6261 if (ID->getString() == "Objective-C Garbage Collection") {
6264 assert(Md->getValue() && "Expected non-empty metadata");
6265 auto Type = Md->getValue()->getType();
6268 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6269 if ((Val & 0xff) != Val) {
6270 HasSwiftVersionFlag = true;
6271 SwiftABIVersion = (Val & 0xff00) >> 8;
6272 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6273 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6284 if (ID->getString() == "amdgpu_code_object_version") {
6287 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6299 if (HasObjCFlag && !HasClassProperties) {
6305 if (HasSwiftVersionFlag) {
6309 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6311 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6319 auto TrimSpaces = [](StringRef Section) -> std::string {
6321 Section.split(Components, ',');
6326 for (auto Component : Components)
6327 OS << ',' << Component.trim();
6332 for (auto &GV : M.globals()) {
6333 if (!GV.hasSection())
6338 if (!Section.starts_with("__DATA, __objc_catlist"))
6343 GV.setSection(TrimSpaces(Section));
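// Editor's note (illustrative): TrimSpaces normalizes the comma-separated
// section specifier, so a category-list section such as
//   "__DATA, __objc_catlist, regular, no_dead_strip"
// would come back roughly as
//   "__DATA,__objc_catlist,regular,no_dead_strip"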
6359struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6360 StrictFPUpgradeVisitor() = default;
6363 if (!Call.isStrictFP())
6369 Call.removeFnAttr(Attribute::StrictFP);
6370 Call.addFnAttr(Attribute::NoBuiltin);
6375struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6376     : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6377 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6379 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6394 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6395 StrictFPUpgradeVisitor SFPV;
6400 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6401     F.getReturnType(), F.getAttributes().getRetAttrs()));
6402 for (auto &Arg : F.args())
6404 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6406 bool AddingAttrs = false, RemovingAttrs = false;
6407 AttrBuilder AttrsToAdd(F.getContext());
6412 if (Attribute A = F.getFnAttribute("implicit-section-name");
6413     A.isValid() && A.isStringAttribute()) {
6414 F.setSection(A.getValueAsString());
6416 RemovingAttrs = true;
6420 A.isValid() && A.isStringAttribute()) {
6423 AddingAttrs = RemovingAttrs = true;
6426 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6427     A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6429 RemovingAttrs = true;
6430 if (A.getValueAsString() == "true") {
6431 AttrsToAdd.addAttribute("uniform-work-group-size");
6440 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6443 if (A.getValueAsBool()) {
6444 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6450 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6451 RemovingAttrs = true;
6458 bool HandleDenormalMode = false;
6460 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6463 DenormalFPMath = ParsedMode;
6465 AddingAttrs = RemovingAttrs = true;
6466 HandleDenormalMode = true;
6470 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6474 DenormalFPMathF32 = ParsedMode;
6476 AddingAttrs = RemovingAttrs = true;
6477 HandleDenormalMode = true;
6481 if (HandleDenormalMode)
6482 AttrsToAdd.addDenormalFPEnvAttr(
6486 F.removeFnAttrs(AttrsToRemove);
6489 F.addFnAttrs(AttrsToAdd);
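// Editor's note: a minimal usage sketch (hypothetical caller, assuming the
// public llvm/IR/AutoUpgrade.h API) -- materializers invoke this per function
// so the string-attribute rewrites above (implicit-section-name,
// uniform-work-group-size, amdgpu-unsafe-fp-atomics, the two denormal-fp-math
// strings) are applied before the function is used:
//
//   #include "llvm/IR/AutoUpgrade.h"
//   #include "llvm/IR/Function.h"
//   static void fixupFunction(llvm::Function &F) {
//     llvm::UpgradeFunctionAttributes(F);
//   }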
6495 if (!F.hasFnAttribute(FnAttrName))
6496 F.addFnAttr(FnAttrName, Value);
6503 if (!F.hasFnAttribute(FnAttrName)) {
6505 F.addFnAttr(FnAttrName);
6507 auto A = F.getFnAttribute(FnAttrName);
6508 if ("false" == A.getValueAsString())
6509 F.removeFnAttr(FnAttrName);
6510 else if ("true" == A.getValueAsString()) {
6511 F.removeFnAttr(FnAttrName);
6512 F.addFnAttr(FnAttrName);
6518 Triple T(M.getTargetTriple());
6519 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6529 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6533 if (Op->getNumOperands() != 3)
6542 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6543                : IDStr == "branch-protection-pauth-lr"  ? &BPPLRValue
6544                : IDStr == "guarded-control-stack"       ? &GCSValue
6545                : IDStr == "sign-return-address"         ? &SRAValue
6546                : IDStr == "sign-return-address-all"     ? &SRAALLValue
6547                : IDStr == "sign-return-address-with-bkey"
6553 *ValPtr = CI->getZExtValue();
6559 bool BTE = BTEValue == 1;
6560 bool BPPLR = BPPLRValue == 1;
6561 bool GCS = GCSValue == 1;
6562 bool SRA = SRAValue == 1;
6565 if (SRA && SRAALLValue == 1)
6566 SignTypeValue = "all";
6569 if (SRA && SRABKeyValue == 1)
6570 SignKeyValue = "b_key";
6572 for (Function &F : M.getFunctionList()) {
6573 if (F.isDeclaration())
6580 if (auto A = F.getFnAttribute("sign-return-address");
6581     A.isValid() && "none" == A.getValueAsString()) {
6582 F.removeFnAttr("sign-return-address");
6583 F.removeFnAttr("sign-return-address-key");
6599 if (SRAALLValue == 1)
6601 if (SRABKeyValue == 1)
6610 if (T->getNumOperands() < 1)
6615 return S->getString().starts_with("llvm.vectorizer.");
6619 StringRef OldPrefix = "llvm.vectorizer.";
6622 if (OldTag == "llvm.vectorizer.unroll")
6634 if (T->getNumOperands() < 1)
6639 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6644 Ops.reserve(T->getNumOperands());
6646 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6647 Ops.push_back(T->getOperand(I));
6661 Ops.reserve(T->getNumOperands());
6672 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6673     !DL.contains("-G") && !DL.starts_with("G")) {
6674 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6677 if (T.isLoongArch64() || T.isRISCV64()) {
6679 auto I = DL.find("-n64-");
6681 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6686 std::string Res = DL.str();
6689 if (!DL.contains("-G") && !DL.starts_with("G"))
6690 Res.append(Res.empty() ? "G1" : "-G1");
6698 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6699 Res.append("-ni:7:8:9");
6701 if (DL.ends_with("ni:7"))
6703 if (DL.ends_with("ni:7:8"))
6708 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6709 Res.append("-p7:160:256:256:32");
6710 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6711 Res.append("-p8:128:128:128:48");
6712 constexpr StringRef OldP8("-p8:128:128-");
6713 if (DL.contains(OldP8))
6714 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6715 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6716 Res.append("-p9:192:256:256:32");
6720 if (!DL.contains("m:e"))
6721 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6726 if (T.isSystemZ() && !DL.empty()) {
6728 if (!DL.contains("-S64"))
6729 return "E-S64" + DL.drop_front(1).str();
6733 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6736 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6737 if (!DL.contains(AddrSpaces)) {
6739 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6740 if (R.match(Res, &Groups))
6746 if (T.isAArch64()) {
6748 if (!DL.empty() && !DL.contains("-Fn32"))
6749 Res.append("-Fn32");
6750 AddPtr32Ptr64AddrSpaces();
6754 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6758 std::string I64 = "-i64:64";
6759 std::string I128 = "-i128:128";
6761 size_t Pos = Res.find(I64);
6762 if (Pos != size_t(-1))
6763 Res.insert(Pos + I64.size(), I128);
6767 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6768 size_t Pos = Res.find("-S128");
6771 Res.insert(Pos, "-f64:32:64");
6777 AddPtr32Ptr64AddrSpaces();
6785 if (!T.isOSIAMCU()) {
6786 std::string I128 = "-i128:128";
6789 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6790 if (R.match(Res, &Groups))
6798 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6800 auto I = Ref.find("-f80:32-");
6802 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
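// Editor's note (illustrative): two concrete rewrites performed above, shown
// on plausible inputs:
//   SPIR/AMDGPU layout lacking a default global address-space size:
//     "e-p:64:64" -> "e-p:64:64-G1"
//   32-bit MSVC x86 with the old x87 long double alignment:
//     "...-f80:32-..." -> "...-f80:128-..."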
6810 Attribute A = B.getAttribute("no-frame-pointer-elim");
6813 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6814 B.removeAttribute("no-frame-pointer-elim");
6816 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6818 if (FramePointer != "all")
6819 FramePointer = "non-leaf";
6820 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6822 if (!FramePointer.empty())
6823 B.addAttribute("frame-pointer", FramePointer);
6825 A = B.getAttribute("null-pointer-is-valid");
6828 bool NullPointerIsValid = A.getValueAsString() == "true";
6829 B.removeAttribute("null-pointer-is-valid");
6830 if (NullPointerIsValid)
6831 B.addAttribute(Attribute::NullPointerIsValid);
6834 A = B.getAttribute("uniform-work-group-size");
6838 bool IsTrue = Val == "true";
6839 B.removeAttribute("uniform-work-group-size");
6841 B.addAttribute("uniform-work-group-size");
6852 return OBD.getTag() == "clang.arc.attachedcall" &&
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)