34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name ==
"mmla") {
1008 Type *Tys[] = {
F->getReturnType(),
1009 std::next(
F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1014 if (Name.consume_back(
".lane")) {
1018 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1032 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1037 if (Name.consume_front(
"addqv")) {
1039 if (!
F->getReturnType()->isFPOrFPVectorTy())
1042 auto Args =
F->getFunctionType()->params();
1043 Type *Tys[] = {
F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1049 if (Name.consume_front(
"ld")) {
1051 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.
match(Name)) {
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy =
F->getArg(1)->getType();
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1067 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1073 if (Name.consume_front(
"tuple.")) {
1075 if (Name.starts_with(
"get")) {
1077 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1083 if (Name.starts_with(
"set")) {
1085 auto Args =
F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1092 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.
match(Name)) {
1095 auto Args =
F->getFunctionType()->params();
1096 Type *Tys[] = {
F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1104 if (Name.starts_with(
"rev.nxv")) {
1107 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1119 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1141 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1155 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1166 if (Name.consume_front(
"mapa.shared.cluster"))
1167 if (
F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1171 if (Name.consume_front(
"cp.async.bulk.")) {
1174 .
Case(
"global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .
Case(
"shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1181 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1190 if (Name.consume_front(
"fma.rn."))
1192 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1198 if (Name.consume_front(
"fmax."))
1200 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1201 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .
Case(
"ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .
Case(
"ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .
Case(
"nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1222 if (Name.consume_front(
"fmin."))
1224 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1225 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .
Case(
"ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .
Case(
"ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .
Case(
"nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1246 if (Name.consume_front(
"neg."))
1248 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1249 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1256 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1257 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1258 Name.consume_front(
"param");
1264 if (Name.starts_with(
"to.fp16")) {
1268 FuncTy->getReturnType());
1271 if (Name.starts_with(
"from.fp16")) {
1275 FuncTy->getReturnType());
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1288 if (!Name.consume_front(
"llvm.") || Name.empty())
1294 bool IsArm = Name.consume_front(
"arm.");
1295 if (IsArm || Name.consume_front(
"aarch64.")) {
1301 if (Name.consume_front(
"amdgcn.")) {
1302 if (Name ==
"alignbit") {
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1309 if (Name.consume_front(
"atomic.")) {
1310 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1311 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1320 switch (
F->getIntrinsicID()) {
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (
F->arg_size() == 7) {
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (
F->arg_size() == 8) {
1344 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1345 Name.consume_front(
"flat.atomic.")) {
1346 if (Name.starts_with(
"fadd") ||
1348 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1349 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1357 if (Name.starts_with(
"ldexp.")) {
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1370 if (
F->arg_size() == 1) {
1371 if (Name.consume_front(
"convert.")) {
1385 F->arg_begin()->getType());
1390 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1393 Intrinsic::coro_end);
1400 if (Name.consume_front(
"dbg.")) {
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1404 Name ==
"declare" || Name ==
"label") {
1413 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1416 Intrinsic::dbg_value);
1423 if (Name.consume_front(
"experimental.vector.")) {
1429 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1430 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1431 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1432 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1433 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT =
F->getFunctionType();
1440 if (
ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1444 if (
ID != Intrinsic::vector_interleave2)
1446 if (
ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1455 if (Name.consume_front(
"reduce.")) {
1457 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &
Groups))
1460 .
Case(
"add", Intrinsic::vector_reduce_add)
1461 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1462 .
Case(
"and", Intrinsic::vector_reduce_and)
1463 .
Case(
"or", Intrinsic::vector_reduce_or)
1464 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1465 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1466 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1467 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1468 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1469 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1470 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1475 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1480 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1481 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1486 auto Args =
F->getFunctionType()->params();
1488 {Args[V2 ? 1 : 0]});
1494 if (Name.consume_front(
"splice"))
1498 if (Name.consume_front(
"experimental.stepvector.")) {
1502 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1507 if (Name.starts_with(
"flt.rounds")) {
1510 Intrinsic::get_rounding);
1515 if (Name.starts_with(
"invariant.group.barrier")) {
1517 auto Args =
F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1526 if ((Name.starts_with(
"lifetime.start") ||
1527 Name.starts_with(
"lifetime.end")) &&
1528 F->arg_size() == 2) {
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1534 F->getArg(0)->getType());
1543 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1544 .StartsWith(
"memmove.", Intrinsic::memmove)
1546 if (
F->arg_size() == 5) {
1550 F->getFunctionType()->params().slice(0, 3);
1556 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1559 const auto *FT =
F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0),
1565 Intrinsic::memset, ParamTypes);
1571 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1572 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1573 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1574 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1576 if (MaskedID &&
F->arg_size() == 4) {
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1593 if (Name.consume_front(
"nvvm.")) {
1595 if (
F->arg_size() == 1) {
1598 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1599 .Case(
"clz.i", Intrinsic::ctlz)
1600 .
Case(
"popc.i", Intrinsic::ctpop)
1604 {F->getReturnType()});
1607 }
else if (
F->arg_size() == 2) {
1610 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1611 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1612 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1613 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1617 {F->getReturnType()});
1623 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1651 bool Expand =
false;
1652 if (Name.consume_front(
"abs."))
1655 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1656 else if (Name.consume_front(
"fabs."))
1658 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1659 else if (Name.consume_front(
"ex2.approx."))
1662 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1663 else if (Name.consume_front(
"atomic.load."))
1672 else if (Name.consume_front(
"bitcast."))
1675 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1676 else if (Name.consume_front(
"rotate."))
1678 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1679 else if (Name.consume_front(
"ptr.gen.to."))
1682 else if (Name.consume_front(
"ptr."))
1685 else if (Name.consume_front(
"ldg.global."))
1687 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1688 Name.starts_with(
"p."));
1691 .
Case(
"barrier0",
true)
1692 .
Case(
"barrier.n",
true)
1693 .
Case(
"barrier.sync.cnt",
true)
1694 .
Case(
"barrier.sync",
true)
1695 .
Case(
"barrier",
true)
1696 .
Case(
"bar.sync",
true)
1697 .
Case(
"barrier0.popc",
true)
1698 .
Case(
"barrier0.and",
true)
1699 .
Case(
"barrier0.or",
true)
1700 .
Case(
"clz.ll",
true)
1701 .
Case(
"popc.ll",
true)
1703 .
Case(
"swap.lo.hi.b64",
true)
1704 .
Case(
"tanh.approx.f32",
true)
1716 if (Name.starts_with(
"objectsize.")) {
1717 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1718 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1721 Intrinsic::objectsize, Tys);
1728 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1738 if (Name.consume_front(
"riscv.")) {
1741 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1744 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1760 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1770 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1778 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1787 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1798 if (Name ==
"stackprotectorcheck") {
1805 if (Name ==
"thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1813 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1820 if (Name.consume_front(
"vector.splice")) {
1821 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1829 if (Name.consume_front(
"wasm.")) {
1832 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1833 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1839 F->getReturnType());
1843 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1845 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1866 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1876 std::string
Name =
F->getName().str();
1879 Name,
F->getParent());
1890 if (Result != std::nullopt) {
1903 bool CanUpgradeDebugIntrinsicsToRecords) {
1923 GV->
getName() ==
"llvm.global_dtors")) ||
1938 unsigned N =
Init->getNumOperands();
1939 std::vector<Constant *> NewCtors(
N);
1940 for (
unsigned i = 0; i !=
N; ++i) {
1943 Ctor->getAggregateElement(1),
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1961 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1971 for (
unsigned l = 0; l != NumElts; l += 16)
1972 for (
unsigned i = 0; i != 16; ++i) {
1973 unsigned Idx = NumElts + i - Shift;
1975 Idx -= NumElts - 16;
1976 Idxs[l + i] = Idx + l;
1979 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1983 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1995 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2005 for (
unsigned l = 0; l != NumElts; l += 16)
2006 for (
unsigned i = 0; i != 16; ++i) {
2007 unsigned Idx = i + Shift;
2009 Idx += NumElts - 16;
2010 Idxs[l + i] = Idx + l;
2013 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2017 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2031 for (
unsigned i = 0; i != NumElts; ++i)
2033 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2044 if (
C->isAllOnesValue())
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2056 if (
C->isAllOnesValue())
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2076 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2082 ShiftVal &= (NumElts - 1);
2091 if (ShiftVal > 16) {
2099 for (
unsigned l = 0; l < NumElts; l += 16) {
2100 for (
unsigned i = 0; i != 16; ++i) {
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16)
2103 Idx += NumElts - 16;
2104 Indices[l + i] = Idx + l;
2109 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2115 bool ZeroMask,
bool IndexForm) {
2118 unsigned EltWidth = Ty->getScalarSizeInBits();
2119 bool IsFloat = Ty->isFPOrFPVectorTy();
2121 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2123 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2125 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2129 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2131 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2133 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2137 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2139 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2141 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2145 else if (VecWidth == 128 && EltWidth == 16)
2146 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2147 else if (VecWidth == 256 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2149 else if (VecWidth == 512 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2151 else if (VecWidth == 128 && EltWidth == 8)
2152 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2153 else if (VecWidth == 256 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2155 else if (VecWidth == 512 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2167 Value *V = Builder.CreateIntrinsic(IID, Args);
2179 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2190 bool IsRotateRight) {
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2255 bool IsShiftRight,
bool ZeroMask) {
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2289 const Align Alignment =
2291 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2296 if (
C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2302 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2308 const Align Alignment =
2317 if (
C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(
false)});
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(
LHS, Mask);
2354 RHS = Builder.CreateAnd(
RHS, Mask);
2371 if (!
C || !
C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2377 for (
unsigned i = 0; i != NumElts; ++i)
2379 for (
unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2389 unsigned CC,
bool Signed) {
2397 }
else if (CC == 7) {
2433 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2437 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2446 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2452 Name = Name.substr(12);
2457 if (Name.starts_with(
"max.p")) {
2458 if (VecWidth == 128 && EltWidth == 32)
2459 IID = Intrinsic::x86_sse_max_ps;
2460 else if (VecWidth == 128 && EltWidth == 64)
2461 IID = Intrinsic::x86_sse2_max_pd;
2462 else if (VecWidth == 256 && EltWidth == 32)
2463 IID = Intrinsic::x86_avx_max_ps_256;
2464 else if (VecWidth == 256 && EltWidth == 64)
2465 IID = Intrinsic::x86_avx_max_pd_256;
2468 }
else if (Name.starts_with(
"min.p")) {
2469 if (VecWidth == 128 && EltWidth == 32)
2470 IID = Intrinsic::x86_sse_min_ps;
2471 else if (VecWidth == 128 && EltWidth == 64)
2472 IID = Intrinsic::x86_sse2_min_pd;
2473 else if (VecWidth == 256 && EltWidth == 32)
2474 IID = Intrinsic::x86_avx_min_ps_256;
2475 else if (VecWidth == 256 && EltWidth == 64)
2476 IID = Intrinsic::x86_avx_min_pd_256;
2479 }
else if (Name.starts_with(
"pshuf.b.")) {
2480 if (VecWidth == 128)
2481 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2482 else if (VecWidth == 256)
2483 IID = Intrinsic::x86_avx2_pshuf_b;
2484 else if (VecWidth == 512)
2485 IID = Intrinsic::x86_avx512_pshuf_b_512;
2488 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2489 if (VecWidth == 128)
2490 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2491 else if (VecWidth == 256)
2492 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2493 else if (VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2497 }
else if (Name.starts_with(
"pmulh.w.")) {
2498 if (VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pmulh_w;
2500 else if (VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pmulh_w;
2502 else if (VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pmulh_w_512;
2506 }
else if (Name.starts_with(
"pmulhu.w.")) {
2507 if (VecWidth == 128)
2508 IID = Intrinsic::x86_sse2_pmulhu_w;
2509 else if (VecWidth == 256)
2510 IID = Intrinsic::x86_avx2_pmulhu_w;
2511 else if (VecWidth == 512)
2512 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2515 }
else if (Name.starts_with(
"pmaddw.d.")) {
2516 if (VecWidth == 128)
2517 IID = Intrinsic::x86_sse2_pmadd_wd;
2518 else if (VecWidth == 256)
2519 IID = Intrinsic::x86_avx2_pmadd_wd;
2520 else if (VecWidth == 512)
2521 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2524 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2525 if (VecWidth == 128)
2526 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2527 else if (VecWidth == 256)
2528 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2529 else if (VecWidth == 512)
2530 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2533 }
else if (Name.starts_with(
"packsswb.")) {
2534 if (VecWidth == 128)
2535 IID = Intrinsic::x86_sse2_packsswb_128;
2536 else if (VecWidth == 256)
2537 IID = Intrinsic::x86_avx2_packsswb;
2538 else if (VecWidth == 512)
2539 IID = Intrinsic::x86_avx512_packsswb_512;
2542 }
else if (Name.starts_with(
"packssdw.")) {
2543 if (VecWidth == 128)
2544 IID = Intrinsic::x86_sse2_packssdw_128;
2545 else if (VecWidth == 256)
2546 IID = Intrinsic::x86_avx2_packssdw;
2547 else if (VecWidth == 512)
2548 IID = Intrinsic::x86_avx512_packssdw_512;
2551 }
else if (Name.starts_with(
"packuswb.")) {
2552 if (VecWidth == 128)
2553 IID = Intrinsic::x86_sse2_packuswb_128;
2554 else if (VecWidth == 256)
2555 IID = Intrinsic::x86_avx2_packuswb;
2556 else if (VecWidth == 512)
2557 IID = Intrinsic::x86_avx512_packuswb_512;
2560 }
else if (Name.starts_with(
"packusdw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_sse41_packusdw;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx2_packusdw;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_packusdw_512;
2569 }
else if (Name.starts_with(
"vpermilvar.")) {
2570 if (VecWidth == 128 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx_vpermilvar_ps;
2572 else if (VecWidth == 128 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx_vpermilvar_pd;
2574 else if (VecWidth == 256 && EltWidth == 32)
2575 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2576 else if (VecWidth == 256 && EltWidth == 64)
2577 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2578 else if (VecWidth == 512 && EltWidth == 32)
2579 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2580 else if (VecWidth == 512 && EltWidth == 64)
2581 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2584 }
else if (Name ==
"cvtpd2dq.256") {
2585 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2586 }
else if (Name ==
"cvtpd2ps.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2588 }
else if (Name ==
"cvttpd2dq.256") {
2589 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2590 }
else if (Name ==
"cvttps2dq.128") {
2591 IID = Intrinsic::x86_sse2_cvttps2dq;
2592 }
else if (Name ==
"cvttps2dq.256") {
2593 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2594 }
else if (Name.starts_with(
"permvar.")) {
2596 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx2_permps;
2598 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx2_permd;
2600 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_256;
2602 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_256;
2604 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_sf_512;
2606 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_si_512;
2608 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_df_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_di_512;
2612 else if (VecWidth == 128 && EltWidth == 16)
2613 IID = Intrinsic::x86_avx512_permvar_hi_128;
2614 else if (VecWidth == 256 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_256;
2616 else if (VecWidth == 512 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_512;
2618 else if (VecWidth == 128 && EltWidth == 8)
2619 IID = Intrinsic::x86_avx512_permvar_qi_128;
2620 else if (VecWidth == 256 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_256;
2622 else if (VecWidth == 512 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_512;
2626 }
else if (Name.starts_with(
"dbpsadbw.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2635 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2644 }
else if (Name.starts_with(
"conflict.")) {
2645 if (Name[9] ==
'd' && VecWidth == 128)
2646 IID = Intrinsic::x86_avx512_conflict_d_128;
2647 else if (Name[9] ==
'd' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx512_conflict_d_256;
2649 else if (Name[9] ==
'd' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_conflict_d_512;
2651 else if (Name[9] ==
'q' && VecWidth == 128)
2652 IID = Intrinsic::x86_avx512_conflict_q_128;
2653 else if (Name[9] ==
'q' && VecWidth == 256)
2654 IID = Intrinsic::x86_avx512_conflict_q_256;
2655 else if (Name[9] ==
'q' && VecWidth == 512)
2656 IID = Intrinsic::x86_avx512_conflict_q_512;
2659 }
else if (Name.starts_with(
"pavg.")) {
2660 if (Name[5] ==
'b' && VecWidth == 128)
2661 IID = Intrinsic::x86_sse2_pavg_b;
2662 else if (Name[5] ==
'b' && VecWidth == 256)
2663 IID = Intrinsic::x86_avx2_pavg_b;
2664 else if (Name[5] ==
'b' && VecWidth == 512)
2665 IID = Intrinsic::x86_avx512_pavg_b_512;
2666 else if (Name[5] ==
'w' && VecWidth == 128)
2667 IID = Intrinsic::x86_sse2_pavg_w;
2668 else if (Name[5] ==
'w' && VecWidth == 256)
2669 IID = Intrinsic::x86_avx2_pavg_w;
2670 else if (Name[5] ==
'w' && VecWidth == 512)
2671 IID = Intrinsic::x86_avx512_pavg_w_512;
2680 Rep = Builder.CreateIntrinsic(IID, Args);
2691 if (AsmStr->find(
"mov\tfp") == 0 &&
2692 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2693 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2694 AsmStr->replace(Pos, 1,
";");
2700 Value *Rep =
nullptr;
2702 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2704 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2705 Value *Cmp = Builder.CreateICmpSGE(
2707 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2708 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2709 Type *Ty = (Name ==
"abs.bf16")
2713 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2714 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2715 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2716 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2717 : Intrinsic::nvvm_fabs;
2718 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2719 }
else if (Name.consume_front(
"ex2.approx.")) {
2721 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2722 : Intrinsic::nvvm_ex2_approx;
2723 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2724 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2725 Name.starts_with(
"atomic.load.add.f64.p")) {
2728 Rep = Builder.CreateAtomicRMW(
2734 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2735 Name.starts_with(
"atomic.load.dec.32.p")) {
2740 Rep = Builder.CreateAtomicRMW(
2744 }
else if (Name ==
"clz.ll") {
2747 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2748 {Arg, Builder.getFalse()},
2750 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2751 }
else if (Name ==
"popc.ll") {
2755 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2756 Arg,
nullptr,
"ctpop");
2757 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2758 }
else if (Name ==
"h2f") {
2760 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2761 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2762 }
else if (Name.consume_front(
"bitcast.") &&
2763 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2766 }
else if (Name ==
"rotate.b32") {
2769 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2770 {Arg, Arg, ShiftAmt});
2771 }
else if (Name ==
"rotate.b64") {
2775 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2776 {Arg, Arg, ZExtShiftAmt});
2777 }
else if (Name ==
"rotate.right.b64") {
2781 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2782 {Arg, Arg, ZExtShiftAmt});
2783 }
else if (Name ==
"swap.lo.hi.b64") {
2786 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2787 {Arg, Arg, Builder.getInt64(32)});
2788 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2791 Name.starts_with(
".to.gen"))) {
2793 }
else if (Name.consume_front(
"ldg.global")) {
2797 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2800 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2802 }
else if (Name ==
"tanh.approx.f32") {
2806 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2808 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2810 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2811 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2813 }
else if (Name ==
"barrier") {
2814 Rep = Builder.CreateIntrinsic(
2815 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2817 }
else if (Name ==
"barrier.sync") {
2818 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2820 }
else if (Name ==
"barrier.sync.cnt") {
2821 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2823 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2824 Name ==
"barrier0.or") {
2826 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2830 .
Case(
"barrier0.popc",
2831 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2832 .
Case(
"barrier0.and",
2833 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2834 .
Case(
"barrier0.or",
2835 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2836 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2837 Rep = Builder.CreateZExt(Bar, CI->
getType());
2841 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2851 ? Builder.CreateBitCast(Arg, NewType)
2854 Rep = Builder.CreateCall(NewFn, Args);
2855 if (
F->getReturnType()->isIntegerTy())
2856 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2866 Value *Rep =
nullptr;
2868 if (Name.starts_with(
"sse4a.movnt.")) {
2880 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2883 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2884 }
else if (Name.starts_with(
"avx.movnt.") ||
2885 Name.starts_with(
"avx512.storent.")) {
2897 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2898 }
else if (Name ==
"sse2.storel.dq") {
2903 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2904 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2905 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2906 }
else if (Name.starts_with(
"sse.storeu.") ||
2907 Name.starts_with(
"sse2.storeu.") ||
2908 Name.starts_with(
"avx.storeu.")) {
2911 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2912 }
else if (Name ==
"avx512.mask.store.ss") {
2916 }
else if (Name.starts_with(
"avx512.mask.store")) {
2918 bool Aligned = Name[17] !=
'u';
2921 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2924 bool CmpEq = Name[9] ==
'e';
2927 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2928 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2935 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2936 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2938 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2939 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2940 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2941 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2942 Name.starts_with(
"sse2.sqrt.p") ||
2943 Name.starts_with(
"sse.sqrt.p")) {
2944 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2945 {CI->getArgOperand(0)});
2946 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2950 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2951 : Intrinsic::x86_avx512_sqrt_pd_512;
2954 Rep = Builder.CreateIntrinsic(IID, Args);
2956 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2957 {CI->getArgOperand(0)});
2961 }
else if (Name.starts_with(
"avx512.ptestm") ||
2962 Name.starts_with(
"avx512.ptestnm")) {
2966 Rep = Builder.CreateAnd(Op0, Op1);
2972 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2974 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2977 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2980 }
else if (Name.starts_with(
"avx512.kunpck")) {
2985 for (
unsigned i = 0; i != NumElts; ++i)
2994 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2995 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2996 }
else if (Name ==
"avx512.kand.w") {
2999 Rep = Builder.CreateAnd(
LHS,
RHS);
3000 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3001 }
else if (Name ==
"avx512.kandn.w") {
3004 LHS = Builder.CreateNot(
LHS);
3005 Rep = Builder.CreateAnd(
LHS,
RHS);
3006 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3007 }
else if (Name ==
"avx512.kor.w") {
3010 Rep = Builder.CreateOr(
LHS,
RHS);
3011 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3012 }
else if (Name ==
"avx512.kxor.w") {
3015 Rep = Builder.CreateXor(
LHS,
RHS);
3016 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3017 }
else if (Name ==
"avx512.kxnor.w") {
3020 LHS = Builder.CreateNot(
LHS);
3021 Rep = Builder.CreateXor(
LHS,
RHS);
3022 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3023 }
else if (Name ==
"avx512.knot.w") {
3025 Rep = Builder.CreateNot(Rep);
3026 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3027 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3030 Rep = Builder.CreateOr(
LHS,
RHS);
3031 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3033 if (Name[14] ==
'c')
3037 Rep = Builder.CreateICmpEQ(Rep,
C);
3038 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3039 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3040 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3041 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3042 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3045 ConstantInt::get(I32Ty, 0));
3047 ConstantInt::get(I32Ty, 0));
3049 if (Name.contains(
".add."))
3050 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3051 else if (Name.contains(
".sub."))
3052 EltOp = Builder.CreateFSub(Elt0, Elt1);
3053 else if (Name.contains(
".mul."))
3054 EltOp = Builder.CreateFMul(Elt0, Elt1);
3056 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3057 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3058 ConstantInt::get(I32Ty, 0));
3059 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3061 bool CmpEq = Name[16] ==
'e';
3063 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3072 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3075 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3078 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3085 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3090 if (VecWidth == 128 && EltWidth == 32)
3091 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3092 else if (VecWidth == 256 && EltWidth == 32)
3093 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3094 else if (VecWidth == 512 && EltWidth == 32)
3095 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3096 else if (VecWidth == 128 && EltWidth == 64)
3097 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3098 else if (VecWidth == 256 && EltWidth == 64)
3099 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3100 else if (VecWidth == 512 && EltWidth == 64)
3101 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3108 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3110 Type *OpTy = Args[0]->getType();
3114 if (VecWidth == 128 && EltWidth == 32)
3115 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3116 else if (VecWidth == 256 && EltWidth == 32)
3117 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3118 else if (VecWidth == 512 && EltWidth == 32)
3119 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3120 else if (VecWidth == 128 && EltWidth == 64)
3121 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3122 else if (VecWidth == 256 && EltWidth == 64)
3123 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3124 else if (VecWidth == 512 && EltWidth == 64)
3125 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3130 if (VecWidth == 512)
3132 Args.push_back(Mask);
3134 Rep = Builder.CreateIntrinsic(IID, Args);
3135 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3139 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3142 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3143 Name.starts_with(
"avx512.cvtw2mask.") ||
3144 Name.starts_with(
"avx512.cvtd2mask.") ||
3145 Name.starts_with(
"avx512.cvtq2mask.")) {
3150 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3151 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3152 Name.starts_with(
"avx512.mask.pabs")) {
3154 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3155 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3156 Name.starts_with(
"avx512.mask.pmaxs")) {
3158 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3159 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3160 Name.starts_with(
"avx512.mask.pmaxu")) {
3162 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3163 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3164 Name.starts_with(
"avx512.mask.pmins")) {
3166 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3167 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3168 Name.starts_with(
"avx512.mask.pminu")) {
3170 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3171 Name ==
"avx512.pmulu.dq.512" ||
3172 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3174 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3175 Name ==
"avx512.pmul.dq.512" ||
3176 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3178 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3179 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3184 }
else if (Name ==
"avx512.cvtusi2sd") {
3189 }
else if (Name ==
"sse2.cvtss2sd") {
3191 Rep = Builder.CreateFPExt(
3194 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3195 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3196 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3197 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3198 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3199 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3200 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3201 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3202 Name ==
"avx512.mask.cvtqq2ps.256" ||
3203 Name ==
"avx512.mask.cvtqq2ps.512" ||
3204 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3205 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3206 Name ==
"avx.cvt.ps2.pd.256" ||
3207 Name ==
"avx512.mask.cvtps2pd.128" ||
3208 Name ==
"avx512.mask.cvtps2pd.256") {
3213 unsigned NumDstElts = DstTy->getNumElements();
3215 assert(NumDstElts == 2 &&
"Unexpected vector size");
3216 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3219 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3220 bool IsUnsigned = Name.contains(
"cvtu");
3222 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3226 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3227 : Intrinsic::x86_avx512_sitofp_round;
3228 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3231 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3232 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3238 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3239 Name.starts_with(
"vcvtph2ps.")) {
3243 unsigned NumDstElts = DstTy->getNumElements();
3244 if (NumDstElts != SrcTy->getNumElements()) {
3245 assert(NumDstElts == 4 &&
"Unexpected vector size");
3246 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3248 Rep = Builder.CreateBitCast(
3250 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3254 }
else if (Name.starts_with(
"avx512.mask.load")) {
3256 bool Aligned = Name[16] !=
'u';
3259 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3262 ResultTy->getNumElements());
3264 Rep = Builder.CreateIntrinsic(
3265 Intrinsic::masked_expandload, ResultTy,
3267 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3273 Rep = Builder.CreateIntrinsic(
3274 Intrinsic::masked_compressstore, ResultTy,
3276 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3277 Name.starts_with(
"avx512.mask.expand.")) {
3281 ResultTy->getNumElements());
3283 bool IsCompress = Name[12] ==
'c';
3284 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3285 : Intrinsic::x86_avx512_mask_expand;
3286 Rep = Builder.CreateIntrinsic(
3288 }
else if (Name.starts_with(
"xop.vpcom")) {
3290 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3291 Name.ends_with(
"uq"))
3293 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3294 Name.ends_with(
"d") || Name.ends_with(
"q"))
3303 Name = Name.substr(9);
3304 if (Name.starts_with(
"lt"))
3306 else if (Name.starts_with(
"le"))
3308 else if (Name.starts_with(
"gt"))
3310 else if (Name.starts_with(
"ge"))
3312 else if (Name.starts_with(
"eq"))
3314 else if (Name.starts_with(
"ne"))
3316 else if (Name.starts_with(
"false"))
3318 else if (Name.starts_with(
"true"))
3325 }
else if (Name.starts_with(
"xop.vpcmov")) {
3327 Value *NotSel = Builder.CreateNot(Sel);
3330 Rep = Builder.CreateOr(Sel0, Sel1);
3331 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3332 Name.starts_with(
"avx512.mask.prol")) {
3334 }
else if (Name.starts_with(
"avx512.pror") ||
3335 Name.starts_with(
"avx512.mask.pror")) {
3337 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3338 Name.starts_with(
"avx512.mask.vpshld") ||
3339 Name.starts_with(
"avx512.maskz.vpshld")) {
3340 bool ZeroMask = Name[11] ==
'z';
3342 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3343 Name.starts_with(
"avx512.mask.vpshrd") ||
3344 Name.starts_with(
"avx512.maskz.vpshrd")) {
3345 bool ZeroMask = Name[11] ==
'z';
3347 }
else if (Name ==
"sse42.crc32.64.8") {
3350 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3352 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3353 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3354 Name.starts_with(
"avx512.vbroadcast.s")) {
3357 Type *EltTy = VecTy->getElementType();
3358 unsigned EltNum = VecTy->getNumElements();
3362 for (
unsigned I = 0;
I < EltNum; ++
I)
3363 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3364 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3365 Name.starts_with(
"sse41.pmovzx") ||
3366 Name.starts_with(
"avx2.pmovsx") ||
3367 Name.starts_with(
"avx2.pmovzx") ||
3368 Name.starts_with(
"avx512.mask.pmovsx") ||
3369 Name.starts_with(
"avx512.mask.pmovzx")) {
3371 unsigned NumDstElts = DstTy->getNumElements();
3375 for (
unsigned i = 0; i != NumDstElts; ++i)
3380 bool DoSext = Name.contains(
"pmovsx");
3382 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3387 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3388 Name ==
"avx512.mask.pmov.qd.512" ||
3389 Name ==
"avx512.mask.pmov.wb.256" ||
3390 Name ==
"avx512.mask.pmov.wb.512") {
3395 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3396 Name ==
"avx2.vbroadcasti128") {
3402 if (NumSrcElts == 2)
3403 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3405 Rep = Builder.CreateShuffleVector(Load,
3407 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3408 Name.starts_with(
"avx512.mask.shuf.f")) {
3413 unsigned ControlBitsMask = NumLanes - 1;
3414 unsigned NumControlBits = NumLanes / 2;
3417 for (
unsigned l = 0; l != NumLanes; ++l) {
3418 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3420 if (l >= NumLanes / 2)
3421 LaneMask += NumLanes;
3422 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3423 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3429 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3430 Name.starts_with(
"avx512.mask.broadcasti")) {
3433 unsigned NumDstElts =
3437 for (
unsigned i = 0; i != NumDstElts; ++i)
3438 ShuffleMask[i] = i % NumSrcElts;
3444 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3445 Name.starts_with(
"avx2.vbroadcast") ||
3446 Name.starts_with(
"avx512.pbroadcast") ||
3447 Name.starts_with(
"avx512.mask.broadcast.s")) {
3454 Rep = Builder.CreateShuffleVector(
Op, M);
3459 }
else if (Name.starts_with(
"sse2.padds.") ||
3460 Name.starts_with(
"avx2.padds.") ||
3461 Name.starts_with(
"avx512.padds.") ||
3462 Name.starts_with(
"avx512.mask.padds.")) {
3464 }
else if (Name.starts_with(
"sse2.psubs.") ||
3465 Name.starts_with(
"avx2.psubs.") ||
3466 Name.starts_with(
"avx512.psubs.") ||
3467 Name.starts_with(
"avx512.mask.psubs.")) {
3469 }
else if (Name.starts_with(
"sse2.paddus.") ||
3470 Name.starts_with(
"avx2.paddus.") ||
3471 Name.starts_with(
"avx512.mask.paddus.")) {
3473 }
else if (Name.starts_with(
"sse2.psubus.") ||
3474 Name.starts_with(
"avx2.psubus.") ||
3475 Name.starts_with(
"avx512.mask.psubus.")) {
3477 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3482 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3486 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3491 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3496 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3497 Name ==
"avx512.psll.dq.512") {
3501 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3502 Name ==
"avx512.psrl.dq.512") {
3506 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3507 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3508 Name.starts_with(
"avx2.pblendd.")) {
3513 unsigned NumElts = VecTy->getNumElements();
3516 for (
unsigned i = 0; i != NumElts; ++i)
3517 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3519 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3520 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3521 Name ==
"avx2.vinserti128" ||
3522 Name.starts_with(
"avx512.mask.insert")) {
3526 unsigned DstNumElts =
3528 unsigned SrcNumElts =
3530 unsigned Scale = DstNumElts / SrcNumElts;
3537 for (
unsigned i = 0; i != SrcNumElts; ++i)
3539 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3540 Idxs[i] = SrcNumElts;
3541 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3555 for (
unsigned i = 0; i != DstNumElts; ++i)
3558 for (
unsigned i = 0; i != SrcNumElts; ++i)
3559 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3560 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3566 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3567 Name ==
"avx2.vextracti128" ||
3568 Name.starts_with(
"avx512.mask.vextract")) {
3571 unsigned DstNumElts =
3573 unsigned SrcNumElts =
3575 unsigned Scale = SrcNumElts / DstNumElts;
3582 for (
unsigned i = 0; i != DstNumElts; ++i) {
3583 Idxs[i] = i + (Imm * DstNumElts);
3585 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3591 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3592 Name.starts_with(
"avx512.mask.perm.di.")) {
3596 unsigned NumElts = VecTy->getNumElements();
3599 for (
unsigned i = 0; i != NumElts; ++i)
3600 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3602 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3607 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3619 unsigned HalfSize = NumElts / 2;
3631 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3632 for (
unsigned i = 0; i < HalfSize; ++i)
3633 ShuffleMask[i] = StartIndex + i;
3636 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3637 for (
unsigned i = 0; i < HalfSize; ++i)
3638 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3640 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3642 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3643 Name.starts_with(
"avx512.mask.vpermil.p") ||
3644 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3648 unsigned NumElts = VecTy->getNumElements();
3650 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3651 unsigned IdxMask = ((1 << IdxSize) - 1);
3657 for (
unsigned i = 0; i != NumElts; ++i)
3658 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3660 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3665 }
else if (Name ==
"sse2.pshufl.w" ||
3666 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3671 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3675 for (
unsigned l = 0; l != NumElts; l += 8) {
3676 for (
unsigned i = 0; i != 4; ++i)
3677 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3678 for (
unsigned i = 4; i != 8; ++i)
3679 Idxs[i + l] = i + l;
3682 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3687 }
else if (Name ==
"sse2.pshufh.w" ||
3688 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3693 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3697 for (
unsigned l = 0; l != NumElts; l += 8) {
3698 for (
unsigned i = 0; i != 4; ++i)
3699 Idxs[i + l] = i + l;
3700 for (
unsigned i = 0; i != 4; ++i)
3701 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3704 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3709 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3716 unsigned HalfLaneElts = NumLaneElts / 2;
3719 for (
unsigned i = 0; i != NumElts; ++i) {
3721 Idxs[i] = i - (i % NumLaneElts);
3723 if ((i % NumLaneElts) >= HalfLaneElts)
3727 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3730 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3734 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3735 Name.starts_with(
"avx512.mask.movshdup") ||
3736 Name.starts_with(
"avx512.mask.movsldup")) {
3742 if (Name.starts_with(
"avx512.mask.movshdup."))
3746 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3747 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3748 Idxs[i + l + 0] = i + l +
Offset;
3749 Idxs[i + l + 1] = i + l +
Offset;
3752 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3756 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3757 Name.starts_with(
"avx512.mask.unpckl.")) {
3764 for (
int l = 0; l != NumElts; l += NumLaneElts)
3765 for (
int i = 0; i != NumLaneElts; ++i)
3766 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3768 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3772 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3773 Name.starts_with(
"avx512.mask.unpckh.")) {
3780 for (
int l = 0; l != NumElts; l += NumLaneElts)
3781 for (
int i = 0; i != NumLaneElts; ++i)
3782 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3784 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3788 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3789 Name.starts_with(
"avx512.mask.pand.")) {
3792 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3794 Rep = Builder.CreateBitCast(Rep, FTy);
3797 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3798 Name.starts_with(
"avx512.mask.pandn.")) {
3801 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3802 Rep = Builder.CreateAnd(Rep,
3804 Rep = Builder.CreateBitCast(Rep, FTy);
3807 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3808 Name.starts_with(
"avx512.mask.por.")) {
3811 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3813 Rep = Builder.CreateBitCast(Rep, FTy);
3816 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3817 Name.starts_with(
"avx512.mask.pxor.")) {
3820 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3822 Rep = Builder.CreateBitCast(Rep, FTy);
3825 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3829 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3833 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3837 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3838 if (Name.ends_with(
".512")) {
3840 if (Name[17] ==
's')
3841 IID = Intrinsic::x86_avx512_add_ps_512;
3843 IID = Intrinsic::x86_avx512_add_pd_512;
3845 Rep = Builder.CreateIntrinsic(
3853 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3854 if (Name.ends_with(
".512")) {
3856 if (Name[17] ==
's')
3857 IID = Intrinsic::x86_avx512_div_ps_512;
3859 IID = Intrinsic::x86_avx512_div_pd_512;
3861 Rep = Builder.CreateIntrinsic(
3869 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3870 if (Name.ends_with(
".512")) {
3872 if (Name[17] ==
's')
3873 IID = Intrinsic::x86_avx512_mul_ps_512;
3875 IID = Intrinsic::x86_avx512_mul_pd_512;
3877 Rep = Builder.CreateIntrinsic(
3885 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3886 if (Name.ends_with(
".512")) {
3888 if (Name[17] ==
's')
3889 IID = Intrinsic::x86_avx512_sub_ps_512;
3891 IID = Intrinsic::x86_avx512_sub_pd_512;
3893 Rep = Builder.CreateIntrinsic(
3901 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3902 Name.starts_with(
"avx512.mask.min.p")) &&
3903 Name.drop_front(18) ==
".512") {
3904 bool IsDouble = Name[17] ==
'd';
3905 bool IsMin = Name[13] ==
'i';
3907 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3908 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3911 Rep = Builder.CreateIntrinsic(
3916 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3918 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3919 {CI->getArgOperand(0), Builder.getInt1(false)});
3922 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3923 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3924 bool IsVariable = Name[16] ==
'v';
3925 char Size = Name[16] ==
'.' ? Name[17]
3926 : Name[17] ==
'.' ? Name[18]
3927 : Name[18] ==
'.' ? Name[19]
3931 if (IsVariable && Name[17] !=
'.') {
3932 if (
Size ==
'd' && Name[17] ==
'2')
3933 IID = Intrinsic::x86_avx2_psllv_q;
3934 else if (
Size ==
'd' && Name[17] ==
'4')
3935 IID = Intrinsic::x86_avx2_psllv_q_256;
3936 else if (
Size ==
's' && Name[17] ==
'4')
3937 IID = Intrinsic::x86_avx2_psllv_d;
3938 else if (
Size ==
's' && Name[17] ==
'8')
3939 IID = Intrinsic::x86_avx2_psllv_d_256;
3940 else if (
Size ==
'h' && Name[17] ==
'8')
3941 IID = Intrinsic::x86_avx512_psllv_w_128;
3942 else if (
Size ==
'h' && Name[17] ==
'1')
3943 IID = Intrinsic::x86_avx512_psllv_w_256;
3944 else if (Name[17] ==
'3' && Name[18] ==
'2')
3945 IID = Intrinsic::x86_avx512_psllv_w_512;
3948 }
else if (Name.ends_with(
".128")) {
3950 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3951 : Intrinsic::x86_sse2_psll_d;
3952 else if (
Size ==
'q')
3953 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3954 : Intrinsic::x86_sse2_psll_q;
3955 else if (
Size ==
'w')
3956 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3957 : Intrinsic::x86_sse2_psll_w;
3960 }
else if (Name.ends_with(
".256")) {
3962 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3963 : Intrinsic::x86_avx2_psll_d;
3964 else if (
Size ==
'q')
3965 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3966 : Intrinsic::x86_avx2_psll_q;
3967 else if (
Size ==
'w')
3968 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3969 : Intrinsic::x86_avx2_psll_w;
3974 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3975 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3976 : Intrinsic::x86_avx512_psll_d_512;
3977 else if (
Size ==
'q')
3978 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3979 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3980 : Intrinsic::x86_avx512_psll_q_512;
3981 else if (
Size ==
'w')
3982 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3983 : Intrinsic::x86_avx512_psll_w_512;
3989 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3990 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3991 bool IsVariable = Name[16] ==
'v';
3992 char Size = Name[16] ==
'.' ? Name[17]
3993 : Name[17] ==
'.' ? Name[18]
3994 : Name[18] ==
'.' ? Name[19]
3998 if (IsVariable && Name[17] !=
'.') {
3999 if (
Size ==
'd' && Name[17] ==
'2')
4000 IID = Intrinsic::x86_avx2_psrlv_q;
4001 else if (
Size ==
'd' && Name[17] ==
'4')
4002 IID = Intrinsic::x86_avx2_psrlv_q_256;
4003 else if (
Size ==
's' && Name[17] ==
'4')
4004 IID = Intrinsic::x86_avx2_psrlv_d;
4005 else if (
Size ==
's' && Name[17] ==
'8')
4006 IID = Intrinsic::x86_avx2_psrlv_d_256;
4007 else if (
Size ==
'h' && Name[17] ==
'8')
4008 IID = Intrinsic::x86_avx512_psrlv_w_128;
4009 else if (
Size ==
'h' && Name[17] ==
'1')
4010 IID = Intrinsic::x86_avx512_psrlv_w_256;
4011 else if (Name[17] ==
'3' && Name[18] ==
'2')
4012 IID = Intrinsic::x86_avx512_psrlv_w_512;
4015 }
else if (Name.ends_with(
".128")) {
4017 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4018 : Intrinsic::x86_sse2_psrl_d;
4019 else if (
Size ==
'q')
4020 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4021 : Intrinsic::x86_sse2_psrl_q;
4022 else if (
Size ==
'w')
4023 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4024 : Intrinsic::x86_sse2_psrl_w;
4027 }
else if (Name.ends_with(
".256")) {
4029 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4030 : Intrinsic::x86_avx2_psrl_d;
4031 else if (
Size ==
'q')
4032 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4033 : Intrinsic::x86_avx2_psrl_q;
4034 else if (
Size ==
'w')
4035 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4036 : Intrinsic::x86_avx2_psrl_w;
4041 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4042 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4043 : Intrinsic::x86_avx512_psrl_d_512;
4044 else if (
Size ==
'q')
4045 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4046 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4047 : Intrinsic::x86_avx512_psrl_q_512;
4048 else if (
Size ==
'w')
4049 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4050 : Intrinsic::x86_avx512_psrl_w_512;
4056 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4057 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4058 bool IsVariable = Name[16] ==
'v';
4059 char Size = Name[16] ==
'.' ? Name[17]
4060 : Name[17] ==
'.' ? Name[18]
4061 : Name[18] ==
'.' ? Name[19]
4065 if (IsVariable && Name[17] !=
'.') {
4066 if (
Size ==
's' && Name[17] ==
'4')
4067 IID = Intrinsic::x86_avx2_psrav_d;
4068 else if (
Size ==
's' && Name[17] ==
'8')
4069 IID = Intrinsic::x86_avx2_psrav_d_256;
4070 else if (
Size ==
'h' && Name[17] ==
'8')
4071 IID = Intrinsic::x86_avx512_psrav_w_128;
4072 else if (
Size ==
'h' && Name[17] ==
'1')
4073 IID = Intrinsic::x86_avx512_psrav_w_256;
4074 else if (Name[17] ==
'3' && Name[18] ==
'2')
4075 IID = Intrinsic::x86_avx512_psrav_w_512;
4078 }
else if (Name.ends_with(
".128")) {
4080 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4081 : Intrinsic::x86_sse2_psra_d;
4082 else if (
Size ==
'q')
4083 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4084 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4085 : Intrinsic::x86_avx512_psra_q_128;
4086 else if (
Size ==
'w')
4087 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4088 : Intrinsic::x86_sse2_psra_w;
4091 }
else if (Name.ends_with(
".256")) {
4093 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4094 : Intrinsic::x86_avx2_psra_d;
4095 else if (
Size ==
'q')
4096 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4097 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4098 : Intrinsic::x86_avx512_psra_q_256;
4099 else if (
Size ==
'w')
4100 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4101 : Intrinsic::x86_avx2_psra_w;
4106 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4107 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4108 : Intrinsic::x86_avx512_psra_d_512;
4109 else if (
Size ==
'q')
4110 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4111 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4112 : Intrinsic::x86_avx512_psra_q_512;
4113 else if (
Size ==
'w')
4114 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4115 : Intrinsic::x86_avx512_psra_w_512;
4121 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4123 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4125 }
else if (Name.ends_with(
".movntdqa")) {
4129 LoadInst *LI = Builder.CreateAlignedLoad(
4134 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4135 Name.starts_with(
"fma.vfmsub.") ||
4136 Name.starts_with(
"fma.vfnmadd.") ||
4137 Name.starts_with(
"fma.vfnmsub.")) {
4138 bool NegMul = Name[6] ==
'n';
4139 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4140 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4151 if (NegMul && !IsScalar)
4152 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4153 if (NegMul && IsScalar)
4154 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4156 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4158 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4162 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4170 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4174 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4175 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4176 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4177 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4178 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4179 bool IsMask3 = Name[11] ==
'3';
4180 bool IsMaskZ = Name[11] ==
'z';
4182 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4183 bool NegMul = Name[2] ==
'n';
4184 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4190 if (NegMul && (IsMask3 || IsMaskZ))
4191 A = Builder.CreateFNeg(
A);
4192 if (NegMul && !(IsMask3 || IsMaskZ))
4193 B = Builder.CreateFNeg(
B);
4195 C = Builder.CreateFNeg(
C);
4197 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4198 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4199 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4206 if (Name.back() ==
'd')
4207 IID = Intrinsic::x86_avx512_vfmadd_f64;
4209 IID = Intrinsic::x86_avx512_vfmadd_f32;
4210 Rep = Builder.CreateIntrinsic(IID,
Ops);
4212 Rep = Builder.CreateFMA(
A,
B,
C);
4221 if (NegAcc && IsMask3)
4226 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4228 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4229 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4230 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4231 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4232 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4233 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4234 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4235 bool IsMask3 = Name[11] ==
'3';
4236 bool IsMaskZ = Name[11] ==
'z';
4238 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4239 bool NegMul = Name[2] ==
'n';
4240 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4246 if (NegMul && (IsMask3 || IsMaskZ))
4247 A = Builder.CreateFNeg(
A);
4248 if (NegMul && !(IsMask3 || IsMaskZ))
4249 B = Builder.CreateFNeg(
B);
4251 C = Builder.CreateFNeg(
C);
4258 if (Name[Name.size() - 5] ==
's')
4259 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4261 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4265 Rep = Builder.CreateFMA(
A,
B,
C);
4273 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4277 if (VecWidth == 128 && EltWidth == 32)
4278 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4279 else if (VecWidth == 256 && EltWidth == 32)
4280 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4281 else if (VecWidth == 128 && EltWidth == 64)
4282 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4283 else if (VecWidth == 256 && EltWidth == 64)
4284 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4290 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4291 Rep = Builder.CreateIntrinsic(IID,
Ops);
4292 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4293 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4294 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4295 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4296 bool IsMask3 = Name[11] ==
'3';
4297 bool IsMaskZ = Name[11] ==
'z';
4299 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4300 bool IsSubAdd = Name[3] ==
's';
4304 if (Name[Name.size() - 5] ==
's')
4305 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4307 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4312 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4314 Rep = Builder.CreateIntrinsic(IID,
Ops);
4323 Value *Odd = Builder.CreateCall(FMA,
Ops);
4324 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4325 Value *Even = Builder.CreateCall(FMA,
Ops);
4331 for (
int i = 0; i != NumElts; ++i)
4332 Idxs[i] = i + (i % 2) * NumElts;
4334 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4342 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4343 Name.starts_with(
"avx512.maskz.pternlog.")) {
4344 bool ZeroMask = Name[11] ==
'z';
4348 if (VecWidth == 128 && EltWidth == 32)
4349 IID = Intrinsic::x86_avx512_pternlog_d_128;
4350 else if (VecWidth == 256 && EltWidth == 32)
4351 IID = Intrinsic::x86_avx512_pternlog_d_256;
4352 else if (VecWidth == 512 && EltWidth == 32)
4353 IID = Intrinsic::x86_avx512_pternlog_d_512;
4354 else if (VecWidth == 128 && EltWidth == 64)
4355 IID = Intrinsic::x86_avx512_pternlog_q_128;
4356 else if (VecWidth == 256 && EltWidth == 64)
4357 IID = Intrinsic::x86_avx512_pternlog_q_256;
4358 else if (VecWidth == 512 && EltWidth == 64)
4359 IID = Intrinsic::x86_avx512_pternlog_q_512;
4365 Rep = Builder.CreateIntrinsic(IID, Args);
4369 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4370 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4371 bool ZeroMask = Name[11] ==
'z';
4372 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4375 if (VecWidth == 128 && !
High)
4376 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4377 else if (VecWidth == 256 && !
High)
4378 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4379 else if (VecWidth == 512 && !
High)
4380 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4381 else if (VecWidth == 128 &&
High)
4382 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4383 else if (VecWidth == 256 &&
High)
4384 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4385 else if (VecWidth == 512 &&
High)
4386 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4392 Rep = Builder.CreateIntrinsic(IID, Args);
4396 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4397 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4398 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4399 bool ZeroMask = Name[11] ==
'z';
4400 bool IndexForm = Name[17] ==
'i';
4402 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4403 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4404 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4405 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4406 bool ZeroMask = Name[11] ==
'z';
4407 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4410 if (VecWidth == 128 && !IsSaturating)
4411 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4412 else if (VecWidth == 256 && !IsSaturating)
4413 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4414 else if (VecWidth == 512 && !IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4416 else if (VecWidth == 128 && IsSaturating)
4417 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4418 else if (VecWidth == 256 && IsSaturating)
4419 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4420 else if (VecWidth == 512 && IsSaturating)
4421 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4431 if (Args[1]->
getType()->isVectorTy() &&
4434 ->isIntegerTy(32) &&
4435 Args[2]->
getType()->isVectorTy() &&
4438 ->isIntegerTy(32)) {
4439 Type *NewArgType =
nullptr;
4440 if (VecWidth == 128)
4442 else if (VecWidth == 256)
4444 else if (VecWidth == 512)
4450 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4451 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4454 Rep = Builder.CreateIntrinsic(IID, Args);
4458 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4459 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4460 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4461 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4462 bool ZeroMask = Name[11] ==
'z';
4463 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4466 if (VecWidth == 128 && !IsSaturating)
4467 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4468 else if (VecWidth == 256 && !IsSaturating)
4469 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4470 else if (VecWidth == 512 && !IsSaturating)
4471 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4472 else if (VecWidth == 128 && IsSaturating)
4473 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4474 else if (VecWidth == 256 && IsSaturating)
4475 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4476 else if (VecWidth == 512 && IsSaturating)
4477 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4487 if (Args[1]->
getType()->isVectorTy() &&
4490 ->isIntegerTy(32) &&
4491 Args[2]->
getType()->isVectorTy() &&
4494 ->isIntegerTy(32)) {
4495 Type *NewArgType =
nullptr;
4496 if (VecWidth == 128)
4498 else if (VecWidth == 256)
4500 else if (VecWidth == 512)
4506 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4507 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4510 Rep = Builder.CreateIntrinsic(IID, Args);
4514 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4515 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4516 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4518 if (Name[0] ==
'a' && Name.back() ==
'2')
4519 IID = Intrinsic::x86_addcarry_32;
4520 else if (Name[0] ==
'a' && Name.back() ==
'4')
4521 IID = Intrinsic::x86_addcarry_64;
4522 else if (Name[0] ==
's' && Name.back() ==
'2')
4523 IID = Intrinsic::x86_subborrow_32;
4524 else if (Name[0] ==
's' && Name.back() ==
'4')
4525 IID = Intrinsic::x86_subborrow_64;
4532 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4535 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4538 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4542 }
else if (Name.starts_with(
"avx512.mask.") &&
4553 if (Name.starts_with(
"neon.bfcvt")) {
4554 if (Name.starts_with(
"neon.bfcvtn2")) {
4556 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4558 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4559 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4562 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4563 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4565 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4569 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4570 return Builder.CreateShuffleVector(
4573 return Builder.CreateFPTrunc(CI->
getOperand(0),
4576 }
else if (Name.starts_with(
"sve.fcvt")) {
4579 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4580 .
Case(
"sve.fcvtnt.bf16f32",
4581 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4593 if (Args[1]->
getType() != BadPredTy)
4596 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4597 BadPredTy, Args[1]);
4598 Args[1] = Builder.CreateIntrinsic(
4599 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4601 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4610 if (Name ==
"mve.vctp64.old") {
4613 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4616 Value *C1 = Builder.CreateIntrinsic(
4617 Intrinsic::arm_mve_pred_v2i,
4619 return Builder.CreateIntrinsic(
4620 Intrinsic::arm_mve_pred_i2v,
4622 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4623 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4624 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4625 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4627 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4628 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4629 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4630 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4632 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4633 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4634 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4635 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4636 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4637 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4638 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4639 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4640 std::vector<Type *> Tys;
4644 case Intrinsic::arm_mve_mull_int_predicated:
4645 case Intrinsic::arm_mve_vqdmull_predicated:
4646 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4649 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4650 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4651 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4655 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4659 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4663 case Intrinsic::arm_cde_vcx1q_predicated:
4664 case Intrinsic::arm_cde_vcx1qa_predicated:
4665 case Intrinsic::arm_cde_vcx2q_predicated:
4666 case Intrinsic::arm_cde_vcx2qa_predicated:
4667 case Intrinsic::arm_cde_vcx3q_predicated:
4668 case Intrinsic::arm_cde_vcx3qa_predicated:
4675 std::vector<Value *>
Ops;
4677 Type *Ty =
Op->getType();
4678 if (Ty->getScalarSizeInBits() == 1) {
4679 Value *C1 = Builder.CreateIntrinsic(
4680 Intrinsic::arm_mve_pred_v2i,
4682 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4687 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4702 auto UpgradeLegacyWMMAIUIntrinsicCall =
4707 Args.push_back(Builder.getFalse());
4711 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4718 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4723 NewCall->copyMetadata(*CI);
4727 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4728 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4729 "intrinsic should have 7 arguments");
4732 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4734 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4735 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4736 "intrinsic should have 8 arguments");
4741 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4744 switch (
F->getIntrinsicID()) {
4747 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4748 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4749 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4750 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4751 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4752 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4767 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4770 F->getParent(),
F->getIntrinsicID(), Overloads);
4775 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4780 NewCall->copyMetadata(*CI);
4781 NewCall->takeName(CI);
4803 if (NumOperands < 3)
4816 bool IsVolatile =
false;
4820 if (NumOperands > 3)
4825 if (NumOperands > 5) {
4827 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4841 if (VT->getElementType()->isIntegerTy(16)) {
4844 Val = Builder.CreateBitCast(Val, AsBF16);
4852 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4854 unsigned AddrSpace = PtrTy->getAddressSpace();
4857 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4859 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4864 MDNode *RangeNotPrivate =
4867 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4873 return Builder.CreateBitCast(RMW, RetTy);
4894 return MAV->getMetadata();
4901 return I->getDebugLoc().getAsMDNode();
4909 if (Name ==
"label") {
4912 }
else if (Name ==
"assign") {
4919 }
else if (Name ==
"declare") {
4924 }
else if (Name ==
"addr") {
4934 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4936 }
else if (Name ==
"value") {
4939 unsigned ExprOp = 2;
4953 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4961 int64_t OffsetVal =
Offset->getSExtValue();
4962 return Builder.CreateIntrinsic(OffsetVal >= 0
4963 ? Intrinsic::vector_splice_left
4964 : Intrinsic::vector_splice_right,
4966 {CI->getArgOperand(0), CI->getArgOperand(1),
4967 Builder.getInt32(std::abs(OffsetVal))});
4972 if (Name.starts_with(
"to.fp16")) {
4974 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4975 return Builder.CreateBitCast(Cast, CI->
getType());
4978 if (Name.starts_with(
"from.fp16")) {
4980 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4981 return Builder.CreateFPExt(Cast, CI->
getType());
5006 if (!Name.consume_front(
"llvm."))
5009 bool IsX86 = Name.consume_front(
"x86.");
5010 bool IsNVVM = Name.consume_front(
"nvvm.");
5011 bool IsAArch64 = Name.consume_front(
"aarch64.");
5012 bool IsARM = Name.consume_front(
"arm.");
5013 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5014 bool IsDbg = Name.consume_front(
"dbg.");
5016 (Name.consume_front(
"experimental.vector.splice") ||
5017 Name.consume_front(
"vector.splice")) &&
5018 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5019 Value *Rep =
nullptr;
5021 if (!IsX86 && Name ==
"stackprotectorcheck") {
5023 }
else if (IsNVVM) {
5027 }
else if (IsAArch64) {
5031 }
else if (IsAMDGCN) {
5035 }
else if (IsOldSplice) {
5037 }
else if (Name.consume_front(
"convert.")) {
5049 const auto &DefaultCase = [&]() ->
void {
5057 "Unknown function for CallBase upgrade and isn't just a name change");
5065 "Return type must have changed");
5066 assert(OldST->getNumElements() ==
5068 "Must have same number of elements");
5071 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5074 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5075 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5076 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5095 case Intrinsic::arm_neon_vst1:
5096 case Intrinsic::arm_neon_vst2:
5097 case Intrinsic::arm_neon_vst3:
5098 case Intrinsic::arm_neon_vst4:
5099 case Intrinsic::arm_neon_vst2lane:
5100 case Intrinsic::arm_neon_vst3lane:
5101 case Intrinsic::arm_neon_vst4lane: {
5103 NewCall = Builder.CreateCall(NewFn, Args);
5106 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5107 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5108 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5113 NewCall = Builder.CreateCall(NewFn, Args);
5116 case Intrinsic::aarch64_sve_ld3_sret:
5117 case Intrinsic::aarch64_sve_ld4_sret:
5118 case Intrinsic::aarch64_sve_ld2_sret: {
5126 Name = Name.substr(5);
5133 unsigned MinElts = RetTy->getMinNumElements() /
N;
5135 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5137 for (
unsigned I = 0;
I <
N;
I++) {
5138 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5139 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5145 case Intrinsic::coro_end: {
5148 NewCall = Builder.CreateCall(NewFn, Args);
5152 case Intrinsic::vector_extract: {
5154 Name = Name.substr(5);
5155 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5160 unsigned MinElts = RetTy->getMinNumElements();
5163 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5167 case Intrinsic::vector_insert: {
5169 Name = Name.substr(5);
5170 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5174 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5179 NewCall = Builder.CreateCall(
5183 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5189 assert(
N > 1 &&
"Create is expected to be between 2-4");
5192 unsigned MinElts = RetTy->getMinNumElements() /
N;
5193 for (
unsigned I = 0;
I <
N;
I++) {
5195 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5202 case Intrinsic::arm_neon_bfdot:
5203 case Intrinsic::arm_neon_bfmmla:
5204 case Intrinsic::arm_neon_bfmlalb:
5205 case Intrinsic::arm_neon_bfmlalt:
5206 case Intrinsic::aarch64_neon_bfdot:
5207 case Intrinsic::aarch64_neon_bfmmla:
5208 case Intrinsic::aarch64_neon_bfmlalb:
5209 case Intrinsic::aarch64_neon_bfmlalt: {
5212 "Mismatch between function args and call args");
5213 size_t OperandWidth =
5215 assert((OperandWidth == 64 || OperandWidth == 128) &&
5216 "Unexpected operand width");
5218 auto Iter = CI->
args().begin();
5219 Args.push_back(*Iter++);
5220 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5221 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5222 NewCall = Builder.CreateCall(NewFn, Args);
5226 case Intrinsic::bitreverse:
5227 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5230 case Intrinsic::ctlz:
5231 case Intrinsic::cttz: {
5238 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5242 case Intrinsic::objectsize: {
5243 Value *NullIsUnknownSize =
5247 NewCall = Builder.CreateCall(
5252 case Intrinsic::ctpop:
5253 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5255 case Intrinsic::dbg_value: {
5257 Name = Name.substr(5);
5259 if (Name.starts_with(
"dbg.addr")) {
5273 if (
Offset->isNullValue()) {
5274 NewCall = Builder.CreateCall(
5283 case Intrinsic::ptr_annotation:
5291 NewCall = Builder.CreateCall(
5300 case Intrinsic::var_annotation:
5307 NewCall = Builder.CreateCall(
5316 case Intrinsic::riscv_aes32dsi:
5317 case Intrinsic::riscv_aes32dsmi:
5318 case Intrinsic::riscv_aes32esi:
5319 case Intrinsic::riscv_aes32esmi:
5320 case Intrinsic::riscv_sm4ks:
5321 case Intrinsic::riscv_sm4ed: {
5331 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5332 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5338 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5339 Value *Res = NewCall;
5341 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5347 case Intrinsic::nvvm_mapa_shared_cluster: {
5351 Value *Res = NewCall;
5352 Res = Builder.CreateAddrSpaceCast(
5359 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5360 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5363 Args[0] = Builder.CreateAddrSpaceCast(
5366 NewCall = Builder.CreateCall(NewFn, Args);
5372 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5373 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5374 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5375 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5376 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5377 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5378 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5379 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5386 Args[0] = Builder.CreateAddrSpaceCast(
5395 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5397 NewCall = Builder.CreateCall(NewFn, Args);
5403 case Intrinsic::riscv_sha256sig0:
5404 case Intrinsic::riscv_sha256sig1:
5405 case Intrinsic::riscv_sha256sum0:
5406 case Intrinsic::riscv_sha256sum1:
5407 case Intrinsic::riscv_sm3p0:
5408 case Intrinsic::riscv_sm3p1: {
5415 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5417 NewCall = Builder.CreateCall(NewFn, Arg);
5419 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5426 case Intrinsic::x86_xop_vfrcz_ss:
5427 case Intrinsic::x86_xop_vfrcz_sd:
5428 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5431 case Intrinsic::x86_xop_vpermil2pd:
5432 case Intrinsic::x86_xop_vpermil2ps:
5433 case Intrinsic::x86_xop_vpermil2pd_256:
5434 case Intrinsic::x86_xop_vpermil2ps_256: {
5438 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5439 NewCall = Builder.CreateCall(NewFn, Args);
5443 case Intrinsic::x86_sse41_ptestc:
5444 case Intrinsic::x86_sse41_ptestz:
5445 case Intrinsic::x86_sse41_ptestnzc: {
5459 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5460 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5462 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5466 case Intrinsic::x86_rdtscp: {
5472 NewCall = Builder.CreateCall(NewFn);
5474 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5477 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5485 case Intrinsic::x86_sse41_insertps:
5486 case Intrinsic::x86_sse41_dppd:
5487 case Intrinsic::x86_sse41_dpps:
5488 case Intrinsic::x86_sse41_mpsadbw:
5489 case Intrinsic::x86_avx_dp_ps_256:
5490 case Intrinsic::x86_avx2_mpsadbw: {
5496 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5497 NewCall = Builder.CreateCall(NewFn, Args);
5501 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5502 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5503 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5504 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5505 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5506 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5512 NewCall = Builder.CreateCall(NewFn, Args);
5521 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5522 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5523 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5524 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5525 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5526 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5530 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5531 Args[1] = Builder.CreateBitCast(
5534 NewCall = Builder.CreateCall(NewFn, Args);
5535 Value *Res = Builder.CreateBitCast(
5543 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5544 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5545 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5549 Args[1] = Builder.CreateBitCast(
5551 Args[2] = Builder.CreateBitCast(
5554 NewCall = Builder.CreateCall(NewFn, Args);
5558 case Intrinsic::thread_pointer: {
5559 NewCall = Builder.CreateCall(NewFn, {});
5563 case Intrinsic::memcpy:
5564 case Intrinsic::memmove:
5565 case Intrinsic::memset: {
5581 NewCall = Builder.CreateCall(NewFn, Args);
5583 AttributeList NewAttrs = AttributeList::get(
5584 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5585 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5586 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5591 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5594 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5598 case Intrinsic::masked_load:
5599 case Intrinsic::masked_gather:
5600 case Intrinsic::masked_store:
5601 case Intrinsic::masked_scatter: {
5607 auto GetMaybeAlign = [](
Value *
Op) {
5617 auto GetAlign = [&](
Value *
Op) {
5626 case Intrinsic::masked_load:
5627 NewCall = Builder.CreateMaskedLoad(
5631 case Intrinsic::masked_gather:
5632 NewCall = Builder.CreateMaskedGather(
5638 case Intrinsic::masked_store:
5639 NewCall = Builder.CreateMaskedStore(
5643 case Intrinsic::masked_scatter:
5644 NewCall = Builder.CreateMaskedScatter(
5646 DL.getValueOrABITypeAlignment(
5660 case Intrinsic::lifetime_start:
5661 case Intrinsic::lifetime_end: {
5673 NewCall = Builder.CreateLifetimeStart(Ptr);
5675 NewCall = Builder.CreateLifetimeEnd(Ptr);
5684 case Intrinsic::x86_avx512_vpdpbusd_128:
5685 case Intrinsic::x86_avx512_vpdpbusd_256:
5686 case Intrinsic::x86_avx512_vpdpbusd_512:
5687 case Intrinsic::x86_avx512_vpdpbusds_128:
5688 case Intrinsic::x86_avx512_vpdpbusds_256:
5689 case Intrinsic::x86_avx512_vpdpbusds_512:
5690 case Intrinsic::x86_avx2_vpdpbssd_128:
5691 case Intrinsic::x86_avx2_vpdpbssd_256:
5692 case Intrinsic::x86_avx10_vpdpbssd_512:
5693 case Intrinsic::x86_avx2_vpdpbssds_128:
5694 case Intrinsic::x86_avx2_vpdpbssds_256:
5695 case Intrinsic::x86_avx10_vpdpbssds_512:
5696 case Intrinsic::x86_avx2_vpdpbsud_128:
5697 case Intrinsic::x86_avx2_vpdpbsud_256:
5698 case Intrinsic::x86_avx10_vpdpbsud_512:
5699 case Intrinsic::x86_avx2_vpdpbsuds_128:
5700 case Intrinsic::x86_avx2_vpdpbsuds_256:
5701 case Intrinsic::x86_avx10_vpdpbsuds_512:
5702 case Intrinsic::x86_avx2_vpdpbuud_128:
5703 case Intrinsic::x86_avx2_vpdpbuud_256:
5704 case Intrinsic::x86_avx10_vpdpbuud_512:
5705 case Intrinsic::x86_avx2_vpdpbuuds_128:
5706 case Intrinsic::x86_avx2_vpdpbuuds_256:
5707 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5712 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5713 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5715 NewCall = Builder.CreateCall(NewFn, Args);
5718 case Intrinsic::x86_avx512_vpdpwssd_128:
5719 case Intrinsic::x86_avx512_vpdpwssd_256:
5720 case Intrinsic::x86_avx512_vpdpwssd_512:
5721 case Intrinsic::x86_avx512_vpdpwssds_128:
5722 case Intrinsic::x86_avx512_vpdpwssds_256:
5723 case Intrinsic::x86_avx512_vpdpwssds_512:
5724 case Intrinsic::x86_avx2_vpdpwsud_128:
5725 case Intrinsic::x86_avx2_vpdpwsud_256:
5726 case Intrinsic::x86_avx10_vpdpwsud_512:
5727 case Intrinsic::x86_avx2_vpdpwsuds_128:
5728 case Intrinsic::x86_avx2_vpdpwsuds_256:
5729 case Intrinsic::x86_avx10_vpdpwsuds_512:
5730 case Intrinsic::x86_avx2_vpdpwusd_128:
5731 case Intrinsic::x86_avx2_vpdpwusd_256:
5732 case Intrinsic::x86_avx10_vpdpwusd_512:
5733 case Intrinsic::x86_avx2_vpdpwusds_128:
5734 case Intrinsic::x86_avx2_vpdpwusds_256:
5735 case Intrinsic::x86_avx10_vpdpwusds_512:
5736 case Intrinsic::x86_avx2_vpdpwuud_128:
5737 case Intrinsic::x86_avx2_vpdpwuud_256:
5738 case Intrinsic::x86_avx10_vpdpwuud_512:
5739 case Intrinsic::x86_avx2_vpdpwuuds_128:
5740 case Intrinsic::x86_avx2_vpdpwuuds_256:
5741 case Intrinsic::x86_avx10_vpdpwuuds_512:
5746 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5747 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5749 NewCall = Builder.CreateCall(NewFn, Args);
5752 assert(NewCall &&
"Should have either set this variable or returned through "
5753 "the default case");
5760 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5774 F->eraseFromParent();
5780 if (NumOperands == 0)
5788 if (NumOperands == 3) {
5792 Metadata *Elts2[] = {ScalarType, ScalarType,
5806 if (
Opc != Instruction::BitCast)
5810 Type *SrcTy = V->getType();
5827 if (
Opc != Instruction::BitCast)
5830 Type *SrcTy =
C->getType();
5857 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5858 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5859 if (Flag->getNumOperands() < 3)
5861 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5862 return K->getString() ==
"Debug Info Version";
5865 if (OpIt != ModFlags->op_end()) {
5866 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5873 bool BrokenDebugInfo =
false;
5876 if (!BrokenDebugInfo)
5882 M.getContext().diagnose(Diag);
5889 M.getContext().diagnose(DiagVersion);
5899 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5902 if (
F->hasFnAttribute(Attr)) {
5905 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5907 auto [Part, Rest] = S.
split(
',');
5913 const unsigned Dim = DimC -
'x';
5914 assert(Dim < 3 &&
"Unexpected dim char");
5924 F->addFnAttr(Attr, NewAttr);
5928 return S ==
"x" || S ==
"y" || S ==
"z";
5933 if (K ==
"kernel") {
5945 const unsigned Idx = (AlignIdxValuePair >> 16);
5946 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5951 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5956 if (K ==
"minctasm") {
5961 if (K ==
"maxnreg") {
5966 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5970 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5974 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5978 if (K ==
"grid_constant") {
5993 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
6000 if (!SeenNodes.
insert(MD).second)
6007 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6014 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6016 const MDOperand &V = MD->getOperand(j + 1);
6019 NewOperands.
append({K, V});
6022 if (NewOperands.
size() > 1)
6035 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6036 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6037 if (ModRetainReleaseMarker) {
6043 ID->getString().split(ValueComp,
"#");
6044 if (ValueComp.
size() == 2) {
6045 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6049 M.eraseNamedMetadata(ModRetainReleaseMarker);
6060 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6086 bool InvalidCast =
false;
6088 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6101 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6103 Args.push_back(Arg);
6110 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6115 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6128 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6136 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6137 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6138 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6139 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6140 {
"objc_autoreleaseReturnValue",
6141 llvm::Intrinsic::objc_autoreleaseReturnValue},
6142 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6143 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6144 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6145 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6146 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6147 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6148 {
"objc_release", llvm::Intrinsic::objc_release},
6149 {
"objc_retain", llvm::Intrinsic::objc_retain},
6150 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6151 {
"objc_retainAutoreleaseReturnValue",
6152 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6153 {
"objc_retainAutoreleasedReturnValue",
6154 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6155 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6156 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6157 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6158 {
"objc_unsafeClaimAutoreleasedReturnValue",
6159 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6160 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6161 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6162 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6163 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6164 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6165 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6166 {
"objc_arc_annotation_topdown_bbstart",
6167 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6168 {
"objc_arc_annotation_topdown_bbend",
6169 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6170 {
"objc_arc_annotation_bottomup_bbstart",
6171 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6172 {
"objc_arc_annotation_bottomup_bbend",
6173 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6175 for (
auto &
I : RuntimeFuncs)
6176 UpgradeToIntrinsic(
I.first,
I.second);
6180 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6184 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6185 bool HasSwiftVersionFlag =
false;
6186 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6193 if (
Op->getNumOperands() != 3)
6207 if (
ID->getString() ==
"Objective-C Image Info Version")
6209 if (
ID->getString() ==
"Objective-C Class Properties")
6210 HasClassProperties =
true;
6212 if (
ID->getString() ==
"PIC Level") {
6213 if (
auto *Behavior =
6215 uint64_t V = Behavior->getLimitedValue();
6221 if (
ID->getString() ==
"PIE Level")
6222 if (
auto *Behavior =
6229 if (
ID->getString() ==
"branch-target-enforcement" ||
6230 ID->getString().starts_with(
"sign-return-address")) {
6231 if (
auto *Behavior =
6237 Op->getOperand(1),
Op->getOperand(2)};
6247 if (
ID->getString() ==
"Objective-C Image Info Section") {
6250 Value->getString().split(ValueComp,
" ");
6251 if (ValueComp.
size() != 1) {
6252 std::string NewValue;
6253 for (
auto &S : ValueComp)
6254 NewValue += S.str();
6265 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6268 assert(Md->getValue() &&
"Expected non-empty metadata");
6269 auto Type = Md->getValue()->getType();
6272 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6273 if ((Val & 0xff) != Val) {
6274 HasSwiftVersionFlag =
true;
6275 SwiftABIVersion = (Val & 0xff00) >> 8;
6276 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6277 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6288 if (
ID->getString() ==
"amdgpu_code_object_version") {
6291 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6303 if (HasObjCFlag && !HasClassProperties) {
6309 if (HasSwiftVersionFlag) {
6313 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6315 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6323 auto TrimSpaces = [](
StringRef Section) -> std::string {
6325 Section.split(Components,
',');
6330 for (
auto Component : Components)
6331 OS <<
',' << Component.trim();
6336 for (
auto &GV : M.globals()) {
6337 if (!GV.hasSection())
6342 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6347 GV.setSection(TrimSpaces(Section));
6363struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6364 StrictFPUpgradeVisitor() =
default;
6367 if (!
Call.isStrictFP())
6373 Call.removeFnAttr(Attribute::StrictFP);
6374 Call.addFnAttr(Attribute::NoBuiltin);
6379struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6380 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6381 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6383 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6398 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6399 StrictFPUpgradeVisitor SFPV;
6404 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6405 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6406 for (
auto &Arg :
F.args())
6408 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6410 bool AddingAttrs =
false, RemovingAttrs =
false;
6411 AttrBuilder AttrsToAdd(
F.getContext());
6416 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6417 A.isValid() &&
A.isStringAttribute()) {
6418 F.setSection(
A.getValueAsString());
6420 RemovingAttrs =
true;
6424 A.isValid() &&
A.isStringAttribute()) {
6427 AddingAttrs = RemovingAttrs =
true;
6430 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6431 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6433 RemovingAttrs =
true;
6434 if (
A.getValueAsString() ==
"true") {
6435 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6444 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6447 if (
A.getValueAsBool()) {
6448 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6454 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6455 RemovingAttrs =
true;
6462 bool HandleDenormalMode =
false;
6464 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6467 DenormalFPMath = ParsedMode;
6469 AddingAttrs = RemovingAttrs =
true;
6470 HandleDenormalMode =
true;
6474 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6478 DenormalFPMathF32 = ParsedMode;
6480 AddingAttrs = RemovingAttrs =
true;
6481 HandleDenormalMode =
true;
6485 if (HandleDenormalMode)
6486 AttrsToAdd.addDenormalFPEnvAttr(
6490 F.removeFnAttrs(AttrsToRemove);
6493 F.addFnAttrs(AttrsToAdd);
6499 if (!
F.hasFnAttribute(FnAttrName))
6500 F.addFnAttr(FnAttrName,
Value);
6507 if (!
F.hasFnAttribute(FnAttrName)) {
6509 F.addFnAttr(FnAttrName);
6511 auto A =
F.getFnAttribute(FnAttrName);
6512 if (
"false" ==
A.getValueAsString())
6513 F.removeFnAttr(FnAttrName);
6514 else if (
"true" ==
A.getValueAsString()) {
6515 F.removeFnAttr(FnAttrName);
6516 F.addFnAttr(FnAttrName);
6522 Triple T(M.getTargetTriple());
6523 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6533 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6537 if (
Op->getNumOperands() != 3)
6546 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6547 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6548 : IDStr ==
"guarded-control-stack" ? &GCSValue
6549 : IDStr ==
"sign-return-address" ? &SRAValue
6550 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6551 : IDStr ==
"sign-return-address-with-bkey"
6557 *ValPtr = CI->getZExtValue();
6563 bool BTE = BTEValue == 1;
6564 bool BPPLR = BPPLRValue == 1;
6565 bool GCS = GCSValue == 1;
6566 bool SRA = SRAValue == 1;
6569 if (SRA && SRAALLValue == 1)
6570 SignTypeValue =
"all";
6573 if (SRA && SRABKeyValue == 1)
6574 SignKeyValue =
"b_key";
6576 for (
Function &
F : M.getFunctionList()) {
6577 if (
F.isDeclaration())
6584 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6585 A.isValid() &&
"none" ==
A.getValueAsString()) {
6586 F.removeFnAttr(
"sign-return-address");
6587 F.removeFnAttr(
"sign-return-address-key");
6603 if (SRAALLValue == 1)
6605 if (SRABKeyValue == 1)
6614 if (
T->getNumOperands() < 1)
6619 return S->getString().starts_with(
"llvm.vectorizer.");
6623 StringRef OldPrefix =
"llvm.vectorizer.";
6626 if (OldTag ==
"llvm.vectorizer.unroll")
6638 if (
T->getNumOperands() < 1)
6643 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6648 Ops.reserve(
T->getNumOperands());
6650 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6651 Ops.push_back(
T->getOperand(
I));
6665 Ops.reserve(
T->getNumOperands());
6676 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6677 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6678 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6681 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6683 auto I =
DL.find(
"-n64-");
6685 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6690 std::string Res =
DL.str();
6693 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6694 Res.append(Res.empty() ?
"G1" :
"-G1");
6702 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6703 Res.append(
"-ni:7:8:9");
6705 if (
DL.ends_with(
"ni:7"))
6707 if (
DL.ends_with(
"ni:7:8"))
6712 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6713 Res.append(
"-p7:160:256:256:32");
6714 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6715 Res.append(
"-p8:128:128:128:48");
6716 constexpr StringRef OldP8(
"-p8:128:128-");
6717 if (
DL.contains(OldP8))
6718 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6719 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6720 Res.append(
"-p9:192:256:256:32");
6724 if (!
DL.contains(
"m:e"))
6725 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6730 if (
T.isSystemZ() && !
DL.empty()) {
6732 if (!
DL.contains(
"-S64"))
6733 return "E-S64" +
DL.drop_front(1).str();
6737 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6740 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6741 if (!
DL.contains(AddrSpaces)) {
6743 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6744 if (R.match(Res, &
Groups))
6750 if (
T.isAArch64()) {
6752 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6753 Res.append(
"-Fn32");
6754 AddPtr32Ptr64AddrSpaces();
6758 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6762 std::string I64 =
"-i64:64";
6763 std::string I128 =
"-i128:128";
6765 size_t Pos = Res.find(I64);
6766 if (Pos !=
size_t(-1))
6767 Res.insert(Pos + I64.size(), I128);
6771 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6772 size_t Pos = Res.find(
"-S128");
6775 Res.insert(Pos,
"-f64:32:64");
6781 AddPtr32Ptr64AddrSpaces();
6789 if (!
T.isOSIAMCU()) {
6790 std::string I128 =
"-i128:128";
6793 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6794 if (R.match(Res, &
Groups))
6802 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6804 auto I =
Ref.find(
"-f80:32-");
6806 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6814 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6817 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6818 B.removeAttribute(
"no-frame-pointer-elim");
6820 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6822 if (FramePointer !=
"all")
6823 FramePointer =
"non-leaf";
6824 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6826 if (!FramePointer.
empty())
6827 B.addAttribute(
"frame-pointer", FramePointer);
6829 A =
B.getAttribute(
"null-pointer-is-valid");
6832 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6833 B.removeAttribute(
"null-pointer-is-valid");
6834 if (NullPointerIsValid)
6835 B.addAttribute(Attribute::NullPointerIsValid);
6838 A =
B.getAttribute(
"uniform-work-group-size");
6842 bool IsTrue = Val ==
"true";
6843 B.removeAttribute(
"uniform-work-group-size");
6845 B.addAttribute(
"uniform-work-group-size");
6856 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.