34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name.consume_back(
".lane")) {
1011 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1012 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1013 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1025 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1030 if (Name.consume_front(
"addqv")) {
1032 if (!
F->getReturnType()->isFPOrFPVectorTy())
1035 auto Args =
F->getFunctionType()->params();
1036 Type *Tys[] = {
F->getReturnType(), Args[1]};
1038 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1042 if (Name.consume_front(
"ld")) {
1044 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1045 if (LdRegex.
match(Name)) {
1051 "Expected 2 arguments for ld* intrinsic.");
1052 Type *PtrTy =
F->getArg(1)->getType();
1055 Intrinsic::aarch64_sve_ld2_sret,
1056 Intrinsic::aarch64_sve_ld3_sret,
1057 Intrinsic::aarch64_sve_ld4_sret,
1060 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1066 if (Name.consume_front(
"tuple.")) {
1068 if (Name.starts_with(
"get")) {
1070 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1072 F->getParent(), Intrinsic::vector_extract, Tys);
1076 if (Name.starts_with(
"set")) {
1078 auto Args =
F->getFunctionType()->params();
1079 Type *Tys[] = {Args[0], Args[2], Args[1]};
1081 F->getParent(), Intrinsic::vector_insert, Tys);
1085 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1086 if (CreateTupleRegex.
match(Name)) {
1088 auto Args =
F->getFunctionType()->params();
1089 Type *Tys[] = {
F->getReturnType(), Args[1]};
1091 F->getParent(), Intrinsic::vector_insert, Tys);
1097 if (Name.starts_with(
"rev.nxv")) {
1100 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1112 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1116 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1118 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1120 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1121 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1122 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1123 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1124 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1125 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1134 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1148 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1149 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1159 if (Name.consume_front(
"mapa.shared.cluster"))
1160 if (
F->getReturnType()->getPointerAddressSpace() ==
1162 return Intrinsic::nvvm_mapa_shared_cluster;
1164 if (Name.consume_front(
"cp.async.bulk.")) {
1167 .
Case(
"global.to.shared.cluster",
1168 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1169 .
Case(
"shared.cta.to.cluster",
1170 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1174 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1183 if (Name.consume_front(
"fma.rn."))
1185 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1186 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1187 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1188 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1191 if (Name.consume_front(
"fmax."))
1193 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1194 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1195 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1196 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1197 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1198 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1199 .
Case(
"ftz.nan.xorsign.abs.bf16",
1200 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1201 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1202 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1203 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1204 .
Case(
"ftz.xorsign.abs.bf16x2",
1205 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1206 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1207 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1208 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1209 .
Case(
"nan.xorsign.abs.bf16x2",
1210 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1211 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1212 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1215 if (Name.consume_front(
"fmin."))
1217 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1218 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1219 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1220 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1221 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1222 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1223 .
Case(
"ftz.nan.xorsign.abs.bf16",
1224 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1225 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1226 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1227 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1228 .
Case(
"ftz.xorsign.abs.bf16x2",
1229 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1230 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1231 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1232 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1233 .
Case(
"nan.xorsign.abs.bf16x2",
1234 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1235 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1236 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1239 if (Name.consume_front(
"neg."))
1241 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1242 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1249 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1250 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1251 Name.consume_front(
"param");
1257 if (Name.starts_with(
"to.fp16")) {
1261 FuncTy->getReturnType());
1264 if (Name.starts_with(
"from.fp16")) {
1268 FuncTy->getReturnType());
1275 bool CanUpgradeDebugIntrinsicsToRecords) {
1276 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1281 if (!Name.consume_front(
"llvm.") || Name.empty())
1287 bool IsArm = Name.consume_front(
"arm.");
1288 if (IsArm || Name.consume_front(
"aarch64.")) {
1294 if (Name.consume_front(
"amdgcn.")) {
1295 if (Name ==
"alignbit") {
1298 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1302 if (Name.consume_front(
"atomic.")) {
1303 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1304 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1313 switch (
F->getIntrinsicID()) {
1317 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1318 if (
F->arg_size() == 7) {
1323 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1324 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1325 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1326 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1327 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1328 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1329 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1330 if (
F->arg_size() == 8) {
1337 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1338 Name.consume_front(
"flat.atomic.")) {
1339 if (Name.starts_with(
"fadd") ||
1341 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1342 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1350 if (Name.starts_with(
"ldexp.")) {
1353 F->getParent(), Intrinsic::ldexp,
1354 {F->getReturnType(), F->getArg(1)->getType()});
1363 if (
F->arg_size() == 1) {
1364 if (Name.consume_front(
"convert.")) {
1378 F->arg_begin()->getType());
1383 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1386 Intrinsic::coro_end);
1393 if (Name.consume_front(
"dbg.")) {
1395 if (CanUpgradeDebugIntrinsicsToRecords) {
1396 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1397 Name ==
"declare" || Name ==
"label") {
1406 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1409 Intrinsic::dbg_value);
1416 if (Name.consume_front(
"experimental.vector.")) {
1422 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1423 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1424 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1425 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1426 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1428 Intrinsic::vector_partial_reduce_add)
1431 const auto *FT =
F->getFunctionType();
1433 if (
ID == Intrinsic::vector_extract ||
1434 ID == Intrinsic::vector_interleave2)
1437 if (
ID != Intrinsic::vector_interleave2)
1439 if (
ID == Intrinsic::vector_insert ||
1440 ID == Intrinsic::vector_partial_reduce_add)
1448 if (Name.consume_front(
"reduce.")) {
1450 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1451 if (R.match(Name, &
Groups))
1453 .
Case(
"add", Intrinsic::vector_reduce_add)
1454 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1455 .
Case(
"and", Intrinsic::vector_reduce_and)
1456 .
Case(
"or", Intrinsic::vector_reduce_or)
1457 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1458 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1459 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1460 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1461 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1462 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1463 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1468 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1473 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1474 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1479 auto Args =
F->getFunctionType()->params();
1481 {Args[V2 ? 1 : 0]});
1487 if (Name.consume_front(
"splice"))
1491 if (Name.consume_front(
"experimental.stepvector.")) {
1495 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1500 if (Name.starts_with(
"flt.rounds")) {
1503 Intrinsic::get_rounding);
1508 if (Name.starts_with(
"invariant.group.barrier")) {
1510 auto Args =
F->getFunctionType()->params();
1511 Type* ObjectPtr[1] = {Args[0]};
1514 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1519 if ((Name.starts_with(
"lifetime.start") ||
1520 Name.starts_with(
"lifetime.end")) &&
1521 F->arg_size() == 2) {
1523 ? Intrinsic::lifetime_start
1524 : Intrinsic::lifetime_end;
1527 F->getArg(0)->getType());
1536 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1537 .StartsWith(
"memmove.", Intrinsic::memmove)
1539 if (
F->arg_size() == 5) {
1543 F->getFunctionType()->params().slice(0, 3);
1549 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1552 const auto *FT =
F->getFunctionType();
1553 Type *ParamTypes[2] = {
1554 FT->getParamType(0),
1558 Intrinsic::memset, ParamTypes);
1564 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1565 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1566 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1567 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1569 if (MaskedID &&
F->arg_size() == 4) {
1571 if (MaskedID == Intrinsic::masked_load ||
1572 MaskedID == Intrinsic::masked_gather) {
1574 F->getParent(), MaskedID,
1575 {F->getReturnType(), F->getArg(0)->getType()});
1579 F->getParent(), MaskedID,
1580 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1586 if (Name.consume_front(
"nvvm.")) {
1588 if (
F->arg_size() == 1) {
1591 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1592 .Case(
"clz.i", Intrinsic::ctlz)
1593 .
Case(
"popc.i", Intrinsic::ctpop)
1597 {F->getReturnType()});
1600 }
else if (
F->arg_size() == 2) {
1603 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1604 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1605 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1606 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1610 {F->getReturnType()});
1616 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1644 bool Expand =
false;
1645 if (Name.consume_front(
"abs."))
1648 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1649 else if (Name.consume_front(
"fabs."))
1651 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1652 else if (Name.consume_front(
"ex2.approx."))
1655 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1656 else if (Name.consume_front(
"atomic.load."))
1665 else if (Name.consume_front(
"bitcast."))
1668 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1669 else if (Name.consume_front(
"rotate."))
1671 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1672 else if (Name.consume_front(
"ptr.gen.to."))
1675 else if (Name.consume_front(
"ptr."))
1678 else if (Name.consume_front(
"ldg.global."))
1680 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1681 Name.starts_with(
"p."));
1684 .
Case(
"barrier0",
true)
1685 .
Case(
"barrier.n",
true)
1686 .
Case(
"barrier.sync.cnt",
true)
1687 .
Case(
"barrier.sync",
true)
1688 .
Case(
"barrier",
true)
1689 .
Case(
"bar.sync",
true)
1690 .
Case(
"barrier0.popc",
true)
1691 .
Case(
"barrier0.and",
true)
1692 .
Case(
"barrier0.or",
true)
1693 .
Case(
"clz.ll",
true)
1694 .
Case(
"popc.ll",
true)
1696 .
Case(
"swap.lo.hi.b64",
true)
1697 .
Case(
"tanh.approx.f32",
true)
1709 if (Name.starts_with(
"objectsize.")) {
1710 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1711 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1714 Intrinsic::objectsize, Tys);
1721 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1724 F->getParent(), Intrinsic::ptr_annotation,
1725 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1731 if (Name.consume_front(
"riscv.")) {
1734 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1735 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1736 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1737 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1740 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1753 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1754 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1763 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1764 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1765 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1766 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1771 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1780 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1782 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1791 if (Name ==
"stackprotectorcheck") {
1798 if (Name ==
"thread.pointer") {
1800 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1806 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1809 F->getParent(), Intrinsic::var_annotation,
1810 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1813 if (Name.consume_front(
"vector.splice")) {
1814 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1822 if (Name.consume_front(
"wasm.")) {
1825 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1826 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1827 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1832 F->getReturnType());
1836 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1838 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1840 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1859 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1868 auto *FT =
F->getFunctionType();
1871 std::string
Name =
F->getName().str();
1874 Name,
F->getParent());
1885 if (Result != std::nullopt) {
1898 bool CanUpgradeDebugIntrinsicsToRecords) {
1918 GV->
getName() ==
"llvm.global_dtors")) ||
1933 unsigned N =
Init->getNumOperands();
1934 std::vector<Constant *> NewCtors(
N);
1935 for (
unsigned i = 0; i !=
N; ++i) {
1938 Ctor->getAggregateElement(1),
1952 unsigned NumElts = ResultTy->getNumElements() * 8;
1956 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1966 for (
unsigned l = 0; l != NumElts; l += 16)
1967 for (
unsigned i = 0; i != 16; ++i) {
1968 unsigned Idx = NumElts + i - Shift;
1970 Idx -= NumElts - 16;
1971 Idxs[l + i] = Idx + l;
1974 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1978 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1986 unsigned NumElts = ResultTy->getNumElements() * 8;
1990 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2000 for (
unsigned l = 0; l != NumElts; l += 16)
2001 for (
unsigned i = 0; i != 16; ++i) {
2002 unsigned Idx = i + Shift;
2004 Idx += NumElts - 16;
2005 Idxs[l + i] = Idx + l;
2008 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2012 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2020 Mask = Builder.CreateBitCast(Mask, MaskTy);
2026 for (
unsigned i = 0; i != NumElts; ++i)
2028 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2039 if (
C->isAllOnesValue())
2044 return Builder.CreateSelect(Mask, Op0, Op1);
2051 if (
C->isAllOnesValue())
2055 Mask->getType()->getIntegerBitWidth());
2056 Mask = Builder.CreateBitCast(Mask, MaskTy);
2057 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2058 return Builder.CreateSelect(Mask, Op0, Op1);
2071 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2072 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2077 ShiftVal &= (NumElts - 1);
2086 if (ShiftVal > 16) {
2094 for (
unsigned l = 0; l < NumElts; l += 16) {
2095 for (
unsigned i = 0; i != 16; ++i) {
2096 unsigned Idx = ShiftVal + i;
2097 if (!IsVALIGN && Idx >= 16)
2098 Idx += NumElts - 16;
2099 Indices[l + i] = Idx + l;
2104 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2110 bool ZeroMask,
bool IndexForm) {
2113 unsigned EltWidth = Ty->getScalarSizeInBits();
2114 bool IsFloat = Ty->isFPOrFPVectorTy();
2116 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2117 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2118 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2119 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2120 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2121 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2122 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2123 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2124 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2125 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2126 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2127 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2128 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2129 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2130 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2131 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2132 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2133 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2134 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2135 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2136 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2137 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2138 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2139 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2140 else if (VecWidth == 128 && EltWidth == 16)
2141 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2142 else if (VecWidth == 256 && EltWidth == 16)
2143 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2144 else if (VecWidth == 512 && EltWidth == 16)
2145 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2146 else if (VecWidth == 128 && EltWidth == 8)
2147 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2148 else if (VecWidth == 256 && EltWidth == 8)
2149 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2150 else if (VecWidth == 512 && EltWidth == 8)
2151 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2162 Value *V = Builder.CreateIntrinsic(IID, Args);
2174 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2185 bool IsRotateRight) {
2195 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2196 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2199 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2200 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2245 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2250 bool IsShiftRight,
bool ZeroMask) {
2264 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2265 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2268 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2269 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2284 const Align Alignment =
2286 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2291 if (
C->isAllOnesValue())
2292 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2297 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2303 const Align Alignment =
2312 if (
C->isAllOnesValue())
2313 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2318 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2324 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2325 {Op0, Builder.getInt1(
false)});
2340 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2341 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2342 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2343 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2344 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2347 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2348 LHS = Builder.CreateAnd(
LHS, Mask);
2349 RHS = Builder.CreateAnd(
RHS, Mask);
2366 if (!
C || !
C->isAllOnesValue())
2367 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2372 for (
unsigned i = 0; i != NumElts; ++i)
2374 for (
unsigned i = NumElts; i != 8; ++i)
2375 Indices[i] = NumElts + i % NumElts;
2376 Vec = Builder.CreateShuffleVector(Vec,
2380 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2384 unsigned CC,
bool Signed) {
2392 }
else if (CC == 7) {
2428 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2429 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2431 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2432 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2441 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2447 Name = Name.substr(12);
2452 if (Name.starts_with(
"max.p")) {
2453 if (VecWidth == 128 && EltWidth == 32)
2454 IID = Intrinsic::x86_sse_max_ps;
2455 else if (VecWidth == 128 && EltWidth == 64)
2456 IID = Intrinsic::x86_sse2_max_pd;
2457 else if (VecWidth == 256 && EltWidth == 32)
2458 IID = Intrinsic::x86_avx_max_ps_256;
2459 else if (VecWidth == 256 && EltWidth == 64)
2460 IID = Intrinsic::x86_avx_max_pd_256;
2463 }
else if (Name.starts_with(
"min.p")) {
2464 if (VecWidth == 128 && EltWidth == 32)
2465 IID = Intrinsic::x86_sse_min_ps;
2466 else if (VecWidth == 128 && EltWidth == 64)
2467 IID = Intrinsic::x86_sse2_min_pd;
2468 else if (VecWidth == 256 && EltWidth == 32)
2469 IID = Intrinsic::x86_avx_min_ps_256;
2470 else if (VecWidth == 256 && EltWidth == 64)
2471 IID = Intrinsic::x86_avx_min_pd_256;
2474 }
else if (Name.starts_with(
"pshuf.b.")) {
2475 if (VecWidth == 128)
2476 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2477 else if (VecWidth == 256)
2478 IID = Intrinsic::x86_avx2_pshuf_b;
2479 else if (VecWidth == 512)
2480 IID = Intrinsic::x86_avx512_pshuf_b_512;
2483 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2484 if (VecWidth == 128)
2485 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2486 else if (VecWidth == 256)
2487 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2488 else if (VecWidth == 512)
2489 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2492 }
else if (Name.starts_with(
"pmulh.w.")) {
2493 if (VecWidth == 128)
2494 IID = Intrinsic::x86_sse2_pmulh_w;
2495 else if (VecWidth == 256)
2496 IID = Intrinsic::x86_avx2_pmulh_w;
2497 else if (VecWidth == 512)
2498 IID = Intrinsic::x86_avx512_pmulh_w_512;
2501 }
else if (Name.starts_with(
"pmulhu.w.")) {
2502 if (VecWidth == 128)
2503 IID = Intrinsic::x86_sse2_pmulhu_w;
2504 else if (VecWidth == 256)
2505 IID = Intrinsic::x86_avx2_pmulhu_w;
2506 else if (VecWidth == 512)
2507 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2510 }
else if (Name.starts_with(
"pmaddw.d.")) {
2511 if (VecWidth == 128)
2512 IID = Intrinsic::x86_sse2_pmadd_wd;
2513 else if (VecWidth == 256)
2514 IID = Intrinsic::x86_avx2_pmadd_wd;
2515 else if (VecWidth == 512)
2516 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2519 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2520 if (VecWidth == 128)
2521 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2522 else if (VecWidth == 256)
2523 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2524 else if (VecWidth == 512)
2525 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2528 }
else if (Name.starts_with(
"packsswb.")) {
2529 if (VecWidth == 128)
2530 IID = Intrinsic::x86_sse2_packsswb_128;
2531 else if (VecWidth == 256)
2532 IID = Intrinsic::x86_avx2_packsswb;
2533 else if (VecWidth == 512)
2534 IID = Intrinsic::x86_avx512_packsswb_512;
2537 }
else if (Name.starts_with(
"packssdw.")) {
2538 if (VecWidth == 128)
2539 IID = Intrinsic::x86_sse2_packssdw_128;
2540 else if (VecWidth == 256)
2541 IID = Intrinsic::x86_avx2_packssdw;
2542 else if (VecWidth == 512)
2543 IID = Intrinsic::x86_avx512_packssdw_512;
2546 }
else if (Name.starts_with(
"packuswb.")) {
2547 if (VecWidth == 128)
2548 IID = Intrinsic::x86_sse2_packuswb_128;
2549 else if (VecWidth == 256)
2550 IID = Intrinsic::x86_avx2_packuswb;
2551 else if (VecWidth == 512)
2552 IID = Intrinsic::x86_avx512_packuswb_512;
2555 }
else if (Name.starts_with(
"packusdw.")) {
2556 if (VecWidth == 128)
2557 IID = Intrinsic::x86_sse41_packusdw;
2558 else if (VecWidth == 256)
2559 IID = Intrinsic::x86_avx2_packusdw;
2560 else if (VecWidth == 512)
2561 IID = Intrinsic::x86_avx512_packusdw_512;
2564 }
else if (Name.starts_with(
"vpermilvar.")) {
2565 if (VecWidth == 128 && EltWidth == 32)
2566 IID = Intrinsic::x86_avx_vpermilvar_ps;
2567 else if (VecWidth == 128 && EltWidth == 64)
2568 IID = Intrinsic::x86_avx_vpermilvar_pd;
2569 else if (VecWidth == 256 && EltWidth == 32)
2570 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2571 else if (VecWidth == 256 && EltWidth == 64)
2572 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2573 else if (VecWidth == 512 && EltWidth == 32)
2574 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2575 else if (VecWidth == 512 && EltWidth == 64)
2576 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2579 }
else if (Name ==
"cvtpd2dq.256") {
2580 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2581 }
else if (Name ==
"cvtpd2ps.256") {
2582 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2583 }
else if (Name ==
"cvttpd2dq.256") {
2584 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2585 }
else if (Name ==
"cvttps2dq.128") {
2586 IID = Intrinsic::x86_sse2_cvttps2dq;
2587 }
else if (Name ==
"cvttps2dq.256") {
2588 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2589 }
else if (Name.starts_with(
"permvar.")) {
2591 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2592 IID = Intrinsic::x86_avx2_permps;
2593 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2594 IID = Intrinsic::x86_avx2_permd;
2595 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2596 IID = Intrinsic::x86_avx512_permvar_df_256;
2597 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2598 IID = Intrinsic::x86_avx512_permvar_di_256;
2599 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2600 IID = Intrinsic::x86_avx512_permvar_sf_512;
2601 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2602 IID = Intrinsic::x86_avx512_permvar_si_512;
2603 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2604 IID = Intrinsic::x86_avx512_permvar_df_512;
2605 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2606 IID = Intrinsic::x86_avx512_permvar_di_512;
2607 else if (VecWidth == 128 && EltWidth == 16)
2608 IID = Intrinsic::x86_avx512_permvar_hi_128;
2609 else if (VecWidth == 256 && EltWidth == 16)
2610 IID = Intrinsic::x86_avx512_permvar_hi_256;
2611 else if (VecWidth == 512 && EltWidth == 16)
2612 IID = Intrinsic::x86_avx512_permvar_hi_512;
2613 else if (VecWidth == 128 && EltWidth == 8)
2614 IID = Intrinsic::x86_avx512_permvar_qi_128;
2615 else if (VecWidth == 256 && EltWidth == 8)
2616 IID = Intrinsic::x86_avx512_permvar_qi_256;
2617 else if (VecWidth == 512 && EltWidth == 8)
2618 IID = Intrinsic::x86_avx512_permvar_qi_512;
2621 }
else if (Name.starts_with(
"dbpsadbw.")) {
2622 if (VecWidth == 128)
2623 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2624 else if (VecWidth == 256)
2625 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2626 else if (VecWidth == 512)
2627 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2630 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2631 if (VecWidth == 128)
2632 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2633 else if (VecWidth == 256)
2634 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2635 else if (VecWidth == 512)
2636 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2639 }
else if (Name.starts_with(
"conflict.")) {
2640 if (Name[9] ==
'd' && VecWidth == 128)
2641 IID = Intrinsic::x86_avx512_conflict_d_128;
2642 else if (Name[9] ==
'd' && VecWidth == 256)
2643 IID = Intrinsic::x86_avx512_conflict_d_256;
2644 else if (Name[9] ==
'd' && VecWidth == 512)
2645 IID = Intrinsic::x86_avx512_conflict_d_512;
2646 else if (Name[9] ==
'q' && VecWidth == 128)
2647 IID = Intrinsic::x86_avx512_conflict_q_128;
2648 else if (Name[9] ==
'q' && VecWidth == 256)
2649 IID = Intrinsic::x86_avx512_conflict_q_256;
2650 else if (Name[9] ==
'q' && VecWidth == 512)
2651 IID = Intrinsic::x86_avx512_conflict_q_512;
2654 }
else if (Name.starts_with(
"pavg.")) {
2655 if (Name[5] ==
'b' && VecWidth == 128)
2656 IID = Intrinsic::x86_sse2_pavg_b;
2657 else if (Name[5] ==
'b' && VecWidth == 256)
2658 IID = Intrinsic::x86_avx2_pavg_b;
2659 else if (Name[5] ==
'b' && VecWidth == 512)
2660 IID = Intrinsic::x86_avx512_pavg_b_512;
2661 else if (Name[5] ==
'w' && VecWidth == 128)
2662 IID = Intrinsic::x86_sse2_pavg_w;
2663 else if (Name[5] ==
'w' && VecWidth == 256)
2664 IID = Intrinsic::x86_avx2_pavg_w;
2665 else if (Name[5] ==
'w' && VecWidth == 512)
2666 IID = Intrinsic::x86_avx512_pavg_w_512;
2675 Rep = Builder.CreateIntrinsic(IID, Args);
2686 if (AsmStr->find(
"mov\tfp") == 0 &&
2687 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2688 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2689 AsmStr->replace(Pos, 1,
";");
2695 Value *Rep =
nullptr;
2697 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2699 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2700 Value *Cmp = Builder.CreateICmpSGE(
2702 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2703 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2704 Type *Ty = (Name ==
"abs.bf16")
2708 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2709 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2710 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2711 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2712 : Intrinsic::nvvm_fabs;
2713 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2714 }
else if (Name.consume_front(
"ex2.approx.")) {
2716 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2717 : Intrinsic::nvvm_ex2_approx;
2718 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2719 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2720 Name.starts_with(
"atomic.load.add.f64.p")) {
2725 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2726 Name.starts_with(
"atomic.load.dec.32.p")) {
2731 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2733 }
else if (Name ==
"clz.ll") {
2736 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2737 {Arg, Builder.getFalse()},
2739 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2740 }
else if (Name ==
"popc.ll") {
2744 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2745 Arg,
nullptr,
"ctpop");
2746 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2747 }
else if (Name ==
"h2f") {
2749 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2750 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2751 }
else if (Name.consume_front(
"bitcast.") &&
2752 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2755 }
else if (Name ==
"rotate.b32") {
2758 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2759 {Arg, Arg, ShiftAmt});
2760 }
else if (Name ==
"rotate.b64") {
2764 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2765 {Arg, Arg, ZExtShiftAmt});
2766 }
else if (Name ==
"rotate.right.b64") {
2770 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2771 {Arg, Arg, ZExtShiftAmt});
2772 }
else if (Name ==
"swap.lo.hi.b64") {
2775 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2776 {Arg, Arg, Builder.getInt64(32)});
2777 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2780 Name.starts_with(
".to.gen"))) {
2782 }
else if (Name.consume_front(
"ldg.global")) {
2786 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2789 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2791 }
else if (Name ==
"tanh.approx.f32") {
2795 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2797 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2799 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2800 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2802 }
else if (Name ==
"barrier") {
2803 Rep = Builder.CreateIntrinsic(
2804 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2806 }
else if (Name ==
"barrier.sync") {
2807 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2809 }
else if (Name ==
"barrier.sync.cnt") {
2810 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2812 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2813 Name ==
"barrier0.or") {
2815 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2819 .
Case(
"barrier0.popc",
2820 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2821 .
Case(
"barrier0.and",
2822 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2823 .
Case(
"barrier0.or",
2824 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2825 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2826 Rep = Builder.CreateZExt(Bar, CI->
getType());
2830 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2840 ? Builder.CreateBitCast(Arg, NewType)
2843 Rep = Builder.CreateCall(NewFn, Args);
2844 if (
F->getReturnType()->isIntegerTy())
2845 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2855 Value *Rep =
nullptr;
2857 if (Name.starts_with(
"sse4a.movnt.")) {
2869 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2872 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2873 }
else if (Name.starts_with(
"avx.movnt.") ||
2874 Name.starts_with(
"avx512.storent.")) {
2886 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2887 }
else if (Name ==
"sse2.storel.dq") {
2892 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2893 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2894 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2895 }
else if (Name.starts_with(
"sse.storeu.") ||
2896 Name.starts_with(
"sse2.storeu.") ||
2897 Name.starts_with(
"avx.storeu.")) {
2900 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2901 }
else if (Name ==
"avx512.mask.store.ss") {
2905 }
else if (Name.starts_with(
"avx512.mask.store")) {
2907 bool Aligned = Name[17] !=
'u';
2910 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2913 bool CmpEq = Name[9] ==
'e';
2916 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2917 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2924 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2925 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2927 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2928 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2929 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2930 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2931 Name.starts_with(
"sse2.sqrt.p") ||
2932 Name.starts_with(
"sse.sqrt.p")) {
2933 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2934 {CI->getArgOperand(0)});
2935 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2939 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2940 : Intrinsic::x86_avx512_sqrt_pd_512;
2943 Rep = Builder.CreateIntrinsic(IID, Args);
2945 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2946 {CI->getArgOperand(0)});
2950 }
else if (Name.starts_with(
"avx512.ptestm") ||
2951 Name.starts_with(
"avx512.ptestnm")) {
2955 Rep = Builder.CreateAnd(Op0, Op1);
2961 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2963 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2966 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2969 }
else if (Name.starts_with(
"avx512.kunpck")) {
2974 for (
unsigned i = 0; i != NumElts; ++i)
2983 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2984 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2985 }
else if (Name ==
"avx512.kand.w") {
2988 Rep = Builder.CreateAnd(
LHS,
RHS);
2989 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2990 }
else if (Name ==
"avx512.kandn.w") {
2993 LHS = Builder.CreateNot(
LHS);
2994 Rep = Builder.CreateAnd(
LHS,
RHS);
2995 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2996 }
else if (Name ==
"avx512.kor.w") {
2999 Rep = Builder.CreateOr(
LHS,
RHS);
3000 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3001 }
else if (Name ==
"avx512.kxor.w") {
3004 Rep = Builder.CreateXor(
LHS,
RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3006 }
else if (Name ==
"avx512.kxnor.w") {
3009 LHS = Builder.CreateNot(
LHS);
3010 Rep = Builder.CreateXor(
LHS,
RHS);
3011 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3012 }
else if (Name ==
"avx512.knot.w") {
3014 Rep = Builder.CreateNot(Rep);
3015 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3016 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3019 Rep = Builder.CreateOr(
LHS,
RHS);
3020 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3022 if (Name[14] ==
'c')
3026 Rep = Builder.CreateICmpEQ(Rep,
C);
3027 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3028 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3029 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3030 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3031 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3034 ConstantInt::get(I32Ty, 0));
3036 ConstantInt::get(I32Ty, 0));
3038 if (Name.contains(
".add."))
3039 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3040 else if (Name.contains(
".sub."))
3041 EltOp = Builder.CreateFSub(Elt0, Elt1);
3042 else if (Name.contains(
".mul."))
3043 EltOp = Builder.CreateFMul(Elt0, Elt1);
3045 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3046 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3047 ConstantInt::get(I32Ty, 0));
3048 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3050 bool CmpEq = Name[16] ==
'e';
3052 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3061 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3064 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3067 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3074 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3079 if (VecWidth == 128 && EltWidth == 32)
3080 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3081 else if (VecWidth == 256 && EltWidth == 32)
3082 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3083 else if (VecWidth == 512 && EltWidth == 32)
3084 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3085 else if (VecWidth == 128 && EltWidth == 64)
3086 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3087 else if (VecWidth == 256 && EltWidth == 64)
3088 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3089 else if (VecWidth == 512 && EltWidth == 64)
3090 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3097 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3099 Type *OpTy = Args[0]->getType();
3103 if (VecWidth == 128 && EltWidth == 32)
3104 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3105 else if (VecWidth == 256 && EltWidth == 32)
3106 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3107 else if (VecWidth == 512 && EltWidth == 32)
3108 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3109 else if (VecWidth == 128 && EltWidth == 64)
3110 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3111 else if (VecWidth == 256 && EltWidth == 64)
3112 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3113 else if (VecWidth == 512 && EltWidth == 64)
3114 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3119 if (VecWidth == 512)
3121 Args.push_back(Mask);
3123 Rep = Builder.CreateIntrinsic(IID, Args);
3124 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3128 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3131 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3132 Name.starts_with(
"avx512.cvtw2mask.") ||
3133 Name.starts_with(
"avx512.cvtd2mask.") ||
3134 Name.starts_with(
"avx512.cvtq2mask.")) {
3139 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3140 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3141 Name.starts_with(
"avx512.mask.pabs")) {
3143 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3144 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3145 Name.starts_with(
"avx512.mask.pmaxs")) {
3147 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3148 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3149 Name.starts_with(
"avx512.mask.pmaxu")) {
3151 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3152 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3153 Name.starts_with(
"avx512.mask.pmins")) {
3155 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3156 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3157 Name.starts_with(
"avx512.mask.pminu")) {
3159 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3160 Name ==
"avx512.pmulu.dq.512" ||
3161 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3163 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3164 Name ==
"avx512.pmul.dq.512" ||
3165 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3167 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3168 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3173 }
else if (Name ==
"avx512.cvtusi2sd") {
3178 }
else if (Name ==
"sse2.cvtss2sd") {
3180 Rep = Builder.CreateFPExt(
3183 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3184 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3185 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3186 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3187 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3188 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3189 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3190 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3191 Name ==
"avx512.mask.cvtqq2ps.256" ||
3192 Name ==
"avx512.mask.cvtqq2ps.512" ||
3193 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3194 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3195 Name ==
"avx.cvt.ps2.pd.256" ||
3196 Name ==
"avx512.mask.cvtps2pd.128" ||
3197 Name ==
"avx512.mask.cvtps2pd.256") {
3202 unsigned NumDstElts = DstTy->getNumElements();
3204 assert(NumDstElts == 2 &&
"Unexpected vector size");
3205 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3208 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3209 bool IsUnsigned = Name.contains(
"cvtu");
3211 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3215 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3216 : Intrinsic::x86_avx512_sitofp_round;
3217 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3220 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3221 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3227 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3228 Name.starts_with(
"vcvtph2ps.")) {
3232 unsigned NumDstElts = DstTy->getNumElements();
3233 if (NumDstElts != SrcTy->getNumElements()) {
3234 assert(NumDstElts == 4 &&
"Unexpected vector size");
3235 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3237 Rep = Builder.CreateBitCast(
3239 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3243 }
else if (Name.starts_with(
"avx512.mask.load")) {
3245 bool Aligned = Name[16] !=
'u';
3248 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3251 ResultTy->getNumElements());
3253 Rep = Builder.CreateIntrinsic(
3254 Intrinsic::masked_expandload, ResultTy,
3256 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3262 Rep = Builder.CreateIntrinsic(
3263 Intrinsic::masked_compressstore, ResultTy,
3265 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3266 Name.starts_with(
"avx512.mask.expand.")) {
3270 ResultTy->getNumElements());
3272 bool IsCompress = Name[12] ==
'c';
3273 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3274 : Intrinsic::x86_avx512_mask_expand;
3275 Rep = Builder.CreateIntrinsic(
3277 }
else if (Name.starts_with(
"xop.vpcom")) {
3279 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3280 Name.ends_with(
"uq"))
3282 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3283 Name.ends_with(
"d") || Name.ends_with(
"q"))
3292 Name = Name.substr(9);
3293 if (Name.starts_with(
"lt"))
3295 else if (Name.starts_with(
"le"))
3297 else if (Name.starts_with(
"gt"))
3299 else if (Name.starts_with(
"ge"))
3301 else if (Name.starts_with(
"eq"))
3303 else if (Name.starts_with(
"ne"))
3305 else if (Name.starts_with(
"false"))
3307 else if (Name.starts_with(
"true"))
3314 }
else if (Name.starts_with(
"xop.vpcmov")) {
3316 Value *NotSel = Builder.CreateNot(Sel);
3319 Rep = Builder.CreateOr(Sel0, Sel1);
3320 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3321 Name.starts_with(
"avx512.mask.prol")) {
3323 }
else if (Name.starts_with(
"avx512.pror") ||
3324 Name.starts_with(
"avx512.mask.pror")) {
3326 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3327 Name.starts_with(
"avx512.mask.vpshld") ||
3328 Name.starts_with(
"avx512.maskz.vpshld")) {
3329 bool ZeroMask = Name[11] ==
'z';
3331 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3332 Name.starts_with(
"avx512.mask.vpshrd") ||
3333 Name.starts_with(
"avx512.maskz.vpshrd")) {
3334 bool ZeroMask = Name[11] ==
'z';
3336 }
else if (Name ==
"sse42.crc32.64.8") {
3339 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3341 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3342 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3343 Name.starts_with(
"avx512.vbroadcast.s")) {
3346 Type *EltTy = VecTy->getElementType();
3347 unsigned EltNum = VecTy->getNumElements();
3351 for (
unsigned I = 0;
I < EltNum; ++
I)
3352 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3353 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3354 Name.starts_with(
"sse41.pmovzx") ||
3355 Name.starts_with(
"avx2.pmovsx") ||
3356 Name.starts_with(
"avx2.pmovzx") ||
3357 Name.starts_with(
"avx512.mask.pmovsx") ||
3358 Name.starts_with(
"avx512.mask.pmovzx")) {
3360 unsigned NumDstElts = DstTy->getNumElements();
3364 for (
unsigned i = 0; i != NumDstElts; ++i)
3369 bool DoSext = Name.contains(
"pmovsx");
3371 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3376 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3377 Name ==
"avx512.mask.pmov.qd.512" ||
3378 Name ==
"avx512.mask.pmov.wb.256" ||
3379 Name ==
"avx512.mask.pmov.wb.512") {
3384 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3385 Name ==
"avx2.vbroadcasti128") {
3391 if (NumSrcElts == 2)
3392 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3394 Rep = Builder.CreateShuffleVector(Load,
3396 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3397 Name.starts_with(
"avx512.mask.shuf.f")) {
3402 unsigned ControlBitsMask = NumLanes - 1;
3403 unsigned NumControlBits = NumLanes / 2;
3406 for (
unsigned l = 0; l != NumLanes; ++l) {
3407 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3409 if (l >= NumLanes / 2)
3410 LaneMask += NumLanes;
3411 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3412 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3418 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3419 Name.starts_with(
"avx512.mask.broadcasti")) {
3422 unsigned NumDstElts =
3426 for (
unsigned i = 0; i != NumDstElts; ++i)
3427 ShuffleMask[i] = i % NumSrcElts;
3433 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3434 Name.starts_with(
"avx2.vbroadcast") ||
3435 Name.starts_with(
"avx512.pbroadcast") ||
3436 Name.starts_with(
"avx512.mask.broadcast.s")) {
3443 Rep = Builder.CreateShuffleVector(
Op, M);
3448 }
else if (Name.starts_with(
"sse2.padds.") ||
3449 Name.starts_with(
"avx2.padds.") ||
3450 Name.starts_with(
"avx512.padds.") ||
3451 Name.starts_with(
"avx512.mask.padds.")) {
3453 }
else if (Name.starts_with(
"sse2.psubs.") ||
3454 Name.starts_with(
"avx2.psubs.") ||
3455 Name.starts_with(
"avx512.psubs.") ||
3456 Name.starts_with(
"avx512.mask.psubs.")) {
3458 }
else if (Name.starts_with(
"sse2.paddus.") ||
3459 Name.starts_with(
"avx2.paddus.") ||
3460 Name.starts_with(
"avx512.mask.paddus.")) {
3462 }
else if (Name.starts_with(
"sse2.psubus.") ||
3463 Name.starts_with(
"avx2.psubus.") ||
3464 Name.starts_with(
"avx512.mask.psubus.")) {
3466 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3471 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3475 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3480 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3485 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3486 Name ==
"avx512.psll.dq.512") {
3490 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3491 Name ==
"avx512.psrl.dq.512") {
3495 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3496 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3497 Name.starts_with(
"avx2.pblendd.")) {
3502 unsigned NumElts = VecTy->getNumElements();
3505 for (
unsigned i = 0; i != NumElts; ++i)
3506 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3508 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3509 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3510 Name ==
"avx2.vinserti128" ||
3511 Name.starts_with(
"avx512.mask.insert")) {
3515 unsigned DstNumElts =
3517 unsigned SrcNumElts =
3519 unsigned Scale = DstNumElts / SrcNumElts;
3526 for (
unsigned i = 0; i != SrcNumElts; ++i)
3528 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3529 Idxs[i] = SrcNumElts;
3530 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3544 for (
unsigned i = 0; i != DstNumElts; ++i)
3547 for (
unsigned i = 0; i != SrcNumElts; ++i)
3548 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3549 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3555 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3556 Name ==
"avx2.vextracti128" ||
3557 Name.starts_with(
"avx512.mask.vextract")) {
3560 unsigned DstNumElts =
3562 unsigned SrcNumElts =
3564 unsigned Scale = SrcNumElts / DstNumElts;
3571 for (
unsigned i = 0; i != DstNumElts; ++i) {
3572 Idxs[i] = i + (Imm * DstNumElts);
3574 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3580 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3581 Name.starts_with(
"avx512.mask.perm.di.")) {
3585 unsigned NumElts = VecTy->getNumElements();
3588 for (
unsigned i = 0; i != NumElts; ++i)
3589 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3591 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3596 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3608 unsigned HalfSize = NumElts / 2;
3620 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3621 for (
unsigned i = 0; i < HalfSize; ++i)
3622 ShuffleMask[i] = StartIndex + i;
3625 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3626 for (
unsigned i = 0; i < HalfSize; ++i)
3627 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3629 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3631 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3632 Name.starts_with(
"avx512.mask.vpermil.p") ||
3633 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3637 unsigned NumElts = VecTy->getNumElements();
3639 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3640 unsigned IdxMask = ((1 << IdxSize) - 1);
3646 for (
unsigned i = 0; i != NumElts; ++i)
3647 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3649 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3654 }
else if (Name ==
"sse2.pshufl.w" ||
3655 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3660 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3664 for (
unsigned l = 0; l != NumElts; l += 8) {
3665 for (
unsigned i = 0; i != 4; ++i)
3666 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3667 for (
unsigned i = 4; i != 8; ++i)
3668 Idxs[i + l] = i + l;
3671 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3676 }
else if (Name ==
"sse2.pshufh.w" ||
3677 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3682 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3686 for (
unsigned l = 0; l != NumElts; l += 8) {
3687 for (
unsigned i = 0; i != 4; ++i)
3688 Idxs[i + l] = i + l;
3689 for (
unsigned i = 0; i != 4; ++i)
3690 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3693 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3698 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3705 unsigned HalfLaneElts = NumLaneElts / 2;
3708 for (
unsigned i = 0; i != NumElts; ++i) {
3710 Idxs[i] = i - (i % NumLaneElts);
3712 if ((i % NumLaneElts) >= HalfLaneElts)
3716 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3719 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3723 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3724 Name.starts_with(
"avx512.mask.movshdup") ||
3725 Name.starts_with(
"avx512.mask.movsldup")) {
3731 if (Name.starts_with(
"avx512.mask.movshdup."))
3735 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3736 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3737 Idxs[i + l + 0] = i + l +
Offset;
3738 Idxs[i + l + 1] = i + l +
Offset;
3741 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3745 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3746 Name.starts_with(
"avx512.mask.unpckl.")) {
3753 for (
int l = 0; l != NumElts; l += NumLaneElts)
3754 for (
int i = 0; i != NumLaneElts; ++i)
3755 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3757 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3761 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3762 Name.starts_with(
"avx512.mask.unpckh.")) {
3769 for (
int l = 0; l != NumElts; l += NumLaneElts)
3770 for (
int i = 0; i != NumLaneElts; ++i)
3771 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3773 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3777 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3778 Name.starts_with(
"avx512.mask.pand.")) {
3781 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3783 Rep = Builder.CreateBitCast(Rep, FTy);
3786 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3787 Name.starts_with(
"avx512.mask.pandn.")) {
3790 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3791 Rep = Builder.CreateAnd(Rep,
3793 Rep = Builder.CreateBitCast(Rep, FTy);
3796 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3797 Name.starts_with(
"avx512.mask.por.")) {
3800 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3802 Rep = Builder.CreateBitCast(Rep, FTy);
3805 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3806 Name.starts_with(
"avx512.mask.pxor.")) {
3809 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3811 Rep = Builder.CreateBitCast(Rep, FTy);
3814 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3818 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3822 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3826 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3827 if (Name.ends_with(
".512")) {
3829 if (Name[17] ==
's')
3830 IID = Intrinsic::x86_avx512_add_ps_512;
3832 IID = Intrinsic::x86_avx512_add_pd_512;
3834 Rep = Builder.CreateIntrinsic(
3842 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3843 if (Name.ends_with(
".512")) {
3845 if (Name[17] ==
's')
3846 IID = Intrinsic::x86_avx512_div_ps_512;
3848 IID = Intrinsic::x86_avx512_div_pd_512;
3850 Rep = Builder.CreateIntrinsic(
3858 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3859 if (Name.ends_with(
".512")) {
3861 if (Name[17] ==
's')
3862 IID = Intrinsic::x86_avx512_mul_ps_512;
3864 IID = Intrinsic::x86_avx512_mul_pd_512;
3866 Rep = Builder.CreateIntrinsic(
3874 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3875 if (Name.ends_with(
".512")) {
3877 if (Name[17] ==
's')
3878 IID = Intrinsic::x86_avx512_sub_ps_512;
3880 IID = Intrinsic::x86_avx512_sub_pd_512;
3882 Rep = Builder.CreateIntrinsic(
3890 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3891 Name.starts_with(
"avx512.mask.min.p")) &&
3892 Name.drop_front(18) ==
".512") {
3893 bool IsDouble = Name[17] ==
'd';
3894 bool IsMin = Name[13] ==
'i';
3896 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3897 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3900 Rep = Builder.CreateIntrinsic(
3905 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3907 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3908 {CI->getArgOperand(0), Builder.getInt1(false)});
3911 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3912 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3913 bool IsVariable = Name[16] ==
'v';
3914 char Size = Name[16] ==
'.' ? Name[17]
3915 : Name[17] ==
'.' ? Name[18]
3916 : Name[18] ==
'.' ? Name[19]
3920 if (IsVariable && Name[17] !=
'.') {
3921 if (
Size ==
'd' && Name[17] ==
'2')
3922 IID = Intrinsic::x86_avx2_psllv_q;
3923 else if (
Size ==
'd' && Name[17] ==
'4')
3924 IID = Intrinsic::x86_avx2_psllv_q_256;
3925 else if (
Size ==
's' && Name[17] ==
'4')
3926 IID = Intrinsic::x86_avx2_psllv_d;
3927 else if (
Size ==
's' && Name[17] ==
'8')
3928 IID = Intrinsic::x86_avx2_psllv_d_256;
3929 else if (
Size ==
'h' && Name[17] ==
'8')
3930 IID = Intrinsic::x86_avx512_psllv_w_128;
3931 else if (
Size ==
'h' && Name[17] ==
'1')
3932 IID = Intrinsic::x86_avx512_psllv_w_256;
3933 else if (Name[17] ==
'3' && Name[18] ==
'2')
3934 IID = Intrinsic::x86_avx512_psllv_w_512;
3937 }
else if (Name.ends_with(
".128")) {
3939 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3940 : Intrinsic::x86_sse2_psll_d;
3941 else if (
Size ==
'q')
3942 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3943 : Intrinsic::x86_sse2_psll_q;
3944 else if (
Size ==
'w')
3945 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3946 : Intrinsic::x86_sse2_psll_w;
3949 }
else if (Name.ends_with(
".256")) {
3951 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3952 : Intrinsic::x86_avx2_psll_d;
3953 else if (
Size ==
'q')
3954 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3955 : Intrinsic::x86_avx2_psll_q;
3956 else if (
Size ==
'w')
3957 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3958 : Intrinsic::x86_avx2_psll_w;
3963 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3964 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3965 : Intrinsic::x86_avx512_psll_d_512;
3966 else if (
Size ==
'q')
3967 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3968 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3969 : Intrinsic::x86_avx512_psll_q_512;
3970 else if (
Size ==
'w')
3971 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3972 : Intrinsic::x86_avx512_psll_w_512;
3978 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3979 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3980 bool IsVariable = Name[16] ==
'v';
3981 char Size = Name[16] ==
'.' ? Name[17]
3982 : Name[17] ==
'.' ? Name[18]
3983 : Name[18] ==
'.' ? Name[19]
3987 if (IsVariable && Name[17] !=
'.') {
3988 if (
Size ==
'd' && Name[17] ==
'2')
3989 IID = Intrinsic::x86_avx2_psrlv_q;
3990 else if (
Size ==
'd' && Name[17] ==
'4')
3991 IID = Intrinsic::x86_avx2_psrlv_q_256;
3992 else if (
Size ==
's' && Name[17] ==
'4')
3993 IID = Intrinsic::x86_avx2_psrlv_d;
3994 else if (
Size ==
's' && Name[17] ==
'8')
3995 IID = Intrinsic::x86_avx2_psrlv_d_256;
3996 else if (
Size ==
'h' && Name[17] ==
'8')
3997 IID = Intrinsic::x86_avx512_psrlv_w_128;
3998 else if (
Size ==
'h' && Name[17] ==
'1')
3999 IID = Intrinsic::x86_avx512_psrlv_w_256;
4000 else if (Name[17] ==
'3' && Name[18] ==
'2')
4001 IID = Intrinsic::x86_avx512_psrlv_w_512;
4004 }
else if (Name.ends_with(
".128")) {
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4007 : Intrinsic::x86_sse2_psrl_d;
4008 else if (
Size ==
'q')
4009 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4010 : Intrinsic::x86_sse2_psrl_q;
4011 else if (
Size ==
'w')
4012 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4013 : Intrinsic::x86_sse2_psrl_w;
4016 }
else if (Name.ends_with(
".256")) {
4018 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4019 : Intrinsic::x86_avx2_psrl_d;
4020 else if (
Size ==
'q')
4021 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4022 : Intrinsic::x86_avx2_psrl_q;
4023 else if (
Size ==
'w')
4024 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4025 : Intrinsic::x86_avx2_psrl_w;
4030 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4031 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4032 : Intrinsic::x86_avx512_psrl_d_512;
4033 else if (
Size ==
'q')
4034 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4035 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4036 : Intrinsic::x86_avx512_psrl_q_512;
4037 else if (
Size ==
'w')
4038 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4039 : Intrinsic::x86_avx512_psrl_w_512;
4045 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4046 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4047 bool IsVariable = Name[16] ==
'v';
4048 char Size = Name[16] ==
'.' ? Name[17]
4049 : Name[17] ==
'.' ? Name[18]
4050 : Name[18] ==
'.' ? Name[19]
4054 if (IsVariable && Name[17] !=
'.') {
4055 if (
Size ==
's' && Name[17] ==
'4')
4056 IID = Intrinsic::x86_avx2_psrav_d;
4057 else if (
Size ==
's' && Name[17] ==
'8')
4058 IID = Intrinsic::x86_avx2_psrav_d_256;
4059 else if (
Size ==
'h' && Name[17] ==
'8')
4060 IID = Intrinsic::x86_avx512_psrav_w_128;
4061 else if (
Size ==
'h' && Name[17] ==
'1')
4062 IID = Intrinsic::x86_avx512_psrav_w_256;
4063 else if (Name[17] ==
'3' && Name[18] ==
'2')
4064 IID = Intrinsic::x86_avx512_psrav_w_512;
4067 }
else if (Name.ends_with(
".128")) {
4069 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4070 : Intrinsic::x86_sse2_psra_d;
4071 else if (
Size ==
'q')
4072 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4073 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4074 : Intrinsic::x86_avx512_psra_q_128;
4075 else if (
Size ==
'w')
4076 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4077 : Intrinsic::x86_sse2_psra_w;
4080 }
else if (Name.ends_with(
".256")) {
4082 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4083 : Intrinsic::x86_avx2_psra_d;
4084 else if (
Size ==
'q')
4085 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4086 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4087 : Intrinsic::x86_avx512_psra_q_256;
4088 else if (
Size ==
'w')
4089 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4090 : Intrinsic::x86_avx2_psra_w;
4095 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4096 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4097 : Intrinsic::x86_avx512_psra_d_512;
4098 else if (
Size ==
'q')
4099 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4100 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4101 : Intrinsic::x86_avx512_psra_q_512;
4102 else if (
Size ==
'w')
4103 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4104 : Intrinsic::x86_avx512_psra_w_512;
4110 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4112 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4114 }
else if (Name.ends_with(
".movntdqa")) {
4118 LoadInst *LI = Builder.CreateAlignedLoad(
4123 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4124 Name.starts_with(
"fma.vfmsub.") ||
4125 Name.starts_with(
"fma.vfnmadd.") ||
4126 Name.starts_with(
"fma.vfnmsub.")) {
4127 bool NegMul = Name[6] ==
'n';
4128 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4129 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4140 if (NegMul && !IsScalar)
4141 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4142 if (NegMul && IsScalar)
4143 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4145 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4147 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4151 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4159 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4163 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4164 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4165 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4166 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4167 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4168 bool IsMask3 = Name[11] ==
'3';
4169 bool IsMaskZ = Name[11] ==
'z';
4171 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4172 bool NegMul = Name[2] ==
'n';
4173 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4179 if (NegMul && (IsMask3 || IsMaskZ))
4180 A = Builder.CreateFNeg(
A);
4181 if (NegMul && !(IsMask3 || IsMaskZ))
4182 B = Builder.CreateFNeg(
B);
4184 C = Builder.CreateFNeg(
C);
4186 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4187 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4188 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4195 if (Name.back() ==
'd')
4196 IID = Intrinsic::x86_avx512_vfmadd_f64;
4198 IID = Intrinsic::x86_avx512_vfmadd_f32;
4199 Rep = Builder.CreateIntrinsic(IID,
Ops);
4201 Rep = Builder.CreateFMA(
A,
B,
C);
4210 if (NegAcc && IsMask3)
4215 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4217 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4218 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4219 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4220 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4221 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4222 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4223 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4224 bool IsMask3 = Name[11] ==
'3';
4225 bool IsMaskZ = Name[11] ==
'z';
4227 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4228 bool NegMul = Name[2] ==
'n';
4229 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4235 if (NegMul && (IsMask3 || IsMaskZ))
4236 A = Builder.CreateFNeg(
A);
4237 if (NegMul && !(IsMask3 || IsMaskZ))
4238 B = Builder.CreateFNeg(
B);
4240 C = Builder.CreateFNeg(
C);
4247 if (Name[Name.size() - 5] ==
's')
4248 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4250 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4254 Rep = Builder.CreateFMA(
A,
B,
C);
4262 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4266 if (VecWidth == 128 && EltWidth == 32)
4267 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4268 else if (VecWidth == 256 && EltWidth == 32)
4269 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4270 else if (VecWidth == 128 && EltWidth == 64)
4271 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4272 else if (VecWidth == 256 && EltWidth == 64)
4273 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4279 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4280 Rep = Builder.CreateIntrinsic(IID,
Ops);
4281 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4282 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4283 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4284 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4285 bool IsMask3 = Name[11] ==
'3';
4286 bool IsMaskZ = Name[11] ==
'z';
4288 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4289 bool IsSubAdd = Name[3] ==
's';
4293 if (Name[Name.size() - 5] ==
's')
4294 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4296 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4301 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4303 Rep = Builder.CreateIntrinsic(IID,
Ops);
4312 Value *Odd = Builder.CreateCall(FMA,
Ops);
4313 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4314 Value *Even = Builder.CreateCall(FMA,
Ops);
4320 for (
int i = 0; i != NumElts; ++i)
4321 Idxs[i] = i + (i % 2) * NumElts;
4323 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4331 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4332 Name.starts_with(
"avx512.maskz.pternlog.")) {
4333 bool ZeroMask = Name[11] ==
'z';
4337 if (VecWidth == 128 && EltWidth == 32)
4338 IID = Intrinsic::x86_avx512_pternlog_d_128;
4339 else if (VecWidth == 256 && EltWidth == 32)
4340 IID = Intrinsic::x86_avx512_pternlog_d_256;
4341 else if (VecWidth == 512 && EltWidth == 32)
4342 IID = Intrinsic::x86_avx512_pternlog_d_512;
4343 else if (VecWidth == 128 && EltWidth == 64)
4344 IID = Intrinsic::x86_avx512_pternlog_q_128;
4345 else if (VecWidth == 256 && EltWidth == 64)
4346 IID = Intrinsic::x86_avx512_pternlog_q_256;
4347 else if (VecWidth == 512 && EltWidth == 64)
4348 IID = Intrinsic::x86_avx512_pternlog_q_512;
4354 Rep = Builder.CreateIntrinsic(IID, Args);
4358 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4359 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4360 bool ZeroMask = Name[11] ==
'z';
4361 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4364 if (VecWidth == 128 && !
High)
4365 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4366 else if (VecWidth == 256 && !
High)
4367 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4368 else if (VecWidth == 512 && !
High)
4369 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4370 else if (VecWidth == 128 &&
High)
4371 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4372 else if (VecWidth == 256 &&
High)
4373 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4374 else if (VecWidth == 512 &&
High)
4375 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4381 Rep = Builder.CreateIntrinsic(IID, Args);
4385 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4386 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4387 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4388 bool ZeroMask = Name[11] ==
'z';
4389 bool IndexForm = Name[17] ==
'i';
4391 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4392 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4393 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4394 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4395 bool ZeroMask = Name[11] ==
'z';
4396 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4399 if (VecWidth == 128 && !IsSaturating)
4400 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4401 else if (VecWidth == 256 && !IsSaturating)
4402 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4403 else if (VecWidth == 512 && !IsSaturating)
4404 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4405 else if (VecWidth == 128 && IsSaturating)
4406 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4407 else if (VecWidth == 256 && IsSaturating)
4408 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4409 else if (VecWidth == 512 && IsSaturating)
4410 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4420 if (Args[1]->
getType()->isVectorTy() &&
4423 ->isIntegerTy(32) &&
4424 Args[2]->
getType()->isVectorTy() &&
4427 ->isIntegerTy(32)) {
4428 Type *NewArgType =
nullptr;
4429 if (VecWidth == 128)
4431 else if (VecWidth == 256)
4433 else if (VecWidth == 512)
4439 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4440 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4443 Rep = Builder.CreateIntrinsic(IID, Args);
4447 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4448 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4449 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4450 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4451 bool ZeroMask = Name[11] ==
'z';
4452 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4455 if (VecWidth == 128 && !IsSaturating)
4456 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4457 else if (VecWidth == 256 && !IsSaturating)
4458 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4459 else if (VecWidth == 512 && !IsSaturating)
4460 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4461 else if (VecWidth == 128 && IsSaturating)
4462 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4463 else if (VecWidth == 256 && IsSaturating)
4464 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4465 else if (VecWidth == 512 && IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4476 if (Args[1]->
getType()->isVectorTy() &&
4479 ->isIntegerTy(32) &&
4480 Args[2]->
getType()->isVectorTy() &&
4483 ->isIntegerTy(32)) {
4484 Type *NewArgType =
nullptr;
4485 if (VecWidth == 128)
4487 else if (VecWidth == 256)
4489 else if (VecWidth == 512)
4495 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4496 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4499 Rep = Builder.CreateIntrinsic(IID, Args);
4503 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4504 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4505 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4507 if (Name[0] ==
'a' && Name.back() ==
'2')
4508 IID = Intrinsic::x86_addcarry_32;
4509 else if (Name[0] ==
'a' && Name.back() ==
'4')
4510 IID = Intrinsic::x86_addcarry_64;
4511 else if (Name[0] ==
's' && Name.back() ==
'2')
4512 IID = Intrinsic::x86_subborrow_32;
4513 else if (Name[0] ==
's' && Name.back() ==
'4')
4514 IID = Intrinsic::x86_subborrow_64;
4521 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4524 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4527 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4531 }
else if (Name.starts_with(
"avx512.mask.") &&
4542 if (Name.starts_with(
"neon.bfcvt")) {
4543 if (Name.starts_with(
"neon.bfcvtn2")) {
4545 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4547 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4548 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4551 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4552 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4554 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4558 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4559 return Builder.CreateShuffleVector(
4562 return Builder.CreateFPTrunc(CI->
getOperand(0),
4565 }
else if (Name.starts_with(
"sve.fcvt")) {
4568 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4569 .
Case(
"sve.fcvtnt.bf16f32",
4570 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4582 if (Args[1]->
getType() != BadPredTy)
4585 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4586 BadPredTy, Args[1]);
4587 Args[1] = Builder.CreateIntrinsic(
4588 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4590 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4599 if (Name ==
"mve.vctp64.old") {
4602 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4605 Value *C1 = Builder.CreateIntrinsic(
4606 Intrinsic::arm_mve_pred_v2i,
4608 return Builder.CreateIntrinsic(
4609 Intrinsic::arm_mve_pred_i2v,
4611 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4612 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4613 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4614 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4616 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4617 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4618 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4619 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4621 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4622 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4623 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4624 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4625 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4626 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4627 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4628 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4629 std::vector<Type *> Tys;
4633 case Intrinsic::arm_mve_mull_int_predicated:
4634 case Intrinsic::arm_mve_vqdmull_predicated:
4635 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4638 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4639 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4640 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4644 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4648 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4652 case Intrinsic::arm_cde_vcx1q_predicated:
4653 case Intrinsic::arm_cde_vcx1qa_predicated:
4654 case Intrinsic::arm_cde_vcx2q_predicated:
4655 case Intrinsic::arm_cde_vcx2qa_predicated:
4656 case Intrinsic::arm_cde_vcx3q_predicated:
4657 case Intrinsic::arm_cde_vcx3qa_predicated:
4664 std::vector<Value *>
Ops;
4666 Type *Ty =
Op->getType();
4667 if (Ty->getScalarSizeInBits() == 1) {
4668 Value *C1 = Builder.CreateIntrinsic(
4669 Intrinsic::arm_mve_pred_v2i,
4671 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4676 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4691 auto UpgradeLegacyWMMAIUIntrinsicCall =
4696 Args.push_back(Builder.getFalse());
4700 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4707 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4712 NewCall->copyMetadata(*CI);
4716 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4717 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4718 "intrinsic should have 7 arguments");
4721 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4723 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4724 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4725 "intrinsic should have 8 arguments");
4730 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4733 switch (
F->getIntrinsicID()) {
4736 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4737 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4738 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4739 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4740 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4741 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4756 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4759 F->getParent(),
F->getIntrinsicID(), Overloads);
4764 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4769 NewCall->copyMetadata(*CI);
4770 NewCall->takeName(CI);
4792 if (NumOperands < 3)
4805 bool IsVolatile =
false;
4809 if (NumOperands > 3)
4814 if (NumOperands > 5) {
4816 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4830 if (VT->getElementType()->isIntegerTy(16)) {
4833 Val = Builder.CreateBitCast(Val, AsBF16);
4841 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4843 unsigned AddrSpace = PtrTy->getAddressSpace();
4846 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4848 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4853 MDNode *RangeNotPrivate =
4856 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4862 return Builder.CreateBitCast(RMW, RetTy);
4883 return MAV->getMetadata();
4890 return I->getDebugLoc().getAsMDNode();
4898 if (Name ==
"label") {
4901 }
else if (Name ==
"assign") {
4908 }
else if (Name ==
"declare") {
4913 }
else if (Name ==
"addr") {
4923 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4925 }
else if (Name ==
"value") {
4928 unsigned ExprOp = 2;
4942 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4950 int64_t OffsetVal =
Offset->getSExtValue();
4951 return Builder.CreateIntrinsic(OffsetVal >= 0
4952 ? Intrinsic::vector_splice_left
4953 : Intrinsic::vector_splice_right,
4955 {CI->getArgOperand(0), CI->getArgOperand(1),
4956 Builder.getInt32(std::abs(OffsetVal))});
4961 if (Name.starts_with(
"to.fp16")) {
4963 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4964 return Builder.CreateBitCast(Cast, CI->
getType());
4967 if (Name.starts_with(
"from.fp16")) {
4969 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4970 return Builder.CreateFPExt(Cast, CI->
getType());
4995 if (!Name.consume_front(
"llvm."))
4998 bool IsX86 = Name.consume_front(
"x86.");
4999 bool IsNVVM = Name.consume_front(
"nvvm.");
5000 bool IsAArch64 = Name.consume_front(
"aarch64.");
5001 bool IsARM = Name.consume_front(
"arm.");
5002 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5003 bool IsDbg = Name.consume_front(
"dbg.");
5005 (Name.consume_front(
"experimental.vector.splice") ||
5006 Name.consume_front(
"vector.splice")) &&
5007 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5008 Value *Rep =
nullptr;
5010 if (!IsX86 && Name ==
"stackprotectorcheck") {
5012 }
else if (IsNVVM) {
5016 }
else if (IsAArch64) {
5020 }
else if (IsAMDGCN) {
5024 }
else if (IsOldSplice) {
5026 }
else if (Name.consume_front(
"convert.")) {
5038 const auto &DefaultCase = [&]() ->
void {
5046 "Unknown function for CallBase upgrade and isn't just a name change");
5054 "Return type must have changed");
5055 assert(OldST->getNumElements() ==
5057 "Must have same number of elements");
5060 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5063 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5064 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5065 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5084 case Intrinsic::arm_neon_vst1:
5085 case Intrinsic::arm_neon_vst2:
5086 case Intrinsic::arm_neon_vst3:
5087 case Intrinsic::arm_neon_vst4:
5088 case Intrinsic::arm_neon_vst2lane:
5089 case Intrinsic::arm_neon_vst3lane:
5090 case Intrinsic::arm_neon_vst4lane: {
5092 NewCall = Builder.CreateCall(NewFn, Args);
5095 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5096 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5097 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5102 NewCall = Builder.CreateCall(NewFn, Args);
5105 case Intrinsic::aarch64_sve_ld3_sret:
5106 case Intrinsic::aarch64_sve_ld4_sret:
5107 case Intrinsic::aarch64_sve_ld2_sret: {
5115 Name = Name.substr(5);
5122 unsigned MinElts = RetTy->getMinNumElements() /
N;
5124 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5126 for (
unsigned I = 0;
I <
N;
I++) {
5127 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5128 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5134 case Intrinsic::coro_end: {
5137 NewCall = Builder.CreateCall(NewFn, Args);
5141 case Intrinsic::vector_extract: {
5143 Name = Name.substr(5);
5144 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5149 unsigned MinElts = RetTy->getMinNumElements();
5152 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5156 case Intrinsic::vector_insert: {
5158 Name = Name.substr(5);
5159 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5163 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5168 NewCall = Builder.CreateCall(
5172 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5178 assert(
N > 1 &&
"Create is expected to be between 2-4");
5181 unsigned MinElts = RetTy->getMinNumElements() /
N;
5182 for (
unsigned I = 0;
I <
N;
I++) {
5184 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5191 case Intrinsic::arm_neon_bfdot:
5192 case Intrinsic::arm_neon_bfmmla:
5193 case Intrinsic::arm_neon_bfmlalb:
5194 case Intrinsic::arm_neon_bfmlalt:
5195 case Intrinsic::aarch64_neon_bfdot:
5196 case Intrinsic::aarch64_neon_bfmmla:
5197 case Intrinsic::aarch64_neon_bfmlalb:
5198 case Intrinsic::aarch64_neon_bfmlalt: {
5201 "Mismatch between function args and call args");
5202 size_t OperandWidth =
5204 assert((OperandWidth == 64 || OperandWidth == 128) &&
5205 "Unexpected operand width");
5207 auto Iter = CI->
args().begin();
5208 Args.push_back(*Iter++);
5209 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5210 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5211 NewCall = Builder.CreateCall(NewFn, Args);
5215 case Intrinsic::bitreverse:
5216 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5219 case Intrinsic::ctlz:
5220 case Intrinsic::cttz: {
5227 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5231 case Intrinsic::objectsize: {
5232 Value *NullIsUnknownSize =
5236 NewCall = Builder.CreateCall(
5241 case Intrinsic::ctpop:
5242 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5244 case Intrinsic::dbg_value: {
5246 Name = Name.substr(5);
5248 if (Name.starts_with(
"dbg.addr")) {
5262 if (
Offset->isNullValue()) {
5263 NewCall = Builder.CreateCall(
5272 case Intrinsic::ptr_annotation:
5280 NewCall = Builder.CreateCall(
5289 case Intrinsic::var_annotation:
5296 NewCall = Builder.CreateCall(
5305 case Intrinsic::riscv_aes32dsi:
5306 case Intrinsic::riscv_aes32dsmi:
5307 case Intrinsic::riscv_aes32esi:
5308 case Intrinsic::riscv_aes32esmi:
5309 case Intrinsic::riscv_sm4ks:
5310 case Intrinsic::riscv_sm4ed: {
5320 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5321 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5327 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5328 Value *Res = NewCall;
5330 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5336 case Intrinsic::nvvm_mapa_shared_cluster: {
5340 Value *Res = NewCall;
5341 Res = Builder.CreateAddrSpaceCast(
5348 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5349 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5352 Args[0] = Builder.CreateAddrSpaceCast(
5355 NewCall = Builder.CreateCall(NewFn, Args);
5361 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5362 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5363 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5364 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5365 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5366 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5367 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5368 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5375 Args[0] = Builder.CreateAddrSpaceCast(
5384 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5386 NewCall = Builder.CreateCall(NewFn, Args);
5392 case Intrinsic::riscv_sha256sig0:
5393 case Intrinsic::riscv_sha256sig1:
5394 case Intrinsic::riscv_sha256sum0:
5395 case Intrinsic::riscv_sha256sum1:
5396 case Intrinsic::riscv_sm3p0:
5397 case Intrinsic::riscv_sm3p1: {
5404 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5406 NewCall = Builder.CreateCall(NewFn, Arg);
5408 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5415 case Intrinsic::x86_xop_vfrcz_ss:
5416 case Intrinsic::x86_xop_vfrcz_sd:
5417 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5420 case Intrinsic::x86_xop_vpermil2pd:
5421 case Intrinsic::x86_xop_vpermil2ps:
5422 case Intrinsic::x86_xop_vpermil2pd_256:
5423 case Intrinsic::x86_xop_vpermil2ps_256: {
5427 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5428 NewCall = Builder.CreateCall(NewFn, Args);
5432 case Intrinsic::x86_sse41_ptestc:
5433 case Intrinsic::x86_sse41_ptestz:
5434 case Intrinsic::x86_sse41_ptestnzc: {
5448 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5449 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5451 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5455 case Intrinsic::x86_rdtscp: {
5461 NewCall = Builder.CreateCall(NewFn);
5463 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5466 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5474 case Intrinsic::x86_sse41_insertps:
5475 case Intrinsic::x86_sse41_dppd:
5476 case Intrinsic::x86_sse41_dpps:
5477 case Intrinsic::x86_sse41_mpsadbw:
5478 case Intrinsic::x86_avx_dp_ps_256:
5479 case Intrinsic::x86_avx2_mpsadbw: {
5485 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5486 NewCall = Builder.CreateCall(NewFn, Args);
5490 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5491 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5492 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5493 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5494 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5495 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5501 NewCall = Builder.CreateCall(NewFn, Args);
5510 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5511 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5512 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5513 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5514 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5515 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5519 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5520 Args[1] = Builder.CreateBitCast(
5523 NewCall = Builder.CreateCall(NewFn, Args);
5524 Value *Res = Builder.CreateBitCast(
5532 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5533 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5534 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5538 Args[1] = Builder.CreateBitCast(
5540 Args[2] = Builder.CreateBitCast(
5543 NewCall = Builder.CreateCall(NewFn, Args);
5547 case Intrinsic::thread_pointer: {
5548 NewCall = Builder.CreateCall(NewFn, {});
5552 case Intrinsic::memcpy:
5553 case Intrinsic::memmove:
5554 case Intrinsic::memset: {
5570 NewCall = Builder.CreateCall(NewFn, Args);
5572 AttributeList NewAttrs = AttributeList::get(
5573 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5574 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5575 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5580 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5583 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5587 case Intrinsic::masked_load:
5588 case Intrinsic::masked_gather:
5589 case Intrinsic::masked_store:
5590 case Intrinsic::masked_scatter: {
5596 auto GetMaybeAlign = [](
Value *
Op) {
5606 auto GetAlign = [&](
Value *
Op) {
5615 case Intrinsic::masked_load:
5616 NewCall = Builder.CreateMaskedLoad(
5620 case Intrinsic::masked_gather:
5621 NewCall = Builder.CreateMaskedGather(
5627 case Intrinsic::masked_store:
5628 NewCall = Builder.CreateMaskedStore(
5632 case Intrinsic::masked_scatter:
5633 NewCall = Builder.CreateMaskedScatter(
5635 DL.getValueOrABITypeAlignment(
5649 case Intrinsic::lifetime_start:
5650 case Intrinsic::lifetime_end: {
5662 NewCall = Builder.CreateLifetimeStart(Ptr);
5664 NewCall = Builder.CreateLifetimeEnd(Ptr);
5673 case Intrinsic::x86_avx512_vpdpbusd_128:
5674 case Intrinsic::x86_avx512_vpdpbusd_256:
5675 case Intrinsic::x86_avx512_vpdpbusd_512:
5676 case Intrinsic::x86_avx512_vpdpbusds_128:
5677 case Intrinsic::x86_avx512_vpdpbusds_256:
5678 case Intrinsic::x86_avx512_vpdpbusds_512:
5679 case Intrinsic::x86_avx2_vpdpbssd_128:
5680 case Intrinsic::x86_avx2_vpdpbssd_256:
5681 case Intrinsic::x86_avx10_vpdpbssd_512:
5682 case Intrinsic::x86_avx2_vpdpbssds_128:
5683 case Intrinsic::x86_avx2_vpdpbssds_256:
5684 case Intrinsic::x86_avx10_vpdpbssds_512:
5685 case Intrinsic::x86_avx2_vpdpbsud_128:
5686 case Intrinsic::x86_avx2_vpdpbsud_256:
5687 case Intrinsic::x86_avx10_vpdpbsud_512:
5688 case Intrinsic::x86_avx2_vpdpbsuds_128:
5689 case Intrinsic::x86_avx2_vpdpbsuds_256:
5690 case Intrinsic::x86_avx10_vpdpbsuds_512:
5691 case Intrinsic::x86_avx2_vpdpbuud_128:
5692 case Intrinsic::x86_avx2_vpdpbuud_256:
5693 case Intrinsic::x86_avx10_vpdpbuud_512:
5694 case Intrinsic::x86_avx2_vpdpbuuds_128:
5695 case Intrinsic::x86_avx2_vpdpbuuds_256:
5696 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5701 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5702 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5704 NewCall = Builder.CreateCall(NewFn, Args);
5707 case Intrinsic::x86_avx512_vpdpwssd_128:
5708 case Intrinsic::x86_avx512_vpdpwssd_256:
5709 case Intrinsic::x86_avx512_vpdpwssd_512:
5710 case Intrinsic::x86_avx512_vpdpwssds_128:
5711 case Intrinsic::x86_avx512_vpdpwssds_256:
5712 case Intrinsic::x86_avx512_vpdpwssds_512:
5713 case Intrinsic::x86_avx2_vpdpwsud_128:
5714 case Intrinsic::x86_avx2_vpdpwsud_256:
5715 case Intrinsic::x86_avx10_vpdpwsud_512:
5716 case Intrinsic::x86_avx2_vpdpwsuds_128:
5717 case Intrinsic::x86_avx2_vpdpwsuds_256:
5718 case Intrinsic::x86_avx10_vpdpwsuds_512:
5719 case Intrinsic::x86_avx2_vpdpwusd_128:
5720 case Intrinsic::x86_avx2_vpdpwusd_256:
5721 case Intrinsic::x86_avx10_vpdpwusd_512:
5722 case Intrinsic::x86_avx2_vpdpwusds_128:
5723 case Intrinsic::x86_avx2_vpdpwusds_256:
5724 case Intrinsic::x86_avx10_vpdpwusds_512:
5725 case Intrinsic::x86_avx2_vpdpwuud_128:
5726 case Intrinsic::x86_avx2_vpdpwuud_256:
5727 case Intrinsic::x86_avx10_vpdpwuud_512:
5728 case Intrinsic::x86_avx2_vpdpwuuds_128:
5729 case Intrinsic::x86_avx2_vpdpwuuds_256:
5730 case Intrinsic::x86_avx10_vpdpwuuds_512:
5735 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5736 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5738 NewCall = Builder.CreateCall(NewFn, Args);
5741 assert(NewCall &&
"Should have either set this variable or returned through "
5742 "the default case");
5749 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5763 F->eraseFromParent();
5769 if (NumOperands == 0)
5777 if (NumOperands == 3) {
5781 Metadata *Elts2[] = {ScalarType, ScalarType,
5795 if (
Opc != Instruction::BitCast)
5799 Type *SrcTy = V->getType();
5816 if (
Opc != Instruction::BitCast)
5819 Type *SrcTy =
C->getType();
5846 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5847 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5848 if (Flag->getNumOperands() < 3)
5850 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5851 return K->getString() ==
"Debug Info Version";
5854 if (OpIt != ModFlags->op_end()) {
5855 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5862 bool BrokenDebugInfo =
false;
5865 if (!BrokenDebugInfo)
5871 M.getContext().diagnose(Diag);
5878 M.getContext().diagnose(DiagVersion);
5888 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5891 if (
F->hasFnAttribute(Attr)) {
5894 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5896 auto [Part, Rest] = S.
split(
',');
5902 const unsigned Dim = DimC -
'x';
5903 assert(Dim < 3 &&
"Unexpected dim char");
5913 F->addFnAttr(Attr, NewAttr);
5917 return S ==
"x" || S ==
"y" || S ==
"z";
5922 if (K ==
"kernel") {
5934 const unsigned Idx = (AlignIdxValuePair >> 16);
5935 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5940 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5945 if (K ==
"minctasm") {
5950 if (K ==
"maxnreg") {
5955 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5959 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5963 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5967 if (K ==
"grid_constant") {
5982 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5989 if (!SeenNodes.
insert(MD).second)
5996 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6003 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6005 const MDOperand &V = MD->getOperand(j + 1);
6008 NewOperands.
append({K, V});
6011 if (NewOperands.
size() > 1)
6024 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6025 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6026 if (ModRetainReleaseMarker) {
6032 ID->getString().split(ValueComp,
"#");
6033 if (ValueComp.
size() == 2) {
6034 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6038 M.eraseNamedMetadata(ModRetainReleaseMarker);
6049 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6075 bool InvalidCast =
false;
6077 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6090 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6092 Args.push_back(Arg);
6099 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6104 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6117 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6125 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6126 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6127 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6128 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6129 {
"objc_autoreleaseReturnValue",
6130 llvm::Intrinsic::objc_autoreleaseReturnValue},
6131 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6132 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6133 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6134 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6135 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6136 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6137 {
"objc_release", llvm::Intrinsic::objc_release},
6138 {
"objc_retain", llvm::Intrinsic::objc_retain},
6139 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6140 {
"objc_retainAutoreleaseReturnValue",
6141 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6142 {
"objc_retainAutoreleasedReturnValue",
6143 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6144 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6145 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6146 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6147 {
"objc_unsafeClaimAutoreleasedReturnValue",
6148 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6149 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6150 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6151 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6152 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6153 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6154 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6155 {
"objc_arc_annotation_topdown_bbstart",
6156 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6157 {
"objc_arc_annotation_topdown_bbend",
6158 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6159 {
"objc_arc_annotation_bottomup_bbstart",
6160 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6161 {
"objc_arc_annotation_bottomup_bbend",
6162 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6164 for (
auto &
I : RuntimeFuncs)
6165 UpgradeToIntrinsic(
I.first,
I.second);
6169 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6173 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6174 bool HasSwiftVersionFlag =
false;
6175 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6182 if (
Op->getNumOperands() != 3)
6196 if (
ID->getString() ==
"Objective-C Image Info Version")
6198 if (
ID->getString() ==
"Objective-C Class Properties")
6199 HasClassProperties =
true;
6201 if (
ID->getString() ==
"PIC Level") {
6202 if (
auto *Behavior =
6204 uint64_t V = Behavior->getLimitedValue();
6210 if (
ID->getString() ==
"PIE Level")
6211 if (
auto *Behavior =
6218 if (
ID->getString() ==
"branch-target-enforcement" ||
6219 ID->getString().starts_with(
"sign-return-address")) {
6220 if (
auto *Behavior =
6226 Op->getOperand(1),
Op->getOperand(2)};
6236 if (
ID->getString() ==
"Objective-C Image Info Section") {
6239 Value->getString().split(ValueComp,
" ");
6240 if (ValueComp.
size() != 1) {
6241 std::string NewValue;
6242 for (
auto &S : ValueComp)
6243 NewValue += S.str();
6254 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6257 assert(Md->getValue() &&
"Expected non-empty metadata");
6258 auto Type = Md->getValue()->getType();
6261 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6262 if ((Val & 0xff) != Val) {
6263 HasSwiftVersionFlag =
true;
6264 SwiftABIVersion = (Val & 0xff00) >> 8;
6265 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6266 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6277 if (
ID->getString() ==
"amdgpu_code_object_version") {
6280 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6292 if (HasObjCFlag && !HasClassProperties) {
6298 if (HasSwiftVersionFlag) {
6302 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6304 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6312 auto TrimSpaces = [](
StringRef Section) -> std::string {
6314 Section.split(Components,
',');
6319 for (
auto Component : Components)
6320 OS <<
',' << Component.trim();
6325 for (
auto &GV : M.globals()) {
6326 if (!GV.hasSection())
6331 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6336 GV.setSection(TrimSpaces(Section));
6352struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6353 StrictFPUpgradeVisitor() =
default;
6356 if (!
Call.isStrictFP())
6362 Call.removeFnAttr(Attribute::StrictFP);
6363 Call.addFnAttr(Attribute::NoBuiltin);
6368struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6369 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6370 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6372 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6387 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6388 StrictFPUpgradeVisitor SFPV;
6393 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6394 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6395 for (
auto &Arg :
F.args())
6397 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6399 bool AddingAttrs =
false, RemovingAttrs =
false;
6400 AttrBuilder AttrsToAdd(
F.getContext());
6405 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6406 A.isValid() &&
A.isStringAttribute()) {
6407 F.setSection(
A.getValueAsString());
6409 RemovingAttrs =
true;
6413 A.isValid() &&
A.isStringAttribute()) {
6416 AddingAttrs = RemovingAttrs =
true;
6419 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6420 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6422 RemovingAttrs =
true;
6423 if (
A.getValueAsString() ==
"true") {
6424 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6433 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6436 if (
A.getValueAsBool()) {
6437 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6443 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6444 RemovingAttrs =
true;
6451 bool HandleDenormalMode =
false;
6453 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6456 DenormalFPMath = ParsedMode;
6458 AddingAttrs = RemovingAttrs =
true;
6459 HandleDenormalMode =
true;
6463 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6467 DenormalFPMathF32 = ParsedMode;
6469 AddingAttrs = RemovingAttrs =
true;
6470 HandleDenormalMode =
true;
6474 if (HandleDenormalMode)
6475 AttrsToAdd.addDenormalFPEnvAttr(
6479 F.removeFnAttrs(AttrsToRemove);
6482 F.addFnAttrs(AttrsToAdd);
6488 if (!
F.hasFnAttribute(FnAttrName))
6489 F.addFnAttr(FnAttrName,
Value);
6496 if (!
F.hasFnAttribute(FnAttrName)) {
6498 F.addFnAttr(FnAttrName);
6500 auto A =
F.getFnAttribute(FnAttrName);
6501 if (
"false" ==
A.getValueAsString())
6502 F.removeFnAttr(FnAttrName);
6503 else if (
"true" ==
A.getValueAsString()) {
6504 F.removeFnAttr(FnAttrName);
6505 F.addFnAttr(FnAttrName);
6511 Triple T(M.getTargetTriple());
6512 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6522 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6526 if (
Op->getNumOperands() != 3)
6535 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6536 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6537 : IDStr ==
"guarded-control-stack" ? &GCSValue
6538 : IDStr ==
"sign-return-address" ? &SRAValue
6539 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6540 : IDStr ==
"sign-return-address-with-bkey"
6546 *ValPtr = CI->getZExtValue();
6552 bool BTE = BTEValue == 1;
6553 bool BPPLR = BPPLRValue == 1;
6554 bool GCS = GCSValue == 1;
6555 bool SRA = SRAValue == 1;
6558 if (SRA && SRAALLValue == 1)
6559 SignTypeValue =
"all";
6562 if (SRA && SRABKeyValue == 1)
6563 SignKeyValue =
"b_key";
6565 for (
Function &
F : M.getFunctionList()) {
6566 if (
F.isDeclaration())
6573 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6574 A.isValid() &&
"none" ==
A.getValueAsString()) {
6575 F.removeFnAttr(
"sign-return-address");
6576 F.removeFnAttr(
"sign-return-address-key");
6592 if (SRAALLValue == 1)
6594 if (SRABKeyValue == 1)
6603 if (
T->getNumOperands() < 1)
6608 return S->getString().starts_with(
"llvm.vectorizer.");
6612 StringRef OldPrefix =
"llvm.vectorizer.";
6615 if (OldTag ==
"llvm.vectorizer.unroll")
6627 if (
T->getNumOperands() < 1)
6632 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6637 Ops.reserve(
T->getNumOperands());
6639 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6640 Ops.push_back(
T->getOperand(
I));
6654 Ops.reserve(
T->getNumOperands());
6665 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6666 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6667 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6670 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6672 auto I =
DL.find(
"-n64-");
6674 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6679 std::string Res =
DL.str();
6682 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6683 Res.append(Res.empty() ?
"G1" :
"-G1");
6691 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6692 Res.append(
"-ni:7:8:9");
6694 if (
DL.ends_with(
"ni:7"))
6696 if (
DL.ends_with(
"ni:7:8"))
6701 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6702 Res.append(
"-p7:160:256:256:32");
6703 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6704 Res.append(
"-p8:128:128:128:48");
6705 constexpr StringRef OldP8(
"-p8:128:128-");
6706 if (
DL.contains(OldP8))
6707 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6708 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6709 Res.append(
"-p9:192:256:256:32");
6713 if (!
DL.contains(
"m:e"))
6714 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6719 if (
T.isSystemZ() && !
DL.empty()) {
6721 if (!
DL.contains(
"-S64"))
6722 return "E-S64" +
DL.drop_front(1).str();
6726 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6729 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6730 if (!
DL.contains(AddrSpaces)) {
6732 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6733 if (R.match(Res, &
Groups))
6739 if (
T.isAArch64()) {
6741 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6742 Res.append(
"-Fn32");
6743 AddPtr32Ptr64AddrSpaces();
6747 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6751 std::string I64 =
"-i64:64";
6752 std::string I128 =
"-i128:128";
6754 size_t Pos = Res.find(I64);
6755 if (Pos !=
size_t(-1))
6756 Res.insert(Pos + I64.size(), I128);
6760 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6761 size_t Pos = Res.find(
"-S128");
6764 Res.insert(Pos,
"-f64:32:64");
6770 AddPtr32Ptr64AddrSpaces();
6778 if (!
T.isOSIAMCU()) {
6779 std::string I128 =
"-i128:128";
6782 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6783 if (R.match(Res, &
Groups))
6791 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6793 auto I =
Ref.find(
"-f80:32-");
6795 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6803 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6806 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6807 B.removeAttribute(
"no-frame-pointer-elim");
6809 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6811 if (FramePointer !=
"all")
6812 FramePointer =
"non-leaf";
6813 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6815 if (!FramePointer.
empty())
6816 B.addAttribute(
"frame-pointer", FramePointer);
6818 A =
B.getAttribute(
"null-pointer-is-valid");
6821 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6822 B.removeAttribute(
"null-pointer-is-valid");
6823 if (NullPointerIsValid)
6824 B.addAttribute(Attribute::NullPointerIsValid);
6827 A =
B.getAttribute(
"uniform-work-group-size");
6831 bool IsTrue = Val ==
"true";
6832 B.removeAttribute(
"uniform-work-group-size");
6834 B.addAttribute(
"uniform-work-group-size");
6845 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to calling a function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function is not an intrinsic, or if this is an indirect call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresolved.
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intrinsic if the function is not an intrinsic.
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports these flags.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the .td file.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces.
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.