34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name.consume_back(
".lane")) {
1011 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1012 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1013 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1025 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1030 if (Name.consume_front(
"addqv")) {
1032 if (!
F->getReturnType()->isFPOrFPVectorTy())
1035 auto Args =
F->getFunctionType()->params();
1036 Type *Tys[] = {
F->getReturnType(), Args[1]};
1038 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1042 if (Name.consume_front(
"ld")) {
1044 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1045 if (LdRegex.
match(Name)) {
1052 Intrinsic::aarch64_sve_ld2_sret,
1053 Intrinsic::aarch64_sve_ld3_sret,
1054 Intrinsic::aarch64_sve_ld4_sret,
1057 LoadIDs[Name[0] -
'2'], Ty);
1063 if (Name.consume_front(
"tuple.")) {
1065 if (Name.starts_with(
"get")) {
1067 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1069 F->getParent(), Intrinsic::vector_extract, Tys);
1073 if (Name.starts_with(
"set")) {
1075 auto Args =
F->getFunctionType()->params();
1076 Type *Tys[] = {Args[0], Args[2], Args[1]};
1078 F->getParent(), Intrinsic::vector_insert, Tys);
1082 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1083 if (CreateTupleRegex.
match(Name)) {
1085 auto Args =
F->getFunctionType()->params();
1086 Type *Tys[] = {
F->getReturnType(), Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1094 if (Name.starts_with(
"rev.nxv")) {
1097 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1109 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1113 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1115 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1117 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1118 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1119 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1120 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1121 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1122 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1131 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1145 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1146 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1156 if (Name.consume_front(
"mapa.shared.cluster"))
1157 if (
F->getReturnType()->getPointerAddressSpace() ==
1159 return Intrinsic::nvvm_mapa_shared_cluster;
1161 if (Name.consume_front(
"cp.async.bulk.")) {
1164 .
Case(
"global.to.shared.cluster",
1165 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1166 .
Case(
"shared.cta.to.cluster",
1167 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1171 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1180 if (Name.consume_front(
"fma.rn."))
1182 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1183 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1184 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1185 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1188 if (Name.consume_front(
"fmax."))
1190 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1191 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1192 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1193 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1194 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1195 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1196 .
Case(
"ftz.nan.xorsign.abs.bf16",
1197 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1198 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1199 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1200 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1201 .
Case(
"ftz.xorsign.abs.bf16x2",
1202 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1203 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1204 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1205 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1206 .
Case(
"nan.xorsign.abs.bf16x2",
1207 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1208 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1209 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1212 if (Name.consume_front(
"fmin."))
1214 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1215 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1216 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1217 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1218 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1219 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1220 .
Case(
"ftz.nan.xorsign.abs.bf16",
1221 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1222 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1223 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1224 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1225 .
Case(
"ftz.xorsign.abs.bf16x2",
1226 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1227 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1228 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1229 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1230 .
Case(
"nan.xorsign.abs.bf16x2",
1231 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1232 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1233 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1236 if (Name.consume_front(
"neg."))
1238 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1239 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1246 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1247 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1248 Name.consume_front(
"param");
1254 if (Name.starts_with(
"to.fp16")) {
1258 FuncTy->getReturnType());
1261 if (Name.starts_with(
"from.fp16")) {
1265 FuncTy->getReturnType());
1272 bool CanUpgradeDebugIntrinsicsToRecords) {
1273 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1278 if (!Name.consume_front(
"llvm.") || Name.empty())
1284 bool IsArm = Name.consume_front(
"arm.");
1285 if (IsArm || Name.consume_front(
"aarch64.")) {
1291 if (Name.consume_front(
"amdgcn.")) {
1292 if (Name ==
"alignbit") {
1295 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1299 if (Name.consume_front(
"atomic.")) {
1300 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1301 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1311 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1312 F->arg_size() == 7) {
1316 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1317 F->arg_size() == 8) {
1322 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1323 Name.consume_front(
"flat.atomic.")) {
1324 if (Name.starts_with(
"fadd") ||
1326 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1327 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1335 if (Name.starts_with(
"ldexp.")) {
1338 F->getParent(), Intrinsic::ldexp,
1339 {F->getReturnType(), F->getArg(1)->getType()});
1348 if (
F->arg_size() == 1) {
1349 if (Name.consume_front(
"convert.")) {
1363 F->arg_begin()->getType());
1368 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1371 Intrinsic::coro_end);
1378 if (Name.consume_front(
"dbg.")) {
1380 if (CanUpgradeDebugIntrinsicsToRecords) {
1381 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1382 Name ==
"declare" || Name ==
"label") {
1391 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1394 Intrinsic::dbg_value);
1401 if (Name.consume_front(
"experimental.vector.")) {
1407 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1408 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1409 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1410 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1411 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1413 Intrinsic::vector_partial_reduce_add)
1416 const auto *FT =
F->getFunctionType();
1418 if (
ID == Intrinsic::vector_extract ||
1419 ID == Intrinsic::vector_interleave2)
1422 if (
ID != Intrinsic::vector_interleave2)
1424 if (
ID == Intrinsic::vector_insert ||
1425 ID == Intrinsic::vector_partial_reduce_add)
1433 if (Name.consume_front(
"reduce.")) {
1435 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1436 if (R.match(Name, &
Groups))
1438 .
Case(
"add", Intrinsic::vector_reduce_add)
1439 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1440 .
Case(
"and", Intrinsic::vector_reduce_and)
1441 .
Case(
"or", Intrinsic::vector_reduce_or)
1442 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1443 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1444 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1445 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1446 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1447 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1448 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1453 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1458 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1459 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1464 auto Args =
F->getFunctionType()->params();
1466 {Args[V2 ? 1 : 0]});
1472 if (Name.consume_front(
"splice"))
1476 if (Name.consume_front(
"experimental.stepvector.")) {
1480 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1485 if (Name.starts_with(
"flt.rounds")) {
1488 Intrinsic::get_rounding);
1493 if (Name.starts_with(
"invariant.group.barrier")) {
1495 auto Args =
F->getFunctionType()->params();
1496 Type* ObjectPtr[1] = {Args[0]};
1499 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1504 if ((Name.starts_with(
"lifetime.start") ||
1505 Name.starts_with(
"lifetime.end")) &&
1506 F->arg_size() == 2) {
1508 ? Intrinsic::lifetime_start
1509 : Intrinsic::lifetime_end;
1512 F->getArg(0)->getType());
1521 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1522 .StartsWith(
"memmove.", Intrinsic::memmove)
1524 if (
F->arg_size() == 5) {
1528 F->getFunctionType()->params().slice(0, 3);
1534 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1537 const auto *FT =
F->getFunctionType();
1538 Type *ParamTypes[2] = {
1539 FT->getParamType(0),
1543 Intrinsic::memset, ParamTypes);
1549 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1550 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1551 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1552 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1554 if (MaskedID &&
F->arg_size() == 4) {
1556 if (MaskedID == Intrinsic::masked_load ||
1557 MaskedID == Intrinsic::masked_gather) {
1559 F->getParent(), MaskedID,
1560 {F->getReturnType(), F->getArg(0)->getType()});
1564 F->getParent(), MaskedID,
1565 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1571 if (Name.consume_front(
"nvvm.")) {
1573 if (
F->arg_size() == 1) {
1576 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1577 .Case(
"clz.i", Intrinsic::ctlz)
1578 .
Case(
"popc.i", Intrinsic::ctpop)
1582 {F->getReturnType()});
1585 }
else if (
F->arg_size() == 2) {
1588 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1589 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1590 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1591 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1595 {F->getReturnType()});
1601 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1629 bool Expand =
false;
1630 if (Name.consume_front(
"abs."))
1633 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1634 else if (Name.consume_front(
"fabs."))
1636 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1637 else if (Name.consume_front(
"ex2.approx."))
1640 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1641 else if (Name.consume_front(
"atomic.load."))
1650 else if (Name.consume_front(
"bitcast."))
1653 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1654 else if (Name.consume_front(
"rotate."))
1656 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1657 else if (Name.consume_front(
"ptr.gen.to."))
1660 else if (Name.consume_front(
"ptr."))
1663 else if (Name.consume_front(
"ldg.global."))
1665 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1666 Name.starts_with(
"p."));
1669 .
Case(
"barrier0",
true)
1670 .
Case(
"barrier.n",
true)
1671 .
Case(
"barrier.sync.cnt",
true)
1672 .
Case(
"barrier.sync",
true)
1673 .
Case(
"barrier",
true)
1674 .
Case(
"bar.sync",
true)
1675 .
Case(
"barrier0.popc",
true)
1676 .
Case(
"barrier0.and",
true)
1677 .
Case(
"barrier0.or",
true)
1678 .
Case(
"clz.ll",
true)
1679 .
Case(
"popc.ll",
true)
1681 .
Case(
"swap.lo.hi.b64",
true)
1682 .
Case(
"tanh.approx.f32",
true)
1694 if (Name.starts_with(
"objectsize.")) {
1695 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1696 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1699 Intrinsic::objectsize, Tys);
1706 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1709 F->getParent(), Intrinsic::ptr_annotation,
1710 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1716 if (Name.consume_front(
"riscv.")) {
1719 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1720 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1721 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1722 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1725 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1738 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1739 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1748 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1749 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1750 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1751 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1756 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1765 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1767 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1776 if (Name ==
"stackprotectorcheck") {
1783 if (Name ==
"thread.pointer") {
1785 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1791 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1794 F->getParent(), Intrinsic::var_annotation,
1795 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1798 if (Name.consume_front(
"vector.splice")) {
1799 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1807 if (Name.consume_front(
"wasm.")) {
1810 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1811 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1812 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1817 F->getReturnType());
1821 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1823 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1825 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1844 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1853 auto *FT =
F->getFunctionType();
1856 std::string
Name =
F->getName().str();
1859 Name,
F->getParent());
1870 if (Result != std::nullopt) {
1883 bool CanUpgradeDebugIntrinsicsToRecords) {
1903 GV->
getName() ==
"llvm.global_dtors")) ||
1918 unsigned N =
Init->getNumOperands();
1919 std::vector<Constant *> NewCtors(
N);
1920 for (
unsigned i = 0; i !=
N; ++i) {
1923 Ctor->getAggregateElement(1),
1937 unsigned NumElts = ResultTy->getNumElements() * 8;
1941 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1951 for (
unsigned l = 0; l != NumElts; l += 16)
1952 for (
unsigned i = 0; i != 16; ++i) {
1953 unsigned Idx = NumElts + i - Shift;
1955 Idx -= NumElts - 16;
1956 Idxs[l + i] = Idx + l;
1959 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1963 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1971 unsigned NumElts = ResultTy->getNumElements() * 8;
1975 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1985 for (
unsigned l = 0; l != NumElts; l += 16)
1986 for (
unsigned i = 0; i != 16; ++i) {
1987 unsigned Idx = i + Shift;
1989 Idx += NumElts - 16;
1990 Idxs[l + i] = Idx + l;
1993 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
1997 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2005 Mask = Builder.CreateBitCast(Mask, MaskTy);
2011 for (
unsigned i = 0; i != NumElts; ++i)
2013 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2024 if (
C->isAllOnesValue())
2029 return Builder.CreateSelect(Mask, Op0, Op1);
2036 if (
C->isAllOnesValue())
2040 Mask->getType()->getIntegerBitWidth());
2041 Mask = Builder.CreateBitCast(Mask, MaskTy);
2042 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2043 return Builder.CreateSelect(Mask, Op0, Op1);
2056 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2057 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2062 ShiftVal &= (NumElts - 1);
2071 if (ShiftVal > 16) {
2079 for (
unsigned l = 0; l < NumElts; l += 16) {
2080 for (
unsigned i = 0; i != 16; ++i) {
2081 unsigned Idx = ShiftVal + i;
2082 if (!IsVALIGN && Idx >= 16)
2083 Idx += NumElts - 16;
2084 Indices[l + i] = Idx + l;
2089 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2095 bool ZeroMask,
bool IndexForm) {
2098 unsigned EltWidth = Ty->getScalarSizeInBits();
2099 bool IsFloat = Ty->isFPOrFPVectorTy();
// NOTE(review): the numeric prefix on each line below is an artifact of the
// source extraction (an original-file line number fused into the text), not code.
// This fragment selects the AVX-512 VPERMI2VAR intrinsic ID from the operand's
// vector width (bits) and element width (bits), plus float-vs-integer for the
// 32/64-bit element cases. 8/16-bit elements have no float variant, so those
// clauses test only the widths. The enclosing function header, the declaration
// of IID, and the no-match fall-through are elided from this extraction —
// presumably the final `else` is unreachable/asserting; confirm in the full file.
// 32-bit and 64-bit elements: float (ps/pd) vs. integer (d/q) variants.
2101 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2103 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2105 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2107 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2108 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2109 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2110 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2111 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2112 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2113 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2115 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2116 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2117 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2118 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2119 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2120 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2121 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2123 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
// 16-bit ("hi") and 8-bit ("qi") elements: integer-only variants.
2125 else if (VecWidth == 128 && EltWidth == 16)
2126 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2127 else if (VecWidth == 256 && EltWidth == 16)
2128 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2129 else if (VecWidth == 512 && EltWidth == 16)
2130 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2131 else if (VecWidth == 128 && EltWidth == 8)
2132 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2133 else if (VecWidth == 256 && EltWidth == 8)
2134 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2135 else if (VecWidth == 512 && EltWidth == 8)
2136 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2147 Value *V = Builder.CreateIntrinsic(IID, Args);
2159 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2170 bool IsRotateRight) {
2180 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2181 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2184 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2185 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2230 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2235 bool IsShiftRight,
bool ZeroMask) {
2249 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2250 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2253 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2254 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2269 const Align Alignment =
2271 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2276 if (
C->isAllOnesValue())
2277 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2282 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2288 const Align Alignment =
2297 if (
C->isAllOnesValue())
2298 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2303 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2309 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2310 {Op0, Builder.getInt1(
false)});
2325 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2326 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2327 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2328 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2329 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2332 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2333 LHS = Builder.CreateAnd(
LHS, Mask);
2334 RHS = Builder.CreateAnd(
RHS, Mask);
2351 if (!
C || !
C->isAllOnesValue())
2352 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2357 for (
unsigned i = 0; i != NumElts; ++i)
2359 for (
unsigned i = NumElts; i != 8; ++i)
2360 Indices[i] = NumElts + i % NumElts;
2361 Vec = Builder.CreateShuffleVector(Vec,
2365 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2369 unsigned CC,
bool Signed) {
2377 }
else if (CC == 7) {
2413 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2414 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2416 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2417 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2426 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2432 Name = Name.substr(12);
2437 if (Name.starts_with(
"max.p")) {
2438 if (VecWidth == 128 && EltWidth == 32)
2439 IID = Intrinsic::x86_sse_max_ps;
2440 else if (VecWidth == 128 && EltWidth == 64)
2441 IID = Intrinsic::x86_sse2_max_pd;
2442 else if (VecWidth == 256 && EltWidth == 32)
2443 IID = Intrinsic::x86_avx_max_ps_256;
2444 else if (VecWidth == 256 && EltWidth == 64)
2445 IID = Intrinsic::x86_avx_max_pd_256;
2448 }
else if (Name.starts_with(
"min.p")) {
2449 if (VecWidth == 128 && EltWidth == 32)
2450 IID = Intrinsic::x86_sse_min_ps;
2451 else if (VecWidth == 128 && EltWidth == 64)
2452 IID = Intrinsic::x86_sse2_min_pd;
2453 else if (VecWidth == 256 && EltWidth == 32)
2454 IID = Intrinsic::x86_avx_min_ps_256;
2455 else if (VecWidth == 256 && EltWidth == 64)
2456 IID = Intrinsic::x86_avx_min_pd_256;
2459 }
else if (Name.starts_with(
"pshuf.b.")) {
2460 if (VecWidth == 128)
2461 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2462 else if (VecWidth == 256)
2463 IID = Intrinsic::x86_avx2_pshuf_b;
2464 else if (VecWidth == 512)
2465 IID = Intrinsic::x86_avx512_pshuf_b_512;
2468 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2469 if (VecWidth == 128)
2470 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2471 else if (VecWidth == 256)
2472 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2473 else if (VecWidth == 512)
2474 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2477 }
else if (Name.starts_with(
"pmulh.w.")) {
2478 if (VecWidth == 128)
2479 IID = Intrinsic::x86_sse2_pmulh_w;
2480 else if (VecWidth == 256)
2481 IID = Intrinsic::x86_avx2_pmulh_w;
2482 else if (VecWidth == 512)
2483 IID = Intrinsic::x86_avx512_pmulh_w_512;
2486 }
else if (Name.starts_with(
"pmulhu.w.")) {
2487 if (VecWidth == 128)
2488 IID = Intrinsic::x86_sse2_pmulhu_w;
2489 else if (VecWidth == 256)
2490 IID = Intrinsic::x86_avx2_pmulhu_w;
2491 else if (VecWidth == 512)
2492 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2495 }
else if (Name.starts_with(
"pmaddw.d.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_sse2_pmadd_wd;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_pmadd_wd;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2504 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2505 if (VecWidth == 128)
2506 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2507 else if (VecWidth == 256)
2508 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2509 else if (VecWidth == 512)
2510 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2513 }
else if (Name.starts_with(
"packsswb.")) {
2514 if (VecWidth == 128)
2515 IID = Intrinsic::x86_sse2_packsswb_128;
2516 else if (VecWidth == 256)
2517 IID = Intrinsic::x86_avx2_packsswb;
2518 else if (VecWidth == 512)
2519 IID = Intrinsic::x86_avx512_packsswb_512;
2522 }
else if (Name.starts_with(
"packssdw.")) {
2523 if (VecWidth == 128)
2524 IID = Intrinsic::x86_sse2_packssdw_128;
2525 else if (VecWidth == 256)
2526 IID = Intrinsic::x86_avx2_packssdw;
2527 else if (VecWidth == 512)
2528 IID = Intrinsic::x86_avx512_packssdw_512;
2531 }
else if (Name.starts_with(
"packuswb.")) {
2532 if (VecWidth == 128)
2533 IID = Intrinsic::x86_sse2_packuswb_128;
2534 else if (VecWidth == 256)
2535 IID = Intrinsic::x86_avx2_packuswb;
2536 else if (VecWidth == 512)
2537 IID = Intrinsic::x86_avx512_packuswb_512;
2540 }
else if (Name.starts_with(
"packusdw.")) {
2541 if (VecWidth == 128)
2542 IID = Intrinsic::x86_sse41_packusdw;
2543 else if (VecWidth == 256)
2544 IID = Intrinsic::x86_avx2_packusdw;
2545 else if (VecWidth == 512)
2546 IID = Intrinsic::x86_avx512_packusdw_512;
2549 }
else if (Name.starts_with(
"vpermilvar.")) {
// NOTE(review): the numeric prefix on each line is an extraction artifact
// (an original-file line number), not code.
// Selects the VPERMILVAR intrinsic ID for the legacy "vpermilvar." name by
// vector width and element width: 128/256-bit map to the AVX variants, 512-bit
// to the AVX-512 variants; 32-bit elements pick ps, 64-bit pick pd. The
// enclosing dispatch context and the no-match path are elided from this view.
2550 if (VecWidth == 128 && EltWidth == 32)
2551 IID = Intrinsic::x86_avx_vpermilvar_ps;
2552 else if (VecWidth == 128 && EltWidth == 64)
2553 IID = Intrinsic::x86_avx_vpermilvar_pd;
2554 else if (VecWidth == 256 && EltWidth == 32)
2555 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2556 else if (VecWidth == 256 && EltWidth == 64)
2557 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2558 else if (VecWidth == 512 && EltWidth == 32)
2559 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2560 else if (VecWidth == 512 && EltWidth == 64)
2561 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2564 }
else if (Name ==
"cvtpd2dq.256") {
2565 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2566 }
else if (Name ==
"cvtpd2ps.256") {
2567 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2568 }
else if (Name ==
"cvttpd2dq.256") {
2569 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2570 }
else if (Name ==
"cvttps2dq.128") {
2571 IID = Intrinsic::x86_sse2_cvttps2dq;
2572 }
else if (Name ==
"cvttps2dq.256") {
2573 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2574 }
else if (Name.starts_with(
"permvar.")) {
// NOTE(review): the numeric prefix on each line is an extraction artifact
// (an original-file line number), not code.
// Selects the PERMVAR intrinsic ID for the legacy "permvar." name. For 32/64-bit
// elements the float-vs-integer flag distinguishes ps/permps/df/sf from
// d/permd/di/si variants; 256-bit 32-bit-element cases map to the AVX2
// permps/permd intrinsics, everything else to AVX-512 permvar. 8/16-bit
// elements ("qi"/"hi") are integer-only, so those clauses ignore IsFloat.
// The declaration of IID and the no-match fall-through are elided here.
2576 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2577 IID = Intrinsic::x86_avx2_permps;
2578 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2579 IID = Intrinsic::x86_avx2_permd;
2580 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2581 IID = Intrinsic::x86_avx512_permvar_df_256;
2582 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2583 IID = Intrinsic::x86_avx512_permvar_di_256;
2584 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2585 IID = Intrinsic::x86_avx512_permvar_sf_512;
2586 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2587 IID = Intrinsic::x86_avx512_permvar_si_512;
2588 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2589 IID = Intrinsic::x86_avx512_permvar_df_512;
2590 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2591 IID = Intrinsic::x86_avx512_permvar_di_512;
2592 else if (VecWidth == 128 && EltWidth == 16)
2593 IID = Intrinsic::x86_avx512_permvar_hi_128;
2594 else if (VecWidth == 256 && EltWidth == 16)
2595 IID = Intrinsic::x86_avx512_permvar_hi_256;
2596 else if (VecWidth == 512 && EltWidth == 16)
2597 IID = Intrinsic::x86_avx512_permvar_hi_512;
2598 else if (VecWidth == 128 && EltWidth == 8)
2599 IID = Intrinsic::x86_avx512_permvar_qi_128;
2600 else if (VecWidth == 256 && EltWidth == 8)
2601 IID = Intrinsic::x86_avx512_permvar_qi_256;
2602 else if (VecWidth == 512 && EltWidth == 8)
2603 IID = Intrinsic::x86_avx512_permvar_qi_512;
2606 }
else if (Name.starts_with(
"dbpsadbw.")) {
2607 if (VecWidth == 128)
2608 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2609 else if (VecWidth == 256)
2610 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2611 else if (VecWidth == 512)
2612 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2615 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2616 if (VecWidth == 128)
2617 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2618 else if (VecWidth == 256)
2619 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2620 else if (VecWidth == 512)
2621 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2624 }
else if (Name.starts_with(
"conflict.")) {
2625 if (Name[9] ==
'd' && VecWidth == 128)
2626 IID = Intrinsic::x86_avx512_conflict_d_128;
2627 else if (Name[9] ==
'd' && VecWidth == 256)
2628 IID = Intrinsic::x86_avx512_conflict_d_256;
2629 else if (Name[9] ==
'd' && VecWidth == 512)
2630 IID = Intrinsic::x86_avx512_conflict_d_512;
2631 else if (Name[9] ==
'q' && VecWidth == 128)
2632 IID = Intrinsic::x86_avx512_conflict_q_128;
2633 else if (Name[9] ==
'q' && VecWidth == 256)
2634 IID = Intrinsic::x86_avx512_conflict_q_256;
2635 else if (Name[9] ==
'q' && VecWidth == 512)
2636 IID = Intrinsic::x86_avx512_conflict_q_512;
2639 }
else if (Name.starts_with(
"pavg.")) {
2640 if (Name[5] ==
'b' && VecWidth == 128)
2641 IID = Intrinsic::x86_sse2_pavg_b;
2642 else if (Name[5] ==
'b' && VecWidth == 256)
2643 IID = Intrinsic::x86_avx2_pavg_b;
2644 else if (Name[5] ==
'b' && VecWidth == 512)
2645 IID = Intrinsic::x86_avx512_pavg_b_512;
2646 else if (Name[5] ==
'w' && VecWidth == 128)
2647 IID = Intrinsic::x86_sse2_pavg_w;
2648 else if (Name[5] ==
'w' && VecWidth == 256)
2649 IID = Intrinsic::x86_avx2_pavg_w;
2650 else if (Name[5] ==
'w' && VecWidth == 512)
2651 IID = Intrinsic::x86_avx512_pavg_w_512;
2660 Rep = Builder.CreateIntrinsic(IID, Args);
2671 if (AsmStr->find(
"mov\tfp") == 0 &&
2672 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2673 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2674 AsmStr->replace(Pos, 1,
";");
2680 Value *Rep =
nullptr;
2682 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2684 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2685 Value *Cmp = Builder.CreateICmpSGE(
2687 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2688 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2689 Type *Ty = (Name ==
"abs.bf16")
2693 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2694 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2695 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2696 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2697 : Intrinsic::nvvm_fabs;
2698 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2699 }
else if (Name.consume_front(
"ex2.approx.")) {
2701 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2702 : Intrinsic::nvvm_ex2_approx;
2703 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2704 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2705 Name.starts_with(
"atomic.load.add.f64.p")) {
2710 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2711 Name.starts_with(
"atomic.load.dec.32.p")) {
2716 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2718 }
else if (Name ==
"clz.ll") {
2721 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2722 {Arg, Builder.getFalse()},
2724 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2725 }
else if (Name ==
"popc.ll") {
2729 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2730 Arg,
nullptr,
"ctpop");
2731 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2732 }
else if (Name ==
"h2f") {
2734 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2735 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2736 }
else if (Name.consume_front(
"bitcast.") &&
2737 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2740 }
else if (Name ==
"rotate.b32") {
2743 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2744 {Arg, Arg, ShiftAmt});
2745 }
else if (Name ==
"rotate.b64") {
2749 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2750 {Arg, Arg, ZExtShiftAmt});
2751 }
else if (Name ==
"rotate.right.b64") {
2755 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2756 {Arg, Arg, ZExtShiftAmt});
2757 }
else if (Name ==
"swap.lo.hi.b64") {
2760 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2761 {Arg, Arg, Builder.getInt64(32)});
2762 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2765 Name.starts_with(
".to.gen"))) {
2767 }
else if (Name.consume_front(
"ldg.global")) {
2771 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2774 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2776 }
else if (Name ==
"tanh.approx.f32") {
2780 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2782 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2784 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2785 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2787 }
else if (Name ==
"barrier") {
2788 Rep = Builder.CreateIntrinsic(
2789 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2791 }
else if (Name ==
"barrier.sync") {
2792 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2794 }
else if (Name ==
"barrier.sync.cnt") {
2795 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2797 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2798 Name ==
"barrier0.or") {
2800 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2804 .
Case(
"barrier0.popc",
2805 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2806 .
Case(
"barrier0.and",
2807 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2808 .
Case(
"barrier0.or",
2809 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2810 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2811 Rep = Builder.CreateZExt(Bar, CI->
getType());
2815 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2825 ? Builder.CreateBitCast(Arg, NewType)
2828 Rep = Builder.CreateCall(NewFn, Args);
2829 if (
F->getReturnType()->isIntegerTy())
2830 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2840 Value *Rep =
nullptr;
2842 if (Name.starts_with(
"sse4a.movnt.")) {
2854 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2857 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2858 }
else if (Name.starts_with(
"avx.movnt.") ||
2859 Name.starts_with(
"avx512.storent.")) {
2871 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2872 }
else if (Name ==
"sse2.storel.dq") {
2877 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2878 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2879 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2880 }
else if (Name.starts_with(
"sse.storeu.") ||
2881 Name.starts_with(
"sse2.storeu.") ||
2882 Name.starts_with(
"avx.storeu.")) {
2885 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2886 }
else if (Name ==
"avx512.mask.store.ss") {
2890 }
else if (Name.starts_with(
"avx512.mask.store")) {
2892 bool Aligned = Name[17] !=
'u';
2895 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2898 bool CmpEq = Name[9] ==
'e';
2901 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2902 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2909 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2910 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2912 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2913 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2914 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2915 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2916 Name.starts_with(
"sse2.sqrt.p") ||
2917 Name.starts_with(
"sse.sqrt.p")) {
2918 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2919 {CI->getArgOperand(0)});
2920 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2924 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2925 : Intrinsic::x86_avx512_sqrt_pd_512;
2928 Rep = Builder.CreateIntrinsic(IID, Args);
2930 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2931 {CI->getArgOperand(0)});
2935 }
else if (Name.starts_with(
"avx512.ptestm") ||
2936 Name.starts_with(
"avx512.ptestnm")) {
2940 Rep = Builder.CreateAnd(Op0, Op1);
2946 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2948 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2951 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2954 }
else if (Name.starts_with(
"avx512.kunpck")) {
2959 for (
unsigned i = 0; i != NumElts; ++i)
2968 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2969 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2970 }
else if (Name ==
"avx512.kand.w") {
2973 Rep = Builder.CreateAnd(
LHS,
RHS);
2974 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2975 }
else if (Name ==
"avx512.kandn.w") {
2978 LHS = Builder.CreateNot(
LHS);
2979 Rep = Builder.CreateAnd(
LHS,
RHS);
2980 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2981 }
else if (Name ==
"avx512.kor.w") {
2984 Rep = Builder.CreateOr(
LHS,
RHS);
2985 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2986 }
else if (Name ==
"avx512.kxor.w") {
2989 Rep = Builder.CreateXor(
LHS,
RHS);
2990 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2991 }
else if (Name ==
"avx512.kxnor.w") {
2994 LHS = Builder.CreateNot(
LHS);
2995 Rep = Builder.CreateXor(
LHS,
RHS);
2996 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2997 }
else if (Name ==
"avx512.knot.w") {
2999 Rep = Builder.CreateNot(Rep);
3000 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3001 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3004 Rep = Builder.CreateOr(
LHS,
RHS);
3005 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3007 if (Name[14] ==
'c')
3011 Rep = Builder.CreateICmpEQ(Rep,
C);
3012 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3013 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3014 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3015 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3016 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3019 ConstantInt::get(I32Ty, 0));
3021 ConstantInt::get(I32Ty, 0));
3023 if (Name.contains(
".add."))
3024 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3025 else if (Name.contains(
".sub."))
3026 EltOp = Builder.CreateFSub(Elt0, Elt1);
3027 else if (Name.contains(
".mul."))
3028 EltOp = Builder.CreateFMul(Elt0, Elt1);
3030 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3031 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3032 ConstantInt::get(I32Ty, 0));
3033 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3035 bool CmpEq = Name[16] ==
'e';
3037 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3046 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3049 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3052 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3059 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3062 unsigned EltWidth = OpTy->getScalarSizeInBits();
// NOTE(review): the numeric prefix on each line is an extraction artifact
// (an original-file line number), not code.
// Selects the AVX-512 FPCLASS intrinsic ID for the legacy
// "avx512.mask.fpclass.p*" names: 32-bit elements pick the ps variants,
// 64-bit the pd variants, at 128/256/512-bit vector widths. The IID
// declaration and the no-match handling are elided from this extraction.
3064 if (VecWidth == 128 && EltWidth == 32)
3065 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3066 else if (VecWidth == 256 && EltWidth == 32)
3067 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3068 else if (VecWidth == 512 && EltWidth == 32)
3069 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3070 else if (VecWidth == 128 && EltWidth == 64)
3071 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3072 else if (VecWidth == 256 && EltWidth == 64)
3073 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3074 else if (VecWidth == 512 && EltWidth == 64)
3075 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3082 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3084 Type *OpTy = Args[0]->getType();
3085 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3086 unsigned EltWidth = OpTy->getScalarSizeInBits();
// NOTE(review): the numeric prefix on each line is an extraction artifact
// (an original-file line number), not code.
// Selects the AVX-512 masked-compare intrinsic ID for the legacy
// "avx512.cmp.p*" names: ps for 32-bit elements, pd for 64-bit, at
// 128/256/512-bit vector widths. Per the surrounding (partially elided)
// code, the 512-bit case later gets an extra rounding/SAE argument and the
// mask is appended before the intrinsic call; the IID declaration and the
// no-match path are not visible in this extraction.
3088 if (VecWidth == 128 && EltWidth == 32)
3089 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3090 else if (VecWidth == 256 && EltWidth == 32)
3091 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3092 else if (VecWidth == 512 && EltWidth == 32)
3093 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3094 else if (VecWidth == 128 && EltWidth == 64)
3095 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3096 else if (VecWidth == 256 && EltWidth == 64)
3097 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3098 else if (VecWidth == 512 && EltWidth == 64)
3099 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3104 if (VecWidth == 512)
3106 Args.push_back(Mask);
3108 Rep = Builder.CreateIntrinsic(IID, Args);
3109 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3113 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3116 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3117 Name.starts_with(
"avx512.cvtw2mask.") ||
3118 Name.starts_with(
"avx512.cvtd2mask.") ||
3119 Name.starts_with(
"avx512.cvtq2mask.")) {
3124 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3125 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3126 Name.starts_with(
"avx512.mask.pabs")) {
3128 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3129 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3130 Name.starts_with(
"avx512.mask.pmaxs")) {
3132 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3133 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3134 Name.starts_with(
"avx512.mask.pmaxu")) {
3136 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3137 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3138 Name.starts_with(
"avx512.mask.pmins")) {
3140 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3141 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3142 Name.starts_with(
"avx512.mask.pminu")) {
3144 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3145 Name ==
"avx512.pmulu.dq.512" ||
3146 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3148 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3149 Name ==
"avx512.pmul.dq.512" ||
3150 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3152 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3153 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3158 }
else if (Name ==
"avx512.cvtusi2sd") {
3163 }
else if (Name ==
"sse2.cvtss2sd") {
3165 Rep = Builder.CreateFPExt(
3168 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3169 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3170 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3171 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3172 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3173 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3174 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3175 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3176 Name ==
"avx512.mask.cvtqq2ps.256" ||
3177 Name ==
"avx512.mask.cvtqq2ps.512" ||
3178 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3179 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3180 Name ==
"avx.cvt.ps2.pd.256" ||
3181 Name ==
"avx512.mask.cvtps2pd.128" ||
3182 Name ==
"avx512.mask.cvtps2pd.256") {
3187 unsigned NumDstElts = DstTy->getNumElements();
3189 assert(NumDstElts == 2 &&
"Unexpected vector size");
3190 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3193 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3194 bool IsUnsigned = Name.contains(
"cvtu");
3196 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3200 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3201 : Intrinsic::x86_avx512_sitofp_round;
3202 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3205 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3206 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3212 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3213 Name.starts_with(
"vcvtph2ps.")) {
3217 unsigned NumDstElts = DstTy->getNumElements();
3218 if (NumDstElts != SrcTy->getNumElements()) {
3219 assert(NumDstElts == 4 &&
"Unexpected vector size");
3220 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3222 Rep = Builder.CreateBitCast(
3224 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3228 }
else if (Name.starts_with(
"avx512.mask.load")) {
3230 bool Aligned = Name[16] !=
'u';
3233 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3236 ResultTy->getNumElements());
3238 Rep = Builder.CreateIntrinsic(
3239 Intrinsic::masked_expandload, ResultTy,
3241 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3247 Rep = Builder.CreateIntrinsic(
3248 Intrinsic::masked_compressstore, ResultTy,
3250 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3251 Name.starts_with(
"avx512.mask.expand.")) {
3255 ResultTy->getNumElements());
3257 bool IsCompress = Name[12] ==
'c';
3258 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3259 : Intrinsic::x86_avx512_mask_expand;
3260 Rep = Builder.CreateIntrinsic(
3262 }
else if (Name.starts_with(
"xop.vpcom")) {
3264 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3265 Name.ends_with(
"uq"))
3267 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3268 Name.ends_with(
"d") || Name.ends_with(
"q"))
3277 Name = Name.substr(9);
3278 if (Name.starts_with(
"lt"))
3280 else if (Name.starts_with(
"le"))
3282 else if (Name.starts_with(
"gt"))
3284 else if (Name.starts_with(
"ge"))
3286 else if (Name.starts_with(
"eq"))
3288 else if (Name.starts_with(
"ne"))
3290 else if (Name.starts_with(
"false"))
3292 else if (Name.starts_with(
"true"))
3299 }
else if (Name.starts_with(
"xop.vpcmov")) {
3301 Value *NotSel = Builder.CreateNot(Sel);
3304 Rep = Builder.CreateOr(Sel0, Sel1);
3305 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3306 Name.starts_with(
"avx512.mask.prol")) {
3308 }
else if (Name.starts_with(
"avx512.pror") ||
3309 Name.starts_with(
"avx512.mask.pror")) {
3311 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3312 Name.starts_with(
"avx512.mask.vpshld") ||
3313 Name.starts_with(
"avx512.maskz.vpshld")) {
3314 bool ZeroMask = Name[11] ==
'z';
3316 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3317 Name.starts_with(
"avx512.mask.vpshrd") ||
3318 Name.starts_with(
"avx512.maskz.vpshrd")) {
3319 bool ZeroMask = Name[11] ==
'z';
3321 }
else if (Name ==
"sse42.crc32.64.8") {
3324 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3326 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3327 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3328 Name.starts_with(
"avx512.vbroadcast.s")) {
3331 Type *EltTy = VecTy->getElementType();
3332 unsigned EltNum = VecTy->getNumElements();
3336 for (
unsigned I = 0;
I < EltNum; ++
I)
3337 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3338 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3339 Name.starts_with(
"sse41.pmovzx") ||
3340 Name.starts_with(
"avx2.pmovsx") ||
3341 Name.starts_with(
"avx2.pmovzx") ||
3342 Name.starts_with(
"avx512.mask.pmovsx") ||
3343 Name.starts_with(
"avx512.mask.pmovzx")) {
3345 unsigned NumDstElts = DstTy->getNumElements();
3349 for (
unsigned i = 0; i != NumDstElts; ++i)
3354 bool DoSext = Name.contains(
"pmovsx");
3356 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3361 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3362 Name ==
"avx512.mask.pmov.qd.512" ||
3363 Name ==
"avx512.mask.pmov.wb.256" ||
3364 Name ==
"avx512.mask.pmov.wb.512") {
3369 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3370 Name ==
"avx2.vbroadcasti128") {
3376 if (NumSrcElts == 2)
3377 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3379 Rep = Builder.CreateShuffleVector(Load,
3381 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3382 Name.starts_with(
"avx512.mask.shuf.f")) {
3387 unsigned ControlBitsMask = NumLanes - 1;
3388 unsigned NumControlBits = NumLanes / 2;
3391 for (
unsigned l = 0; l != NumLanes; ++l) {
3392 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3394 if (l >= NumLanes / 2)
3395 LaneMask += NumLanes;
3396 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3397 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3403 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3404 Name.starts_with(
"avx512.mask.broadcasti")) {
3407 unsigned NumDstElts =
3411 for (
unsigned i = 0; i != NumDstElts; ++i)
3412 ShuffleMask[i] = i % NumSrcElts;
3418 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3419 Name.starts_with(
"avx2.vbroadcast") ||
3420 Name.starts_with(
"avx512.pbroadcast") ||
3421 Name.starts_with(
"avx512.mask.broadcast.s")) {
3428 Rep = Builder.CreateShuffleVector(
Op, M);
3433 }
else if (Name.starts_with(
"sse2.padds.") ||
3434 Name.starts_with(
"avx2.padds.") ||
3435 Name.starts_with(
"avx512.padds.") ||
3436 Name.starts_with(
"avx512.mask.padds.")) {
3438 }
else if (Name.starts_with(
"sse2.psubs.") ||
3439 Name.starts_with(
"avx2.psubs.") ||
3440 Name.starts_with(
"avx512.psubs.") ||
3441 Name.starts_with(
"avx512.mask.psubs.")) {
3443 }
else if (Name.starts_with(
"sse2.paddus.") ||
3444 Name.starts_with(
"avx2.paddus.") ||
3445 Name.starts_with(
"avx512.mask.paddus.")) {
3447 }
else if (Name.starts_with(
"sse2.psubus.") ||
3448 Name.starts_with(
"avx2.psubus.") ||
3449 Name.starts_with(
"avx512.mask.psubus.")) {
3451 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3456 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3460 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3465 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3470 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3471 Name ==
"avx512.psll.dq.512") {
3475 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3476 Name ==
"avx512.psrl.dq.512") {
3480 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3481 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3482 Name.starts_with(
"avx2.pblendd.")) {
3487 unsigned NumElts = VecTy->getNumElements();
3490 for (
unsigned i = 0; i != NumElts; ++i)
3491 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3493 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3494 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3495 Name ==
"avx2.vinserti128" ||
3496 Name.starts_with(
"avx512.mask.insert")) {
3500 unsigned DstNumElts =
3502 unsigned SrcNumElts =
3504 unsigned Scale = DstNumElts / SrcNumElts;
3511 for (
unsigned i = 0; i != SrcNumElts; ++i)
3513 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3514 Idxs[i] = SrcNumElts;
3515 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3529 for (
unsigned i = 0; i != DstNumElts; ++i)
3532 for (
unsigned i = 0; i != SrcNumElts; ++i)
3533 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3534 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3540 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3541 Name ==
"avx2.vextracti128" ||
3542 Name.starts_with(
"avx512.mask.vextract")) {
3545 unsigned DstNumElts =
3547 unsigned SrcNumElts =
3549 unsigned Scale = SrcNumElts / DstNumElts;
3556 for (
unsigned i = 0; i != DstNumElts; ++i) {
3557 Idxs[i] = i + (Imm * DstNumElts);
3559 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3565 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3566 Name.starts_with(
"avx512.mask.perm.di.")) {
3570 unsigned NumElts = VecTy->getNumElements();
3573 for (
unsigned i = 0; i != NumElts; ++i)
3574 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3576 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3581 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3593 unsigned HalfSize = NumElts / 2;
3605 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3606 for (
unsigned i = 0; i < HalfSize; ++i)
3607 ShuffleMask[i] = StartIndex + i;
3610 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3611 for (
unsigned i = 0; i < HalfSize; ++i)
3612 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3614 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3616 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3617 Name.starts_with(
"avx512.mask.vpermil.p") ||
3618 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3622 unsigned NumElts = VecTy->getNumElements();
3624 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3625 unsigned IdxMask = ((1 << IdxSize) - 1);
3631 for (
unsigned i = 0; i != NumElts; ++i)
3632 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3639 }
else if (Name ==
"sse2.pshufl.w" ||
3640 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3645 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3649 for (
unsigned l = 0; l != NumElts; l += 8) {
3650 for (
unsigned i = 0; i != 4; ++i)
3651 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3652 for (
unsigned i = 4; i != 8; ++i)
3653 Idxs[i + l] = i + l;
3656 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3661 }
else if (Name ==
"sse2.pshufh.w" ||
3662 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3667 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3671 for (
unsigned l = 0; l != NumElts; l += 8) {
3672 for (
unsigned i = 0; i != 4; ++i)
3673 Idxs[i + l] = i + l;
3674 for (
unsigned i = 0; i != 4; ++i)
3675 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3678 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3683 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3690 unsigned HalfLaneElts = NumLaneElts / 2;
3693 for (
unsigned i = 0; i != NumElts; ++i) {
3695 Idxs[i] = i - (i % NumLaneElts);
3697 if ((i % NumLaneElts) >= HalfLaneElts)
3701 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3704 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3708 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3709 Name.starts_with(
"avx512.mask.movshdup") ||
3710 Name.starts_with(
"avx512.mask.movsldup")) {
3716 if (Name.starts_with(
"avx512.mask.movshdup."))
3720 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3721 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3722 Idxs[i + l + 0] = i + l +
Offset;
3723 Idxs[i + l + 1] = i + l +
Offset;
3726 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3730 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3731 Name.starts_with(
"avx512.mask.unpckl.")) {
3738 for (
int l = 0; l != NumElts; l += NumLaneElts)
3739 for (
int i = 0; i != NumLaneElts; ++i)
3740 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3742 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3746 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3747 Name.starts_with(
"avx512.mask.unpckh.")) {
3754 for (
int l = 0; l != NumElts; l += NumLaneElts)
3755 for (
int i = 0; i != NumLaneElts; ++i)
3756 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3758 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3762 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3763 Name.starts_with(
"avx512.mask.pand.")) {
3766 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3768 Rep = Builder.CreateBitCast(Rep, FTy);
3771 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3772 Name.starts_with(
"avx512.mask.pandn.")) {
3775 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3776 Rep = Builder.CreateAnd(Rep,
3778 Rep = Builder.CreateBitCast(Rep, FTy);
3781 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3782 Name.starts_with(
"avx512.mask.por.")) {
3785 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3787 Rep = Builder.CreateBitCast(Rep, FTy);
3790 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3791 Name.starts_with(
"avx512.mask.pxor.")) {
3794 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3796 Rep = Builder.CreateBitCast(Rep, FTy);
3799 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3803 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3807 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3811 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3812 if (Name.ends_with(
".512")) {
3814 if (Name[17] ==
's')
3815 IID = Intrinsic::x86_avx512_add_ps_512;
3817 IID = Intrinsic::x86_avx512_add_pd_512;
3819 Rep = Builder.CreateIntrinsic(
3827 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3828 if (Name.ends_with(
".512")) {
3830 if (Name[17] ==
's')
3831 IID = Intrinsic::x86_avx512_div_ps_512;
3833 IID = Intrinsic::x86_avx512_div_pd_512;
3835 Rep = Builder.CreateIntrinsic(
3843 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3844 if (Name.ends_with(
".512")) {
3846 if (Name[17] ==
's')
3847 IID = Intrinsic::x86_avx512_mul_ps_512;
3849 IID = Intrinsic::x86_avx512_mul_pd_512;
3851 Rep = Builder.CreateIntrinsic(
3859 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3860 if (Name.ends_with(
".512")) {
3862 if (Name[17] ==
's')
3863 IID = Intrinsic::x86_avx512_sub_ps_512;
3865 IID = Intrinsic::x86_avx512_sub_pd_512;
3867 Rep = Builder.CreateIntrinsic(
3875 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3876 Name.starts_with(
"avx512.mask.min.p")) &&
3877 Name.drop_front(18) ==
".512") {
3878 bool IsDouble = Name[17] ==
'd';
3879 bool IsMin = Name[13] ==
'i';
3881 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3882 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3885 Rep = Builder.CreateIntrinsic(
3890 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3892 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3893 {CI->getArgOperand(0), Builder.getInt1(false)});
3896 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3897 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3898 bool IsVariable = Name[16] ==
'v';
3899 char Size = Name[16] ==
'.' ? Name[17]
3900 : Name[17] ==
'.' ? Name[18]
3901 : Name[18] ==
'.' ? Name[19]
3905 if (IsVariable && Name[17] !=
'.') {
3906 if (
Size ==
'd' && Name[17] ==
'2')
3907 IID = Intrinsic::x86_avx2_psllv_q;
3908 else if (
Size ==
'd' && Name[17] ==
'4')
3909 IID = Intrinsic::x86_avx2_psllv_q_256;
3910 else if (
Size ==
's' && Name[17] ==
'4')
3911 IID = Intrinsic::x86_avx2_psllv_d;
3912 else if (
Size ==
's' && Name[17] ==
'8')
3913 IID = Intrinsic::x86_avx2_psllv_d_256;
3914 else if (
Size ==
'h' && Name[17] ==
'8')
3915 IID = Intrinsic::x86_avx512_psllv_w_128;
3916 else if (
Size ==
'h' && Name[17] ==
'1')
3917 IID = Intrinsic::x86_avx512_psllv_w_256;
3918 else if (Name[17] ==
'3' && Name[18] ==
'2')
3919 IID = Intrinsic::x86_avx512_psllv_w_512;
3922 }
else if (Name.ends_with(
".128")) {
3924 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3925 : Intrinsic::x86_sse2_psll_d;
3926 else if (
Size ==
'q')
3927 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3928 : Intrinsic::x86_sse2_psll_q;
3929 else if (
Size ==
'w')
3930 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3931 : Intrinsic::x86_sse2_psll_w;
3934 }
else if (Name.ends_with(
".256")) {
3936 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3937 : Intrinsic::x86_avx2_psll_d;
3938 else if (
Size ==
'q')
3939 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3940 : Intrinsic::x86_avx2_psll_q;
3941 else if (
Size ==
'w')
3942 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3943 : Intrinsic::x86_avx2_psll_w;
3948 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3949 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3950 : Intrinsic::x86_avx512_psll_d_512;
3951 else if (
Size ==
'q')
3952 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3953 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3954 : Intrinsic::x86_avx512_psll_q_512;
3955 else if (
Size ==
'w')
3956 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3957 : Intrinsic::x86_avx512_psll_w_512;
3963 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3964 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3965 bool IsVariable = Name[16] ==
'v';
3966 char Size = Name[16] ==
'.' ? Name[17]
3967 : Name[17] ==
'.' ? Name[18]
3968 : Name[18] ==
'.' ? Name[19]
3972 if (IsVariable && Name[17] !=
'.') {
3973 if (
Size ==
'd' && Name[17] ==
'2')
3974 IID = Intrinsic::x86_avx2_psrlv_q;
3975 else if (
Size ==
'd' && Name[17] ==
'4')
3976 IID = Intrinsic::x86_avx2_psrlv_q_256;
3977 else if (
Size ==
's' && Name[17] ==
'4')
3978 IID = Intrinsic::x86_avx2_psrlv_d;
3979 else if (
Size ==
's' && Name[17] ==
'8')
3980 IID = Intrinsic::x86_avx2_psrlv_d_256;
3981 else if (
Size ==
'h' && Name[17] ==
'8')
3982 IID = Intrinsic::x86_avx512_psrlv_w_128;
3983 else if (
Size ==
'h' && Name[17] ==
'1')
3984 IID = Intrinsic::x86_avx512_psrlv_w_256;
3985 else if (Name[17] ==
'3' && Name[18] ==
'2')
3986 IID = Intrinsic::x86_avx512_psrlv_w_512;
3989 }
else if (Name.ends_with(
".128")) {
3991 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3992 : Intrinsic::x86_sse2_psrl_d;
3993 else if (
Size ==
'q')
3994 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3995 : Intrinsic::x86_sse2_psrl_q;
3996 else if (
Size ==
'w')
3997 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3998 : Intrinsic::x86_sse2_psrl_w;
4001 }
else if (Name.ends_with(
".256")) {
4003 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4004 : Intrinsic::x86_avx2_psrl_d;
4005 else if (
Size ==
'q')
4006 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4007 : Intrinsic::x86_avx2_psrl_q;
4008 else if (
Size ==
'w')
4009 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4010 : Intrinsic::x86_avx2_psrl_w;
4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4016 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4017 : Intrinsic::x86_avx512_psrl_d_512;
4018 else if (
Size ==
'q')
4019 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4020 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4021 : Intrinsic::x86_avx512_psrl_q_512;
4022 else if (
Size ==
'w')
4023 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4024 : Intrinsic::x86_avx512_psrl_w_512;
4030 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4031 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4032 bool IsVariable = Name[16] ==
'v';
4033 char Size = Name[16] ==
'.' ? Name[17]
4034 : Name[17] ==
'.' ? Name[18]
4035 : Name[18] ==
'.' ? Name[19]
4039 if (IsVariable && Name[17] !=
'.') {
4040 if (
Size ==
's' && Name[17] ==
'4')
4041 IID = Intrinsic::x86_avx2_psrav_d;
4042 else if (
Size ==
's' && Name[17] ==
'8')
4043 IID = Intrinsic::x86_avx2_psrav_d_256;
4044 else if (
Size ==
'h' && Name[17] ==
'8')
4045 IID = Intrinsic::x86_avx512_psrav_w_128;
4046 else if (
Size ==
'h' && Name[17] ==
'1')
4047 IID = Intrinsic::x86_avx512_psrav_w_256;
4048 else if (Name[17] ==
'3' && Name[18] ==
'2')
4049 IID = Intrinsic::x86_avx512_psrav_w_512;
4052 }
else if (Name.ends_with(
".128")) {
4054 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4055 : Intrinsic::x86_sse2_psra_d;
4056 else if (
Size ==
'q')
4057 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4058 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4059 : Intrinsic::x86_avx512_psra_q_128;
4060 else if (
Size ==
'w')
4061 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4062 : Intrinsic::x86_sse2_psra_w;
4065 }
else if (Name.ends_with(
".256")) {
4067 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4068 : Intrinsic::x86_avx2_psra_d;
4069 else if (
Size ==
'q')
4070 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4071 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4072 : Intrinsic::x86_avx512_psra_q_256;
4073 else if (
Size ==
'w')
4074 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4075 : Intrinsic::x86_avx2_psra_w;
4080 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4081 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4082 : Intrinsic::x86_avx512_psra_d_512;
4083 else if (
Size ==
'q')
4084 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4085 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4086 : Intrinsic::x86_avx512_psra_q_512;
4087 else if (
Size ==
'w')
4088 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4089 : Intrinsic::x86_avx512_psra_w_512;
4095 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4097 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4099 }
else if (Name.ends_with(
".movntdqa")) {
4103 LoadInst *LI = Builder.CreateAlignedLoad(
4108 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4109 Name.starts_with(
"fma.vfmsub.") ||
4110 Name.starts_with(
"fma.vfnmadd.") ||
4111 Name.starts_with(
"fma.vfnmsub.")) {
4112 bool NegMul = Name[6] ==
'n';
4113 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4114 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4125 if (NegMul && !IsScalar)
4126 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4127 if (NegMul && IsScalar)
4128 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4130 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4132 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4136 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4144 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4148 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4149 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4150 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4151 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4152 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4153 bool IsMask3 = Name[11] ==
'3';
4154 bool IsMaskZ = Name[11] ==
'z';
4156 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4157 bool NegMul = Name[2] ==
'n';
4158 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4164 if (NegMul && (IsMask3 || IsMaskZ))
4165 A = Builder.CreateFNeg(
A);
4166 if (NegMul && !(IsMask3 || IsMaskZ))
4167 B = Builder.CreateFNeg(
B);
4169 C = Builder.CreateFNeg(
C);
4171 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4172 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4173 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4180 if (Name.back() ==
'd')
4181 IID = Intrinsic::x86_avx512_vfmadd_f64;
4183 IID = Intrinsic::x86_avx512_vfmadd_f32;
4184 Rep = Builder.CreateIntrinsic(IID,
Ops);
4186 Rep = Builder.CreateFMA(
A,
B,
C);
4195 if (NegAcc && IsMask3)
4200 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4202 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4203 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4204 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4205 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4206 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4207 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4208 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4209 bool IsMask3 = Name[11] ==
'3';
4210 bool IsMaskZ = Name[11] ==
'z';
4212 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4213 bool NegMul = Name[2] ==
'n';
4214 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4220 if (NegMul && (IsMask3 || IsMaskZ))
4221 A = Builder.CreateFNeg(
A);
4222 if (NegMul && !(IsMask3 || IsMaskZ))
4223 B = Builder.CreateFNeg(
B);
4225 C = Builder.CreateFNeg(
C);
4232 if (Name[Name.size() - 5] ==
's')
4233 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4235 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4239 Rep = Builder.CreateFMA(
A,
B,
C);
4247 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4251 if (VecWidth == 128 && EltWidth == 32)
4252 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4253 else if (VecWidth == 256 && EltWidth == 32)
4254 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4255 else if (VecWidth == 128 && EltWidth == 64)
4256 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4257 else if (VecWidth == 256 && EltWidth == 64)
4258 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4264 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4265 Rep = Builder.CreateIntrinsic(IID,
Ops);
4266 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4267 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4268 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4269 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4270 bool IsMask3 = Name[11] ==
'3';
4271 bool IsMaskZ = Name[11] ==
'z';
4273 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4274 bool IsSubAdd = Name[3] ==
's';
4278 if (Name[Name.size() - 5] ==
's')
4279 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4281 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4286 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4288 Rep = Builder.CreateIntrinsic(IID,
Ops);
4297 Value *Odd = Builder.CreateCall(FMA,
Ops);
4298 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4299 Value *Even = Builder.CreateCall(FMA,
Ops);
4305 for (
int i = 0; i != NumElts; ++i)
4306 Idxs[i] = i + (i % 2) * NumElts;
4308 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4316 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4317 Name.starts_with(
"avx512.maskz.pternlog.")) {
4318 bool ZeroMask = Name[11] ==
'z';
4322 if (VecWidth == 128 && EltWidth == 32)
4323 IID = Intrinsic::x86_avx512_pternlog_d_128;
4324 else if (VecWidth == 256 && EltWidth == 32)
4325 IID = Intrinsic::x86_avx512_pternlog_d_256;
4326 else if (VecWidth == 512 && EltWidth == 32)
4327 IID = Intrinsic::x86_avx512_pternlog_d_512;
4328 else if (VecWidth == 128 && EltWidth == 64)
4329 IID = Intrinsic::x86_avx512_pternlog_q_128;
4330 else if (VecWidth == 256 && EltWidth == 64)
4331 IID = Intrinsic::x86_avx512_pternlog_q_256;
4332 else if (VecWidth == 512 && EltWidth == 64)
4333 IID = Intrinsic::x86_avx512_pternlog_q_512;
4339 Rep = Builder.CreateIntrinsic(IID, Args);
4343 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4344 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4345 bool ZeroMask = Name[11] ==
'z';
4346 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4349 if (VecWidth == 128 && !
High)
4350 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4351 else if (VecWidth == 256 && !
High)
4352 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4353 else if (VecWidth == 512 && !
High)
4354 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4355 else if (VecWidth == 128 &&
High)
4356 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4357 else if (VecWidth == 256 &&
High)
4358 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4359 else if (VecWidth == 512 &&
High)
4360 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4366 Rep = Builder.CreateIntrinsic(IID, Args);
4370 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4371 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4372 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4373 bool ZeroMask = Name[11] ==
'z';
4374 bool IndexForm = Name[17] ==
'i';
4376 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4377 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4378 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4379 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4380 bool ZeroMask = Name[11] ==
'z';
4381 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4384 if (VecWidth == 128 && !IsSaturating)
4385 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4386 else if (VecWidth == 256 && !IsSaturating)
4387 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4388 else if (VecWidth == 512 && !IsSaturating)
4389 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4390 else if (VecWidth == 128 && IsSaturating)
4391 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4392 else if (VecWidth == 256 && IsSaturating)
4393 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4394 else if (VecWidth == 512 && IsSaturating)
4395 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4405 if (Args[1]->
getType()->isVectorTy() &&
4408 ->isIntegerTy(32) &&
4409 Args[2]->
getType()->isVectorTy() &&
4412 ->isIntegerTy(32)) {
4413 Type *NewArgType =
nullptr;
4414 if (VecWidth == 128)
4416 else if (VecWidth == 256)
4418 else if (VecWidth == 512)
4424 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4425 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4428 Rep = Builder.CreateIntrinsic(IID, Args);
4432 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4433 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4434 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4435 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4436 bool ZeroMask = Name[11] ==
'z';
4437 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4440 if (VecWidth == 128 && !IsSaturating)
4441 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4442 else if (VecWidth == 256 && !IsSaturating)
4443 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4444 else if (VecWidth == 512 && !IsSaturating)
4445 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4446 else if (VecWidth == 128 && IsSaturating)
4447 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4448 else if (VecWidth == 256 && IsSaturating)
4449 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4450 else if (VecWidth == 512 && IsSaturating)
4451 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4461 if (Args[1]->
getType()->isVectorTy() &&
4464 ->isIntegerTy(32) &&
4465 Args[2]->
getType()->isVectorTy() &&
4468 ->isIntegerTy(32)) {
4469 Type *NewArgType =
nullptr;
4470 if (VecWidth == 128)
4472 else if (VecWidth == 256)
4474 else if (VecWidth == 512)
4480 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4481 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4484 Rep = Builder.CreateIntrinsic(IID, Args);
4488 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4489 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4490 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4492 if (Name[0] ==
'a' && Name.back() ==
'2')
4493 IID = Intrinsic::x86_addcarry_32;
4494 else if (Name[0] ==
'a' && Name.back() ==
'4')
4495 IID = Intrinsic::x86_addcarry_64;
4496 else if (Name[0] ==
's' && Name.back() ==
'2')
4497 IID = Intrinsic::x86_subborrow_32;
4498 else if (Name[0] ==
's' && Name.back() ==
'4')
4499 IID = Intrinsic::x86_subborrow_64;
4506 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4509 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4512 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4516 }
else if (Name.starts_with(
"avx512.mask.") &&
4527 if (Name.starts_with(
"neon.bfcvt")) {
4528 if (Name.starts_with(
"neon.bfcvtn2")) {
4530 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4532 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4533 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4536 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4537 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4539 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4543 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4544 return Builder.CreateShuffleVector(
4547 return Builder.CreateFPTrunc(CI->
getOperand(0),
4550 }
else if (Name.starts_with(
"sve.fcvt")) {
4553 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4554 .
Case(
"sve.fcvtnt.bf16f32",
4555 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4567 if (Args[1]->
getType() != BadPredTy)
4570 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4571 BadPredTy, Args[1]);
4572 Args[1] = Builder.CreateIntrinsic(
4573 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4575 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4584 if (Name ==
"mve.vctp64.old") {
4587 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4590 Value *C1 = Builder.CreateIntrinsic(
4591 Intrinsic::arm_mve_pred_v2i,
4593 return Builder.CreateIntrinsic(
4594 Intrinsic::arm_mve_pred_i2v,
4596 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4597 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4598 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4599 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4601 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4602 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4603 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4604 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4606 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4607 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4608 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4609 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4610 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4611 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4612 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4613 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4614 std::vector<Type *> Tys;
4618 case Intrinsic::arm_mve_mull_int_predicated:
4619 case Intrinsic::arm_mve_vqdmull_predicated:
4620 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4623 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4624 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4625 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4629 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4633 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4637 case Intrinsic::arm_cde_vcx1q_predicated:
4638 case Intrinsic::arm_cde_vcx1qa_predicated:
4639 case Intrinsic::arm_cde_vcx2q_predicated:
4640 case Intrinsic::arm_cde_vcx2qa_predicated:
4641 case Intrinsic::arm_cde_vcx3q_predicated:
4642 case Intrinsic::arm_cde_vcx3qa_predicated:
4649 std::vector<Value *>
Ops;
4651 Type *Ty =
Op->getType();
4652 if (Ty->getScalarSizeInBits() == 1) {
4653 Value *C1 = Builder.CreateIntrinsic(
4654 Intrinsic::arm_mve_pred_v2i,
4656 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4661 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4676 auto UpgradeLegacyWMMAIUIntrinsicCall =
4681 Args.push_back(Builder.getFalse());
4685 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4692 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4697 NewCall->copyMetadata(*CI);
4701 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4702 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4703 "intrinsic should have 7 arguments");
4706 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4708 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4709 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4710 "intrinsic should have 8 arguments");
4715 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4735 if (NumOperands < 3)
4748 bool IsVolatile =
false;
4752 if (NumOperands > 3)
4757 if (NumOperands > 5) {
4759 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4773 if (VT->getElementType()->isIntegerTy(16)) {
4776 Val = Builder.CreateBitCast(Val, AsBF16);
4784 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4786 unsigned AddrSpace = PtrTy->getAddressSpace();
4789 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4791 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4796 MDNode *RangeNotPrivate =
4799 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4805 return Builder.CreateBitCast(RMW, RetTy);
4826 return MAV->getMetadata();
4833 return I->getDebugLoc().getAsMDNode();
4841 if (Name ==
"label") {
4844 }
else if (Name ==
"assign") {
4851 }
else if (Name ==
"declare") {
4856 }
else if (Name ==
"addr") {
4866 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4868 }
else if (Name ==
"value") {
4871 unsigned ExprOp = 2;
4885 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4893 int64_t OffsetVal =
Offset->getSExtValue();
4894 return Builder.CreateIntrinsic(OffsetVal >= 0
4895 ? Intrinsic::vector_splice_left
4896 : Intrinsic::vector_splice_right,
4898 {CI->getArgOperand(0), CI->getArgOperand(1),
4899 Builder.getInt32(std::abs(OffsetVal))});
4904 if (Name.starts_with(
"to.fp16")) {
4906 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4907 return Builder.CreateBitCast(Cast, CI->
getType());
4910 if (Name.starts_with(
"from.fp16")) {
4912 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4913 return Builder.CreateFPExt(Cast, CI->
getType());
4938 if (!Name.consume_front(
"llvm."))
4941 bool IsX86 = Name.consume_front(
"x86.");
4942 bool IsNVVM = Name.consume_front(
"nvvm.");
4943 bool IsAArch64 = Name.consume_front(
"aarch64.");
4944 bool IsARM = Name.consume_front(
"arm.");
4945 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
4946 bool IsDbg = Name.consume_front(
"dbg.");
4948 (Name.consume_front(
"experimental.vector.splice") ||
4949 Name.consume_front(
"vector.splice")) &&
4950 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
4951 Value *Rep =
nullptr;
4953 if (!IsX86 && Name ==
"stackprotectorcheck") {
4955 }
else if (IsNVVM) {
4959 }
else if (IsAArch64) {
4963 }
else if (IsAMDGCN) {
4967 }
else if (IsOldSplice) {
4969 }
else if (Name.consume_front(
"convert.")) {
4981 const auto &DefaultCase = [&]() ->
void {
4989 "Unknown function for CallBase upgrade and isn't just a name change");
4997 "Return type must have changed");
4998 assert(OldST->getNumElements() ==
5000 "Must have same number of elements");
5003 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5006 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5007 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5008 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5027 case Intrinsic::arm_neon_vst1:
5028 case Intrinsic::arm_neon_vst2:
5029 case Intrinsic::arm_neon_vst3:
5030 case Intrinsic::arm_neon_vst4:
5031 case Intrinsic::arm_neon_vst2lane:
5032 case Intrinsic::arm_neon_vst3lane:
5033 case Intrinsic::arm_neon_vst4lane: {
5035 NewCall = Builder.CreateCall(NewFn, Args);
5038 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5039 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5040 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5045 NewCall = Builder.CreateCall(NewFn, Args);
5048 case Intrinsic::aarch64_sve_ld3_sret:
5049 case Intrinsic::aarch64_sve_ld4_sret:
5050 case Intrinsic::aarch64_sve_ld2_sret: {
5052 Name = Name.substr(5);
5059 unsigned MinElts = RetTy->getMinNumElements() /
N;
5061 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5063 for (
unsigned I = 0;
I <
N;
I++) {
5064 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5065 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5071 case Intrinsic::coro_end: {
5074 NewCall = Builder.CreateCall(NewFn, Args);
5078 case Intrinsic::vector_extract: {
5080 Name = Name.substr(5);
5081 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5086 unsigned MinElts = RetTy->getMinNumElements();
5089 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5093 case Intrinsic::vector_insert: {
5095 Name = Name.substr(5);
5096 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5100 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5105 NewCall = Builder.CreateCall(
5109 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5115 assert(
N > 1 &&
"Create is expected to be between 2-4");
5118 unsigned MinElts = RetTy->getMinNumElements() /
N;
5119 for (
unsigned I = 0;
I <
N;
I++) {
5121 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5128 case Intrinsic::arm_neon_bfdot:
5129 case Intrinsic::arm_neon_bfmmla:
5130 case Intrinsic::arm_neon_bfmlalb:
5131 case Intrinsic::arm_neon_bfmlalt:
5132 case Intrinsic::aarch64_neon_bfdot:
5133 case Intrinsic::aarch64_neon_bfmmla:
5134 case Intrinsic::aarch64_neon_bfmlalb:
5135 case Intrinsic::aarch64_neon_bfmlalt: {
5138 "Mismatch between function args and call args");
5139 size_t OperandWidth =
5141 assert((OperandWidth == 64 || OperandWidth == 128) &&
5142 "Unexpected operand width");
5144 auto Iter = CI->
args().begin();
5145 Args.push_back(*Iter++);
5146 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5147 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5148 NewCall = Builder.CreateCall(NewFn, Args);
5152 case Intrinsic::bitreverse:
5153 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5156 case Intrinsic::ctlz:
5157 case Intrinsic::cttz: {
5164 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5168 case Intrinsic::objectsize: {
5169 Value *NullIsUnknownSize =
5173 NewCall = Builder.CreateCall(
5178 case Intrinsic::ctpop:
5179 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5181 case Intrinsic::dbg_value: {
5183 Name = Name.substr(5);
5185 if (Name.starts_with(
"dbg.addr")) {
5199 if (
Offset->isNullValue()) {
5200 NewCall = Builder.CreateCall(
5209 case Intrinsic::ptr_annotation:
5217 NewCall = Builder.CreateCall(
5226 case Intrinsic::var_annotation:
5233 NewCall = Builder.CreateCall(
5242 case Intrinsic::riscv_aes32dsi:
5243 case Intrinsic::riscv_aes32dsmi:
5244 case Intrinsic::riscv_aes32esi:
5245 case Intrinsic::riscv_aes32esmi:
5246 case Intrinsic::riscv_sm4ks:
5247 case Intrinsic::riscv_sm4ed: {
5257 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5258 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5264 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5265 Value *Res = NewCall;
5267 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5273 case Intrinsic::nvvm_mapa_shared_cluster: {
5277 Value *Res = NewCall;
5278 Res = Builder.CreateAddrSpaceCast(
5285 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5286 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5289 Args[0] = Builder.CreateAddrSpaceCast(
5292 NewCall = Builder.CreateCall(NewFn, Args);
5298 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5299 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5300 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5301 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5302 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5303 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5304 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5305 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5312 Args[0] = Builder.CreateAddrSpaceCast(
5321 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5323 NewCall = Builder.CreateCall(NewFn, Args);
5329 case Intrinsic::riscv_sha256sig0:
5330 case Intrinsic::riscv_sha256sig1:
5331 case Intrinsic::riscv_sha256sum0:
5332 case Intrinsic::riscv_sha256sum1:
5333 case Intrinsic::riscv_sm3p0:
5334 case Intrinsic::riscv_sm3p1: {
5341 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5343 NewCall = Builder.CreateCall(NewFn, Arg);
5345 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5352 case Intrinsic::x86_xop_vfrcz_ss:
5353 case Intrinsic::x86_xop_vfrcz_sd:
5354 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5357 case Intrinsic::x86_xop_vpermil2pd:
5358 case Intrinsic::x86_xop_vpermil2ps:
5359 case Intrinsic::x86_xop_vpermil2pd_256:
5360 case Intrinsic::x86_xop_vpermil2ps_256: {
5364 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5365 NewCall = Builder.CreateCall(NewFn, Args);
5369 case Intrinsic::x86_sse41_ptestc:
5370 case Intrinsic::x86_sse41_ptestz:
5371 case Intrinsic::x86_sse41_ptestnzc: {
5385 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5386 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5388 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5392 case Intrinsic::x86_rdtscp: {
5398 NewCall = Builder.CreateCall(NewFn);
5400 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5403 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5411 case Intrinsic::x86_sse41_insertps:
5412 case Intrinsic::x86_sse41_dppd:
5413 case Intrinsic::x86_sse41_dpps:
5414 case Intrinsic::x86_sse41_mpsadbw:
5415 case Intrinsic::x86_avx_dp_ps_256:
5416 case Intrinsic::x86_avx2_mpsadbw: {
5422 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5423 NewCall = Builder.CreateCall(NewFn, Args);
5427 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5428 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5429 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5430 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5431 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5432 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5438 NewCall = Builder.CreateCall(NewFn, Args);
5447 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5448 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5449 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5450 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5451 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5452 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5456 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5457 Args[1] = Builder.CreateBitCast(
5460 NewCall = Builder.CreateCall(NewFn, Args);
5461 Value *Res = Builder.CreateBitCast(
5469 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5470 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5471 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5475 Args[1] = Builder.CreateBitCast(
5477 Args[2] = Builder.CreateBitCast(
5480 NewCall = Builder.CreateCall(NewFn, Args);
5484 case Intrinsic::thread_pointer: {
5485 NewCall = Builder.CreateCall(NewFn, {});
5489 case Intrinsic::memcpy:
5490 case Intrinsic::memmove:
5491 case Intrinsic::memset: {
5507 NewCall = Builder.CreateCall(NewFn, Args);
5509 AttributeList NewAttrs = AttributeList::get(
5510 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5511 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5512 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5517 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5520 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5524 case Intrinsic::masked_load:
5525 case Intrinsic::masked_gather:
5526 case Intrinsic::masked_store:
5527 case Intrinsic::masked_scatter: {
5533 auto GetMaybeAlign = [](
Value *
Op) {
5543 auto GetAlign = [&](
Value *
Op) {
5552 case Intrinsic::masked_load:
5553 NewCall = Builder.CreateMaskedLoad(
5557 case Intrinsic::masked_gather:
5558 NewCall = Builder.CreateMaskedGather(
5564 case Intrinsic::masked_store:
5565 NewCall = Builder.CreateMaskedStore(
5569 case Intrinsic::masked_scatter:
5570 NewCall = Builder.CreateMaskedScatter(
5572 DL.getValueOrABITypeAlignment(
5586 case Intrinsic::lifetime_start:
5587 case Intrinsic::lifetime_end: {
5599 NewCall = Builder.CreateLifetimeStart(Ptr);
5601 NewCall = Builder.CreateLifetimeEnd(Ptr);
5610 case Intrinsic::x86_avx512_vpdpbusd_128:
5611 case Intrinsic::x86_avx512_vpdpbusd_256:
5612 case Intrinsic::x86_avx512_vpdpbusd_512:
5613 case Intrinsic::x86_avx512_vpdpbusds_128:
5614 case Intrinsic::x86_avx512_vpdpbusds_256:
5615 case Intrinsic::x86_avx512_vpdpbusds_512:
5616 case Intrinsic::x86_avx2_vpdpbssd_128:
5617 case Intrinsic::x86_avx2_vpdpbssd_256:
5618 case Intrinsic::x86_avx10_vpdpbssd_512:
5619 case Intrinsic::x86_avx2_vpdpbssds_128:
5620 case Intrinsic::x86_avx2_vpdpbssds_256:
5621 case Intrinsic::x86_avx10_vpdpbssds_512:
5622 case Intrinsic::x86_avx2_vpdpbsud_128:
5623 case Intrinsic::x86_avx2_vpdpbsud_256:
5624 case Intrinsic::x86_avx10_vpdpbsud_512:
5625 case Intrinsic::x86_avx2_vpdpbsuds_128:
5626 case Intrinsic::x86_avx2_vpdpbsuds_256:
5627 case Intrinsic::x86_avx10_vpdpbsuds_512:
5628 case Intrinsic::x86_avx2_vpdpbuud_128:
5629 case Intrinsic::x86_avx2_vpdpbuud_256:
5630 case Intrinsic::x86_avx10_vpdpbuud_512:
5631 case Intrinsic::x86_avx2_vpdpbuuds_128:
5632 case Intrinsic::x86_avx2_vpdpbuuds_256:
5633 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5638 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5639 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5641 NewCall = Builder.CreateCall(NewFn, Args);
5644 case Intrinsic::x86_avx512_vpdpwssd_128:
5645 case Intrinsic::x86_avx512_vpdpwssd_256:
5646 case Intrinsic::x86_avx512_vpdpwssd_512:
5647 case Intrinsic::x86_avx512_vpdpwssds_128:
5648 case Intrinsic::x86_avx512_vpdpwssds_256:
5649 case Intrinsic::x86_avx512_vpdpwssds_512:
5650 case Intrinsic::x86_avx2_vpdpwsud_128:
5651 case Intrinsic::x86_avx2_vpdpwsud_256:
5652 case Intrinsic::x86_avx10_vpdpwsud_512:
5653 case Intrinsic::x86_avx2_vpdpwsuds_128:
5654 case Intrinsic::x86_avx2_vpdpwsuds_256:
5655 case Intrinsic::x86_avx10_vpdpwsuds_512:
5656 case Intrinsic::x86_avx2_vpdpwusd_128:
5657 case Intrinsic::x86_avx2_vpdpwusd_256:
5658 case Intrinsic::x86_avx10_vpdpwusd_512:
5659 case Intrinsic::x86_avx2_vpdpwusds_128:
5660 case Intrinsic::x86_avx2_vpdpwusds_256:
5661 case Intrinsic::x86_avx10_vpdpwusds_512:
5662 case Intrinsic::x86_avx2_vpdpwuud_128:
5663 case Intrinsic::x86_avx2_vpdpwuud_256:
5664 case Intrinsic::x86_avx10_vpdpwuud_512:
5665 case Intrinsic::x86_avx2_vpdpwuuds_128:
5666 case Intrinsic::x86_avx2_vpdpwuuds_256:
5667 case Intrinsic::x86_avx10_vpdpwuuds_512:
5672 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5673 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5675 NewCall = Builder.CreateCall(NewFn, Args);
5678 assert(NewCall &&
"Should have either set this variable or returned through "
5679 "the default case");
5686 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5700 F->eraseFromParent();
5706 if (NumOperands == 0)
5714 if (NumOperands == 3) {
5718 Metadata *Elts2[] = {ScalarType, ScalarType,
5732 if (
Opc != Instruction::BitCast)
5736 Type *SrcTy = V->getType();
5753 if (
Opc != Instruction::BitCast)
5756 Type *SrcTy =
C->getType();
5783 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5784 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5785 if (Flag->getNumOperands() < 3)
5787 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5788 return K->getString() ==
"Debug Info Version";
5791 if (OpIt != ModFlags->op_end()) {
5792 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5799 bool BrokenDebugInfo =
false;
5802 if (!BrokenDebugInfo)
5808 M.getContext().diagnose(Diag);
5815 M.getContext().diagnose(DiagVersion);
5825 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5828 if (
F->hasFnAttribute(Attr)) {
5831 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5833 auto [Part, Rest] = S.
split(
',');
5839 const unsigned Dim = DimC -
'x';
5840 assert(Dim < 3 &&
"Unexpected dim char");
5850 F->addFnAttr(Attr, NewAttr);
5854 return S ==
"x" || S ==
"y" || S ==
"z";
5859 if (K ==
"kernel") {
5871 const unsigned Idx = (AlignIdxValuePair >> 16);
5872 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5877 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5882 if (K ==
"minctasm") {
5887 if (K ==
"maxnreg") {
5892 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5896 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5900 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5904 if (K ==
"grid_constant") {
5919 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5926 if (!SeenNodes.
insert(MD).second)
5933 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5940 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5942 const MDOperand &V = MD->getOperand(j + 1);
5945 NewOperands.
append({K, V});
5948 if (NewOperands.
size() > 1)
5961 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
5962 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5963 if (ModRetainReleaseMarker) {
5969 ID->getString().split(ValueComp,
"#");
5970 if (ValueComp.
size() == 2) {
5971 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
5975 M.eraseNamedMetadata(ModRetainReleaseMarker);
5986 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6012 bool InvalidCast =
false;
6014 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6027 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6029 Args.push_back(Arg);
6036 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6041 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6054 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6062 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6063 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6064 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6065 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6066 {
"objc_autoreleaseReturnValue",
6067 llvm::Intrinsic::objc_autoreleaseReturnValue},
6068 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6069 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6070 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6071 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6072 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6073 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6074 {
"objc_release", llvm::Intrinsic::objc_release},
6075 {
"objc_retain", llvm::Intrinsic::objc_retain},
6076 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6077 {
"objc_retainAutoreleaseReturnValue",
6078 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6079 {
"objc_retainAutoreleasedReturnValue",
6080 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6081 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6082 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6083 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6084 {
"objc_unsafeClaimAutoreleasedReturnValue",
6085 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6086 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6087 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6088 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6089 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6090 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6091 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6092 {
"objc_arc_annotation_topdown_bbstart",
6093 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6094 {
"objc_arc_annotation_topdown_bbend",
6095 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6096 {
"objc_arc_annotation_bottomup_bbstart",
6097 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6098 {
"objc_arc_annotation_bottomup_bbend",
6099 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6101 for (
auto &
I : RuntimeFuncs)
6102 UpgradeToIntrinsic(
I.first,
I.second);
6106 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6110 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6111 bool HasSwiftVersionFlag =
false;
6112 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6119 if (
Op->getNumOperands() != 3)
6133 if (
ID->getString() ==
"Objective-C Image Info Version")
6135 if (
ID->getString() ==
"Objective-C Class Properties")
6136 HasClassProperties =
true;
6138 if (
ID->getString() ==
"PIC Level") {
6139 if (
auto *Behavior =
6141 uint64_t V = Behavior->getLimitedValue();
6147 if (
ID->getString() ==
"PIE Level")
6148 if (
auto *Behavior =
6155 if (
ID->getString() ==
"branch-target-enforcement" ||
6156 ID->getString().starts_with(
"sign-return-address")) {
6157 if (
auto *Behavior =
6163 Op->getOperand(1),
Op->getOperand(2)};
6173 if (
ID->getString() ==
"Objective-C Image Info Section") {
6176 Value->getString().split(ValueComp,
" ");
6177 if (ValueComp.
size() != 1) {
6178 std::string NewValue;
6179 for (
auto &S : ValueComp)
6180 NewValue += S.str();
6191 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6194 assert(Md->getValue() &&
"Expected non-empty metadata");
6195 auto Type = Md->getValue()->getType();
6198 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6199 if ((Val & 0xff) != Val) {
6200 HasSwiftVersionFlag =
true;
6201 SwiftABIVersion = (Val & 0xff00) >> 8;
6202 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6203 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6214 if (
ID->getString() ==
"amdgpu_code_object_version") {
6217 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6229 if (HasObjCFlag && !HasClassProperties) {
6235 if (HasSwiftVersionFlag) {
6239 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6241 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6249 auto TrimSpaces = [](
StringRef Section) -> std::string {
6251 Section.split(Components,
',');
6256 for (
auto Component : Components)
6257 OS <<
',' << Component.trim();
6262 for (
auto &GV : M.globals()) {
6263 if (!GV.hasSection())
6268 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6273 GV.setSection(TrimSpaces(Section));
6289struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6290 StrictFPUpgradeVisitor() =
default;
6293 if (!
Call.isStrictFP())
6299 Call.removeFnAttr(Attribute::StrictFP);
6300 Call.addFnAttr(Attribute::NoBuiltin);
6305struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6306 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6307 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6309 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6324 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6325 StrictFPUpgradeVisitor SFPV;
6330 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6331 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6332 for (
auto &Arg :
F.args())
6334 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6336 bool AddingAttrs =
false, RemovingAttrs =
false;
6337 AttrBuilder AttrsToAdd(
F.getContext());
6342 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6343 A.isValid() &&
A.isStringAttribute()) {
6344 F.setSection(
A.getValueAsString());
6346 RemovingAttrs =
true;
6350 A.isValid() &&
A.isStringAttribute()) {
6353 AddingAttrs = RemovingAttrs =
true;
6356 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6357 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6359 RemovingAttrs =
true;
6360 if (
A.getValueAsString() ==
"true") {
6361 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6370 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6373 if (
A.getValueAsBool()) {
6374 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6380 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6381 RemovingAttrs =
true;
6388 bool HandleDenormalMode =
false;
6390 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6393 DenormalFPMath = ParsedMode;
6395 AddingAttrs = RemovingAttrs =
true;
6396 HandleDenormalMode =
true;
6400 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6404 DenormalFPMathF32 = ParsedMode;
6406 AddingAttrs = RemovingAttrs =
true;
6407 HandleDenormalMode =
true;
6411 if (HandleDenormalMode)
6412 AttrsToAdd.addDenormalFPEnvAttr(
6416 F.removeFnAttrs(AttrsToRemove);
6419 F.addFnAttrs(AttrsToAdd);
6425 if (!
F.hasFnAttribute(FnAttrName))
6426 F.addFnAttr(FnAttrName,
Value);
6433 if (!
F.hasFnAttribute(FnAttrName)) {
6435 F.addFnAttr(FnAttrName);
6437 auto A =
F.getFnAttribute(FnAttrName);
6438 if (
"false" ==
A.getValueAsString())
6439 F.removeFnAttr(FnAttrName);
6440 else if (
"true" ==
A.getValueAsString()) {
6441 F.removeFnAttr(FnAttrName);
6442 F.addFnAttr(FnAttrName);
6448 Triple T(M.getTargetTriple());
6449 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6459 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6463 if (
Op->getNumOperands() != 3)
6472 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6473 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6474 : IDStr ==
"guarded-control-stack" ? &GCSValue
6475 : IDStr ==
"sign-return-address" ? &SRAValue
6476 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6477 : IDStr ==
"sign-return-address-with-bkey"
6483 *ValPtr = CI->getZExtValue();
6489 bool BTE = BTEValue == 1;
6490 bool BPPLR = BPPLRValue == 1;
6491 bool GCS = GCSValue == 1;
6492 bool SRA = SRAValue == 1;
6495 if (SRA && SRAALLValue == 1)
6496 SignTypeValue =
"all";
6499 if (SRA && SRABKeyValue == 1)
6500 SignKeyValue =
"b_key";
6502 for (
Function &
F : M.getFunctionList()) {
6503 if (
F.isDeclaration())
6510 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6511 A.isValid() &&
"none" ==
A.getValueAsString()) {
6512 F.removeFnAttr(
"sign-return-address");
6513 F.removeFnAttr(
"sign-return-address-key");
6529 if (SRAALLValue == 1)
6531 if (SRABKeyValue == 1)
6540 if (
T->getNumOperands() < 1)
6545 return S->getString().starts_with(
"llvm.vectorizer.");
6549 StringRef OldPrefix =
"llvm.vectorizer.";
6552 if (OldTag ==
"llvm.vectorizer.unroll")
6564 if (
T->getNumOperands() < 1)
6569 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6574 Ops.reserve(
T->getNumOperands());
6576 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6577 Ops.push_back(
T->getOperand(
I));
6591 Ops.reserve(
T->getNumOperands());
6602 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6603 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6604 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6607 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6609 auto I =
DL.find(
"-n64-");
6611 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6616 std::string Res =
DL.str();
6619 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6620 Res.append(Res.empty() ?
"G1" :
"-G1");
6628 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6629 Res.append(
"-ni:7:8:9");
6631 if (
DL.ends_with(
"ni:7"))
6633 if (
DL.ends_with(
"ni:7:8"))
6638 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6639 Res.append(
"-p7:160:256:256:32");
6640 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6641 Res.append(
"-p8:128:128:128:48");
6642 constexpr StringRef OldP8(
"-p8:128:128-");
6643 if (
DL.contains(OldP8))
6644 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6645 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6646 Res.append(
"-p9:192:256:256:32");
6650 if (!
DL.contains(
"m:e"))
6651 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6656 if (
T.isSystemZ() && !
DL.empty()) {
6658 if (!
DL.contains(
"-S64"))
6659 return "E-S64" +
DL.drop_front(1).str();
6663 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6666 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6667 if (!
DL.contains(AddrSpaces)) {
6669 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6670 if (R.match(Res, &
Groups))
6676 if (
T.isAArch64()) {
6678 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6679 Res.append(
"-Fn32");
6680 AddPtr32Ptr64AddrSpaces();
6684 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6688 std::string I64 =
"-i64:64";
6689 std::string I128 =
"-i128:128";
6691 size_t Pos = Res.find(I64);
6692 if (Pos !=
size_t(-1))
6693 Res.insert(Pos + I64.size(), I128);
6697 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6698 size_t Pos = Res.find(
"-S128");
6701 Res.insert(Pos,
"-f64:32:64");
6707 AddPtr32Ptr64AddrSpaces();
6715 if (!
T.isOSIAMCU()) {
6716 std::string I128 =
"-i128:128";
6719 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6720 if (R.match(Res, &
Groups))
6728 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6730 auto I =
Ref.find(
"-f80:32-");
6732 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6740 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6743 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6744 B.removeAttribute(
"no-frame-pointer-elim");
6746 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6748 if (FramePointer !=
"all")
6749 FramePointer =
"non-leaf";
6750 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6752 if (!FramePointer.
empty())
6753 B.addAttribute(
"frame-pointer", FramePointer);
6755 A =
B.getAttribute(
"null-pointer-is-valid");
6758 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6759 B.removeAttribute(
"null-pointer-is-valid");
6760 if (NullPointerIsValid)
6761 B.addAttribute(Attribute::NullPointerIsValid);
6764 A =
B.getAttribute(
"uniform-work-group-size");
6768 bool IsTrue = Val ==
"true";
6769 B.removeAttribute(
"uniform-work-group-size");
6771 B.addAttribute(
"uniform-work-group-size");
6782 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.