#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
    cl::desc("Disable autoupgrade of debug info"));
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  // ...
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  // ...
  if (F->getReturnType()->isVectorTy())
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  // ...
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  // ...
  if (F->getReturnType()->getScalarType()->isBFloatTy())
  // ...
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
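// The matcher below works purely on the intrinsic name with the leading
// "llvm.x86." already stripped, and answers "does this call need upgrading?".
// Illustrative example: a module containing
//   %r = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %x)
// matches the "pabs." entry and is later rewritten by the expansion code to
// the generic form
//   %r = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %x, i1 false)
// (IR shown for illustration only).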
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || Name == "cvt.ps2.pd.256" ||
            Name == "cvtdq2.pd.256" || Name == "cvtdq2.ps.256" ||
            Name.starts_with("movnt.") || Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu.") || Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vbroadcastf128") ||
            Name.starts_with("vextractf128.") ||
            Name.starts_with("vinsertf128.") ||
            Name.starts_with("vperm2f128.") || Name.starts_with("vpermil."));
  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || Name.starts_with("pabs.") ||
            Name.starts_with("padds.") || Name.starts_with("paddus.") ||
            Name.starts_with("pblendd.") || Name.starts_with("pbroadcast") ||
            Name.starts_with("pcmpeq.") || Name.starts_with("pcmpgt.") ||
            Name.starts_with("pmax") || Name.starts_with("pmin") ||
            Name.starts_with("pmovsx") || Name.starts_with("pmovzx") ||
            Name == "pmulu.dq" || Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") || Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") || Name.starts_with("vbroadcast") ||
            Name == "vbroadcasti128" || Name == "vextracti128" ||
            Name == "vinserti128" || Name == "vperm2i128");
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      return (Name.starts_with("add.p") || Name.starts_with("and.") ||
              Name.starts_with("andn.") || Name.starts_with("broadcast.s") ||
              Name.starts_with("broadcastf32x4.") ||
              Name.starts_with("broadcastf32x8.") ||
              Name.starts_with("broadcastf64x2.") ||
              Name.starts_with("broadcastf64x4.") ||
              Name.starts_with("broadcasti32x4.") ||
              Name.starts_with("broadcasti32x8.") ||
              Name.starts_with("broadcasti64x2.") ||
              Name.starts_with("broadcasti64x4.") ||
              Name.starts_with("cmp.b") || Name.starts_with("cmp.d") ||
              Name.starts_with("cmp.q") || Name.starts_with("cmp.w") ||
              Name.starts_with("compress.b") ||
              Name.starts_with("compress.d") ||
              Name.starts_with("compress.p") ||
              Name.starts_with("compress.q") ||
              Name.starts_with("compress.store.") ||
              Name.starts_with("compress.w") ||
              Name.starts_with("conflict.") ||
              Name.starts_with("cvtdq2pd.") || Name.starts_with("cvtdq2ps.") ||
              Name == "cvtpd2dq.256" || Name == "cvtpd2ps.256" ||
              Name == "cvtps2pd.128" || Name == "cvtps2pd.256" ||
              Name.starts_with("cvtqq2pd.") || Name == "cvtqq2ps.256" ||
              Name == "cvtqq2ps.512" || Name == "cvttpd2dq.256" ||
              Name == "cvttps2dq.128" || Name == "cvttps2dq.256" ||
              Name.starts_with("cvtudq2pd.") ||
              Name.starts_with("cvtudq2ps.") ||
              Name.starts_with("cvtuqq2pd.") || Name == "cvtuqq2ps.256" ||
              Name == "cvtuqq2ps.512" || Name.starts_with("dbpsadbw.") ||
              Name.starts_with("div.p") || Name.starts_with("expand.b") ||
              Name.starts_with("expand.d") ||
              Name.starts_with("expand.load.") ||
              Name.starts_with("expand.p") || Name.starts_with("expand.q") ||
              Name.starts_with("expand.w") || Name.starts_with("fpclass.p") ||
              Name.starts_with("insert") || Name.starts_with("load.") ||
              Name.starts_with("loadu.") || Name.starts_with("lzcnt.") ||
              Name.starts_with("max.p") || Name.starts_with("min.p") ||
              Name.starts_with("movddup") || Name.starts_with("move.s") ||
              Name.starts_with("movshdup") || Name.starts_with("movsldup") ||
              Name.starts_with("mul.p") || Name.starts_with("or.") ||
              Name.starts_with("pabs.") || Name.starts_with("packssdw.") ||
              Name.starts_with("packsswb.") || Name.starts_with("packusdw.") ||
              Name.starts_with("packuswb.") || Name.starts_with("padd.") ||
              Name.starts_with("padds.") || Name.starts_with("paddus.") ||
              Name.starts_with("palignr.") || Name.starts_with("pand.") ||
              Name.starts_with("pandn.") || Name.starts_with("pavg") ||
              Name.starts_with("pbroadcast") || Name.starts_with("pcmpeq.") ||
              Name.starts_with("pcmpgt.") || Name.starts_with("perm.df.") ||
              Name.starts_with("perm.di.") || Name.starts_with("permvar.") ||
              Name.starts_with("pmaddubs.w.") ||
              Name.starts_with("pmaddw.d.") ||
              Name.starts_with("pmax") || Name.starts_with("pmin") ||
              Name == "pmov.qd.256" || Name == "pmov.qd.512" ||
              Name == "pmov.wb.256" || Name == "pmov.wb.512" ||
              Name.starts_with("pmovsx") || Name.starts_with("pmovzx") ||
              Name.starts_with("pmul.dq.") ||
              Name.starts_with("pmul.hr.sw.") ||
              Name.starts_with("pmulh.w.") || Name.starts_with("pmulhu.w.") ||
              Name.starts_with("pmull.") ||
              Name.starts_with("pmultishift.qb.") ||
              Name.starts_with("pmulu.dq.") || Name.starts_with("por.") ||
              Name.starts_with("prol.") || Name.starts_with("prolv.") ||
              Name.starts_with("pror.") || Name.starts_with("prorv.") ||
              Name.starts_with("pshuf.b.") || Name.starts_with("pshuf.d.") ||
              Name.starts_with("pshufh.w.") || Name.starts_with("pshufl.w.") ||
              Name.starts_with("psll.d") || Name.starts_with("psll.q") ||
              Name.starts_with("psll.w") || Name.starts_with("pslli") ||
              Name.starts_with("psllv") || Name.starts_with("psra.d") ||
              Name.starts_with("psra.q") || Name.starts_with("psra.w") ||
              Name.starts_with("psrai") || Name.starts_with("psrav") ||
              Name.starts_with("psrl.d") || Name.starts_with("psrl.q") ||
              Name.starts_with("psrl.w") || Name.starts_with("psrli") ||
              Name.starts_with("psrlv") || Name.starts_with("psub.") ||
              Name.starts_with("psubs.") || Name.starts_with("psubus.") ||
              Name.starts_with("pternlog.") || Name.starts_with("punpckh") ||
              Name.starts_with("punpckl") || Name.starts_with("pxor.") ||
              Name.starts_with("shuf.f") || Name.starts_with("shuf.i") ||
              Name.starts_with("shuf.p") || Name.starts_with("sqrt.p") ||
              Name.starts_with("store.b.") || Name.starts_with("store.d.") ||
              Name.starts_with("store.p") || Name.starts_with("store.q.") ||
              Name.starts_with("store.w.") || Name == "store.ss" ||
              Name.starts_with("storeu.") || Name.starts_with("sub.p") ||
              Name.starts_with("ucmp.") || Name.starts_with("unpckh.") ||
              Name.starts_with("unpckl.") || Name.starts_with("valign.") ||
              Name == "vcvtph2ps.128" || Name == "vcvtph2ps.256" ||
              Name.starts_with("vextract") || Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") || Name.starts_with("vfnmadd.") ||
              Name.starts_with("vfnmsub.") || Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermi2var.") ||
              Name.starts_with("vpermil.p") ||
              Name.starts_with("vpermilvar.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshld.") || Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrd.") || Name.starts_with("vpshrdv.") ||
              Name.starts_with("vpshufbitqmb.") || Name.starts_with("xor."));
    if (Name.consume_front("mask3."))
      return (Name.starts_with("vfmadd.") || Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfmsub.") || Name.starts_with("vfmsubadd.") ||
              Name.starts_with("vfnmsub."));
    if (Name.consume_front("maskz."))
      return (Name.starts_with("pternlog.") || Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshldv.") || Name.starts_with("vpshrdv."));
    return (Name == "movntdqa" || Name == "pmul.dq.512" ||
            Name == "pmulu.dq.512" || Name.starts_with("broadcastm") ||
            Name.starts_with("cmp.p") || Name.starts_with("cvtb2mask.") ||
            Name.starts_with("cvtd2mask.") || Name.starts_with("cvtmask2") ||
            Name.starts_with("cvtq2mask.") || Name == "cvtusi2sd" ||
            Name.starts_with("cvtw2mask.") ||
            Name == "kortestc.w" || Name == "kortestz.w" ||
            Name.starts_with("kunpck") ||
            Name.starts_with("padds.") || Name.starts_with("pbroadcast") ||
            Name.starts_with("prol") || Name.starts_with("pror") ||
            Name.starts_with("psll.dq") || Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") || Name.starts_with("ptestm") ||
            Name.starts_with("ptestnm") || Name.starts_with("storent.") ||
            Name.starts_with("vbroadcast.s") || Name.starts_with("vpshld.") ||
            Name.starts_with("vpshrd."));
  }
  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || Name.starts_with("vfmsub.") ||
            Name.starts_with("vfmsubadd.") || Name.starts_with("vfnmadd.") ||
            Name.starts_with("vfnmsub."));

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s");

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || Name == "cvtsi2ss" || Name == "cvtsi642ss" ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu."));

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || Name == "cvtdq2pd" || Name == "cvtdq2ps" ||
            Name == "cvtps2pd" || Name == "cvtsi2sd" ||
            Name == "cvtsi642sd" || Name == "cvtss2sd" ||
            Name.starts_with("padds.") || Name.starts_with("paddus.") ||
            Name.starts_with("pcmpeq.") || Name.starts_with("pcmpgt.") ||
            Name == "pmulu.dq" || Name.starts_with("pshuf") ||
            Name.starts_with("psll.dq") || Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") || Name.starts_with("psubus.") ||
            Name.starts_with("sqrt.p") || Name == "storel.dq" ||
            Name.starts_with("storeu."));

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || Name == "movntdqa" ||
            Name.starts_with("pmovsx") || Name.starts_with("pmovzx"));

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8";

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt.");

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || Name == "pabs.d.128" ||
            Name == "pabs.w.128");

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || Name == "vpcmov.256" ||
            Name.starts_with("vpcom") || Name.starts_with("vprot"));

  return (Name == "addcarry.u32" || Name == "addcarry.u64" ||
          Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
          Name == "subborrow.u32" || Name == "subborrow.u64" ||
          Name.starts_with("vcvtph2ps."));
  if (!Name.consume_front("x86."))
    return false;

  if (Name == "rdtscp") {
    // If this intrinsic has no operands, it is already the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;
    // ...
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  if (Name.consume_front("sse41.ptest")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    // ...
  }

  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      // ...
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      // ...
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      // ...
    }
  }
  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      // ...
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      // ...
    }
  }
  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      // ...
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      // ...
    }
  }
  if (Name.consume_front("avx512bf16.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    // ...
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    // ...
  }
  if (Name.consume_front("xop.")) {
    if (Name.starts_with("vpermil2")) {
      // ...
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);
    // ...
  }
  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    // ...
  }

  if (Name.starts_with("rbit")) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    // ...
  }

  if (Name == "thread.pointer") {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    // ...
  }
  bool Neon = Name.consume_front("neon.");

  if (Name.consume_front("bfdot.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                    IsArm ? Intrinsic::arm_neon_bfdot
                          : Intrinsic::aarch64_neon_bfdot)
             .Default(Intrinsic::not_intrinsic);
    // ...
    size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    std::array<Type *, 2> Tys{/*...*/};
    // ...
  }

  if (Name.consume_front("bfm")) {
    if (Name.consume_back(".v4f32.v16i8")) {
      // ...
    }
  }
  // ...
      F->arg_begin()->getType());
  if (Name.consume_front("vst")) {
    static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
    // ...
    static const Intrinsic::ID StoreInts[] = {
        Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
        Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
    static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane};

    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    // ...
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), StoreInts[fArgs.size() - 3], Tys);
    // ...
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
    // ...
  }
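// Illustrative: the legacy @llvm.arm.neon.vst2.v8i8(ptr, <8 x i8>, <8 x i8>,
// i32) has four parameters, so StoreInts[4 - 3] selects arm_neon_vst2, and
// the replacement is declared with the {fArgs[0], fArgs[1]} = {ptr, <8 x i8>}
// overload pair (signature shown for illustration).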
  if (Name.consume_front("mve.")) {
    if (Name == "vctp64") {
      // ...
    }

    if (Name.starts_with("vrintn.v")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
      // ...
    }

    if (Name.consume_back(".v4i1")) {
      if (Name.consume_back(".predicated.v2i64.v4i32"))
        return Name == "mull.int" || Name == "vqdmull";

      if (Name.consume_back(".v2i64")) {
        bool IsGather = Name.consume_front("vldr.gather.");
        if (IsGather || Name.consume_front("vstr.scatter.")) {
          if (Name.consume_front("base.")) {
            Name.consume_front("wb.");
            return Name == "predicated.v2i64";
          }

          if (Name.consume_front("offset.predicated."))
            return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                   Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
        }
      }
    }
  }
  if (Name.consume_front("cde.vcx")) {
    if (Name.consume_back(".predicated.v2i64.v4i1"))
      return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
             Name == "3q" || Name == "3qa";
  }
      F->arg_begin()->getType());
  // ...
  if (Name.starts_with("addp")) {
    if (F->arg_size() != 2)
      // ...
    VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
    if (Ty && Ty->getElementType()->isFloatingPointTy()) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
      // ...
    }
  }

  if (Name.starts_with("bfcvt")) {
  if (Name.consume_front("sve.")) {
    if (Name.consume_front("bf")) {
      if (Name.consume_back(".lane")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                 .Default(Intrinsic::not_intrinsic);
        // ...
      }
    }

    if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
      // ...
    }

    if (Name.consume_front("addqv")) {
      if (!F->getReturnType()->isFPOrFPVectorTy())
        return false;

      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
      // ...
    }

    if (Name.consume_front("ld")) {
      static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
      if (LdRegex.match(Name)) {
        // ...
        static const Intrinsic::ID LoadIDs[] = {
            Intrinsic::aarch64_sve_ld2_sret,
            Intrinsic::aarch64_sve_ld3_sret,
            Intrinsic::aarch64_sve_ld4_sret,
        };
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  LoadIDs[Name[0] - '2'], Ty);
        // ...
      }
    }

    if (Name.consume_front("tuple.")) {
      if (Name.starts_with("get")) {
        Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_extract, Tys);
        // ...
      }

      if (Name.starts_with("set")) {
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[0], Args[2], Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_insert, Tys);
        // ...
      }

      static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
      if (CreateTupleRegex.match(Name)) {
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_insert, Tys);
        // ...
      }
    }

    if (Name.starts_with("rev.nxv")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
      // ...
    }
  }
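// Illustrative: the legacy "llvm.aarch64.sve.tuple.get" accessors become the
// target-independent llvm.vector.extract with a {result, tuple} overload
// pair, and "tuple.set" becomes llvm.vector.insert with the parameter types
// reordered to match the new overload order (the {Args[0], Args[2], Args[1]}
// above).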
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("im2col.3d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
             .Case("im2col.4d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
             .Case("im2col.5d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
             .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
             .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
             .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
             .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
             .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
             .Default(Intrinsic::not_intrinsic);
    // ...
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      // ...

    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    // ...
  }
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("global.to.shared.cluster",
                   Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
             .Case("shared.cta.to.cluster",
                   Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
             .Default(Intrinsic::not_intrinsic);
    // ...
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      // ...
  }
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");
  // ...
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;
  bool IsArm = Name.consume_front("arm.");
  if (IsArm || Name.consume_front("aarch64.")) {
    // ...
  }

  if (Name.consume_front("amdgcn.")) {
    if (Name == "alignbit") {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::fshr, {F->getReturnType()});
      // ...
    }

    if (Name.consume_front("atomic.")) {
      if (Name.starts_with("inc") || Name.starts_with("dec") ||
          Name.starts_with("cond.sub") || Name.starts_with("csub")) {
        // ...
      }
    }

    if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
        F->arg_size() == 7) {
      // ...
    }

    if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
        F->arg_size() == 8) {
      // ...
    }

    if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
        Name.consume_front("flat.atomic.")) {
      if (Name.starts_with("fadd") ||
          (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
          (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
        // ...
      }
    }

    if (Name.starts_with("ldexp.")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ldexp,
          {F->getReturnType(), F->getArg(1)->getType()});
      // ...
    }
  }
  if (F->arg_size() == 1) {
    // ...
        F->arg_begin()->getType());
    // ...
  }

  if (F->arg_size() == 2 && Name == "coro.end") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::coro_end);
    // ...
  }

  if (Name.consume_front("dbg.")) {
    if (CanUpgradeDebugIntrinsicsToRecords) {
      if (Name == "addr" || Name == "value" || Name == "assign" ||
          Name == "declare" || Name == "label") {
        // ...
      }
    }

    if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::dbg_value);
      // ...
    }
  }
  if (Name.consume_front("experimental.vector.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .StartsWith("extract.", Intrinsic::vector_extract)
            .StartsWith("insert.", Intrinsic::vector_insert)
            .StartsWith("reverse.", Intrinsic::vector_reverse)
            .StartsWith("interleave2.", Intrinsic::vector_interleave2)
            .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
            .StartsWith("partial.reduce.add.",
                        Intrinsic::vector_partial_reduce_add)
            .Default(Intrinsic::not_intrinsic);
    // ...
    const auto *FT = F->getFunctionType();
    // ...
    if (ID == Intrinsic::vector_extract ||
        ID == Intrinsic::vector_interleave2)
      // ...
    if (ID != Intrinsic::vector_interleave2)
      // ...
    if (ID == Intrinsic::vector_insert ||
        ID == Intrinsic::vector_partial_reduce_add)
      // ...
    if (Name.consume_front("reduce.")) {
      SmallVector<StringRef, 2> Groups;
      static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
      if (R.match(Name, &Groups))
        ID = StringSwitch<Intrinsic::ID>(Groups[1])
                 .Case("add", Intrinsic::vector_reduce_add)
                 .Case("mul", Intrinsic::vector_reduce_mul)
                 .Case("and", Intrinsic::vector_reduce_and)
                 .Case("or", Intrinsic::vector_reduce_or)
                 .Case("xor", Intrinsic::vector_reduce_xor)
                 .Case("smax", Intrinsic::vector_reduce_smax)
                 .Case("smin", Intrinsic::vector_reduce_smin)
                 .Case("umax", Intrinsic::vector_reduce_umax)
                 .Case("umin", Intrinsic::vector_reduce_umin)
                 .Case("fmax", Intrinsic::vector_reduce_fmax)
                 .Case("fmin", Intrinsic::vector_reduce_fmin)
                 .Default(Intrinsic::not_intrinsic);
      // ...
      static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
      // ...
        ID = StringSwitch<Intrinsic::ID>(Groups[1])
                 .Case("fadd", Intrinsic::vector_reduce_fadd)
                 .Case("fmul", Intrinsic::vector_reduce_fmul)
                 .Default(Intrinsic::not_intrinsic);
      // ...
      auto Args = F->getFunctionType()->params();
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                {Args[V2 ? 1 : 0]});
      // ...
    }
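    // Illustrative: the older spelling
    // "llvm.experimental.vector.reduce.add.v4i32" maps to
    // Intrinsic::vector_reduce_add. The "v2." fadd/fmul forms carry a scalar
    // start value as operand 0, so the vector overload type is taken from
    // operand 1 instead; that is what Args[V2 ? 1 : 0] selects above.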
    if (Name.consume_front("splice"))
      // ...
  }

  if (Name.consume_front("experimental.stepvector.")) {
    // ...
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), ID, F->getFunctionType()->getReturnType());
    // ...
  }

  if (Name.starts_with("flt.rounds")) {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::get_rounding);
    // ...
  }

  if (Name.starts_with("invariant.group.barrier")) {
    auto Args = F->getFunctionType()->params();
    Type *ObjectPtr[1] = {Args[0]};
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
    // ...
  }

  if ((Name.starts_with("lifetime.start") ||
       Name.starts_with("lifetime.end")) &&
      F->arg_size() == 2) {
    Intrinsic::ID IID = Name.starts_with("lifetime.start")
                            ? Intrinsic::lifetime_start
                            : Intrinsic::lifetime_end;
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                              F->getArg(0)->getType());
    // ...
  }
  Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                         .StartsWith("memcpy.", Intrinsic::memcpy)
                         .StartsWith("memmove.", Intrinsic::memmove)
                         .Default(Intrinsic::not_intrinsic);
  // ...
  if (F->arg_size() == 5) {
    // ...
    ArrayRef<Type *> ParamTypes =
        F->getFunctionType()->params().slice(0, 3);
    // ...
  }

  if (Name.starts_with("memset.") && F->arg_size() == 5) {
    // ...
    const auto *FT = F->getFunctionType();
    Type *ParamTypes[2] = {
        FT->getParamType(0), // Dest
        FT->getParamType(2)  // Size
    };
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::memset, ParamTypes);
    // ...
  }

  Intrinsic::ID MaskedID =
      StringSwitch<Intrinsic::ID>(Name)
          .StartsWith("masked.load", Intrinsic::masked_load)
          .StartsWith("masked.gather", Intrinsic::masked_gather)
          .StartsWith("masked.store", Intrinsic::masked_store)
          .StartsWith("masked.scatter", Intrinsic::masked_scatter)
          .Default(Intrinsic::not_intrinsic);
  if (MaskedID && F->arg_size() == 4) {
    // ...
    if (MaskedID == Intrinsic::masked_load ||
        MaskedID == Intrinsic::masked_gather) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getReturnType(), F->getArg(0)->getType()});
      // ...
    }
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), MaskedID,
        {F->getArg(0)->getType(), F->getArg(1)->getType()});
    // ...
  }
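// Illustrative: an old 4-argument @llvm.masked.load.v4i32 declaration is
// re-declared here with the explicit {result, pointer} overload pair, i.e.
//   declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>,
//                                                <4 x i32>)
// while store/scatter take their overloads from the data and pointer
// operands (signature shown for illustration).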
  if (Name.consume_front("nvvm.")) {
    // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
    if (F->arg_size() == 1) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
              .Case("clz.i", Intrinsic::ctlz)
              .Case("popc.i", Intrinsic::ctpop)
              .Default(Intrinsic::not_intrinsic);
      // ...
          {F->getReturnType()});
      // ...
    } else if (F->arg_size() == 2) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
              .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
              .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
              .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
              .Default(Intrinsic::not_intrinsic);
      // ...
          {F->getReturnType()});
      // ...
    }
    // ...
    if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
    bool Expand = false;
    if (Name.consume_front("abs."))
      Expand =
          Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
    else if (Name.consume_front("fabs."))
      Expand = Name == "f" || Name == "ftz.f" || Name == "d";
    else if (Name.consume_front("ex2.approx."))
      Expand =
          Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
    else if (Name.consume_front("atomic.load."))
      // ...
    else if (Name.consume_front("bitcast."))
      Expand =
          Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
    else if (Name.consume_front("rotate."))
      Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
    else if (Name.consume_front("ptr.gen.to."))
      // ...
    else if (Name.consume_front("ptr."))
      // ...
    else if (Name.consume_front("ldg.global."))
      Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                Name.starts_with("p."));
    else
      Expand = StringSwitch<bool>(Name)
                   .Case("barrier0", true)
                   .Case("barrier.n", true)
                   .Case("barrier.sync.cnt", true)
                   .Case("barrier.sync", true)
                   .Case("barrier", true)
                   .Case("bar.sync", true)
                   .Case("barrier0.popc", true)
                   .Case("barrier0.and", true)
                   .Case("barrier0.or", true)
                   .Case("clz.ll", true)
                   .Case("popc.ll", true)
                   // ...
                   .Case("swap.lo.hi.b64", true)
                   .Case("tanh.approx.f32", true)
                   .Default(false);
  if (Name.starts_with("objectsize.")) {
    Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
    if (F->arg_size() == 2 || F->arg_size() == 3) {
      // ...
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::objectsize, Tys);
      // ...
    }
  }

  if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::ptr_annotation,
        {F->arg_begin()->getType(), F->getArg(1)->getType()});
    // ...
  }
  if (Name.consume_front("riscv.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
             .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
             .Case("aes32esi", Intrinsic::riscv_aes32esi)
             .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
             .Default(Intrinsic::not_intrinsic);
    // ...
    if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
      // ...
    }
    // ...
    if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
        F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
      // ...
    }
    // ...
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
             .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
             .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
             .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
             .Default(Intrinsic::not_intrinsic);
    // ...
    if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
      // ...
    }
  }
  if (Name == "stackprotectorcheck") {
    // ...
  }

  if (Name == "thread.pointer") {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    // ...
  }

  if (Name == "var.annotation" && F->arg_size() == 4) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::var_annotation,
        {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
    // ...
  }

  if (Name.consume_front("vector.splice")) {
    if (Name.starts_with(".left") || Name.starts_with(".right"))
      // ...
  }
  if (Name.consume_front("wasm.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
             .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
             .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
             .Default(Intrinsic::not_intrinsic);
    // ...
        F->getReturnType());
    // ...
    if (Name.consume_front("dot.i8x16.i7x16.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
               .Case("add.signed",
                     Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
               .Default(Intrinsic::not_intrinsic);
      // ...
    }
  }
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      // ...
  auto *FT = F->getFunctionType();
  // ...
  std::string Name = F->getName().str();
  // ...
      Name, F->getParent());
  // ...
  if (Result != std::nullopt) {
    // ...
  }

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
       GV->getName() == "llvm.global_dtors")) ||
  // ...
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    // ...
        Ctor->getAggregateElement(1),
    // ...
  }
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op, VecTy, "cast");
  // ...
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = NumElts + i - Shift;
      if (Idx < NumElts)
        Idx -= NumElts - 16; // end of lane, switch operand
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));

  // Bitcast back to the original 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op, VecTy, "cast");
  // ...
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;
      if (Idx >= 16)
        Idx += NumElts - 16; // end of lane, switch operand
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));

  // Bitcast back to the original 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements, extract the relevant subvector.
  // ...
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = i;
  Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                     "extract");
  // ...
  if (C->isAllOnesValue())
    return Op0;
  // ...
  return Builder.CreateSelect(Mask, Op0, Op1);
  // ...
  if (C->isAllOnesValue())
    return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
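// Illustrative lowering produced by these helpers for a masked AVX-512 op
// with an i8 mask %k and <4 x i32> operands (assumed types):
//   %vk  = bitcast i8 %k to <8 x i1>
//   %vk4 = shufflevector <8 x i1> %vk, ...   ; keep the low 4 bits
//   %res = select <4 x i1> %vk4, <4 x i32> %op, <4 x i32> %passthru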
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  // ...
  ShiftVal &= (NumElts - 1);
  // ...
  if (ShiftVal > 16) {
    // ...
  }

  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16; // end of lane, switch operand
      Indices[l + i] = Idx + l;
    }
  }

  Value *Res = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
                                    bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");
  // ...
  Value *V = Builder.CreateIntrinsic(IID, Args);
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
  // ...
                                 bool IsRotateRight) {
  // ...
  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);
  // ...
  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
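// Funnel-shift lowering in one line: a rotate is fshl/fshr with both data
// operands equal. Illustrative IR for an upgraded 128-bit rotate-left:
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//                                        <4 x i32> %amt)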
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  // ...
                                bool IsShiftRight, bool ZeroMask) {
  // ...
  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);
  // ...
  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() /
                  8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);
  // ...
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);

  const Align Alignment =
      // ...
  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
  // ...
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});
  // ...
  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  // Sign-extend the low 32 bits of each 64-bit element (pmul.dq).
  LHS = Builder.CreateShl(LHS, ShiftAmt);
  LHS = Builder.CreateAShr(LHS, ShiftAmt);
  RHS = Builder.CreateShl(RHS, ShiftAmt);
  RHS = Builder.CreateAShr(RHS, ShiftAmt);
  // ...
  // Zero-extend by masking off the high 32 bits (pmulu.dq).
  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  LHS = Builder.CreateAnd(LHS, Mask);
  RHS = Builder.CreateAnd(RHS, Mask);
  if (!C || !C->isAllOnesValue())
    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  // ...
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = i;
  for (unsigned i = NumElts; i != 8; ++i)
    Indices[i] = NumElts + i % NumElts;
  Vec = Builder.CreateShuffleVector(Vec, /*...*/);
  // ...
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
                            unsigned CC, bool Signed) {
  // ...
  } else if (CC == 7) {
  // ...
  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  // ...
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  Name = Name.substr(12); // Remove "avx512.mask."

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;
  // ...
  Rep = Builder.CreateIntrinsic(IID, Args);
  // ...

  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    Type *Ty = (Name == "abs.bf16")
                   ? Builder.getBFloatTy()
                   : FixedVectorType::get(Builder.getBFloatTy(), 2);
    Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.consume_front("ex2.approx.")) {
    Intrinsic::ID IID = Name.starts_with("ftz")
                            ? Intrinsic::nvvm_ex2_approx_ftz
                            : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    // ...
  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {
    // ...
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  /*...*/);
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
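    // e.g. "llvm.nvvm.swap.lo.hi.b64(%x)" becomes fshl(%x, %x, 32): rotating
    // a 64-bit value by 32 swaps its two 32-bit halves.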
  } else if ((Name.consume_front("ptr.gen.to.") && /*...*/) ||
             (Name.consume_front("ptr.") && /*...*/
              Name.starts_with(".to.gen"))) {
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    // ...
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    // ...
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    // ...
  } else if (Name == "tanh.approx.f32") {
    // nvvm.tanh.approx.f32 is an approximate, fast-math operation.
    Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
                                       /*...*/);
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    Value *Arg =
        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  {}, {Arg});
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
        {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
                                  {CI->getArgOperand(0)});
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
                                  {CI->getArgOperand(0),
                                   CI->getArgOperand(1)});
  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {
    Value *C = CI->getArgOperand(0);
    C = Builder.CreateICmpNE(C, Builder.getInt32(0));
    Intrinsic::ID IID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("barrier0.popc",
                  Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
            .Case("barrier0.and",
                  Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
            .Case("barrier0.or",
                  Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
    Rep = Builder.CreateZExt(Bar, CI->getType());
  } else if (/*...*/
             !F->getReturnType()->getScalarType()->isBFloatTy()) {
    // ...
    /*...*/ ? Builder.CreateBitCast(Arg, NewType)
            : /*...*/;
    // ...
    Rep = Builder.CreateCall(NewFn, Args);
    if (F->getReturnType()->isIntegerTy())
      Rep = Builder.CreateBitCast(Rep, F->getReturnType());
  }
  Value *Rep = nullptr;

  if (Name.starts_with("sse4a.movnt.")) {
    // ...
    Value *Extract =
        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
    // ...
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name.starts_with("avx.movnt.") ||
             Name.starts_with("avx512.storent.")) {
    // ...
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name == "sse2.storel.dq") {
    // ...
    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Builder.CreateAlignedStore(Elt, Arg0, Align(1));
  } else if (Name.starts_with("sse.storeu.") ||
             Name.starts_with("sse2.storeu.") ||
             Name.starts_with("avx.storeu.")) {
    // ...
    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  } else if (Name == "avx512.mask.store.ss") {
    // ...
  } else if (Name.starts_with("avx512.mask.store")) {
    // "avx512.mask.store" is aligned; "avx512.mask.storeu" is not.
    bool Aligned = Name[17] != 'u';
    // ...
  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
    // "pcmpeq." or "pcmpgt.".
    bool CmpEq = Name[9] == 'e';
    // ...
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  }
  else if (Name.starts_with("avx512.broadcastm")) {
    // ...
    Rep = Builder.CreateVectorSplat(NumElts, Rep);
  } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
    Value *Vec = CI->getArgOperand(0);
    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  } else if (Name.starts_with("avx.sqrt.p") ||
             Name.starts_with("sse2.sqrt.p") ||
             Name.starts_with("sse.sqrt.p")) {
    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {
    if (/*...*/) {
      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;
      // ...
      Rep = Builder.CreateIntrinsic(IID, Args);
    } else {
      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});
    }
    // ...
  } else if (Name.starts_with("avx512.ptestm") ||
             Name.starts_with("avx512.ptestnm")) {
    // ...
    Rep = Builder.CreateAnd(Op0, Op1);
    // ...
    Rep = Builder.CreateICmp(Pred, Rep, Zero);
  } else if (Name.starts_with("avx512.mask.pbroadcast")) {
    // ...
    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
    // ...
  } else if (Name.starts_with("avx512.kunpck")) {
    // ...
    for (unsigned i = 0; i != NumElts; ++i)
      // ...
    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  }
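  // kunpck concatenates the halves of two mask registers: the shuffle above
  // picks NumElts lane indices across RHS and LHS, and the <N x i1> result
  // is bitcast back to the integer mask type of the call.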
else if (Name ==
"avx512.kand.w") {
2928 Rep = Builder.CreateAnd(
LHS,
RHS);
2929 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2930 }
else if (Name ==
"avx512.kandn.w") {
2933 LHS = Builder.CreateNot(
LHS);
2934 Rep = Builder.CreateAnd(
LHS,
RHS);
2935 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2936 }
else if (Name ==
"avx512.kor.w") {
2939 Rep = Builder.CreateOr(
LHS,
RHS);
2940 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2941 }
else if (Name ==
"avx512.kxor.w") {
2944 Rep = Builder.CreateXor(
LHS,
RHS);
2945 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2946 }
else if (Name ==
"avx512.kxnor.w") {
2949 LHS = Builder.CreateNot(
LHS);
2950 Rep = Builder.CreateXor(
LHS,
RHS);
2951 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2952 }
else if (Name ==
"avx512.knot.w") {
2954 Rep = Builder.CreateNot(Rep);
2955 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2956 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
2959 Rep = Builder.CreateOr(
LHS,
RHS);
2960 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2962 if (Name[14] ==
'c')
2966 Rep = Builder.CreateICmpEQ(Rep,
C);
2967 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2968 }
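// Note on the k-register ops above: the legacy avx512.k*.w intrinsics took
// i16 mask values, so each operand is first expanded to a <16 x i1> vector
// (getX86MaskVec), the bitwise op is performed on the i1 vector, and the
// result is bitcast back. A sketch of the resulting IR for a kand.w call
// (value names illustrative):
//   %a.v = bitcast i16 %a to <16 x i1>
//   %b.v = bitcast i16 %b to <16 x i1>
//   %and = and <16 x i1> %a.v, %b.v
//   %r   = bitcast <16 x i1> %and to i16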
  else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
           Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
           Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
           Name == "sse.div.ss" || Name == "sse2.div.sd") {
    // Scalar SSE arithmetic: extract element 0 of both operands, do the
    // scalar FP op, and reinsert into the first operand.
    Type *I32Ty = Type::getInt32Ty(C);
    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  } else if (Name.starts_with("avx512.mask.pcmp")) {
    // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
    bool CmpEq = Name[16] == 'e';
    Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    switch (VecWidth) {
    default:
      llvm_unreachable("Unexpected intrinsic");
    case 128:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }

    Rep = Builder.CreateIntrinsic(IID,
                                  {CI->getOperand(0), CI->getArgOperand(1)});
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  }
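// The width dispatch used above recurs throughout this file: the concrete
// 128/256/512-bit intrinsic is selected from the operand type via
//   unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
//   unsigned EltWidth = OpTy->getScalarSizeInBits();
// followed by an if/else ladder over (VecWidth, EltWidth) pairs, as in the
// fpclass and cmp.p handlers immediately below.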
  else if (Name.starts_with("avx512.mask.fpclass.p")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Rep = Builder.CreateIntrinsic(IID,
                                  {CI->getOperand(0), CI->getArgOperand(1)});
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.cmp.p")) {
    SmallVector<Value *, 4> Args(CI->args());
    Type *OpTy = Args[0]->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Mask = Constant::getAllOnesValue(CI->getType());
    if (VecWidth == 512)
      std::swap(Mask, Args.back());
    Args.push_back(Mask);

    Rep = Builder.CreateIntrinsic(IID, Args);
  }
  else if (Name.starts_with("avx512.mask.cmp.")) {
    // "avx512.mask.cmp.{b,d,q,w}."
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
  } else if (Name.starts_with("avx512.mask.ucmp.")) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
  } else if (Name.starts_with("avx512.cvtb2mask.") ||
             Name.starts_with("avx512.cvtw2mask.") ||
             Name.starts_with("avx512.cvtd2mask.") ||
             Name.starts_with("avx512.cvtq2mask.")) {
    Value *Op = CI->getArgOperand(0);
    Value *Zero = llvm::Constant::getNullValue(Op->getType());
    Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
  } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
             Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
             Name.starts_with("avx512.mask.pabs")) {
    Rep = upgradeAbs(Builder, *CI);
  } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
             Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
             Name.starts_with("avx512.mask.pmaxs")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
  } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
             Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
             Name.starts_with("avx512.mask.pmaxu")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
  } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
             Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
             Name.starts_with("avx512.mask.pmins")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
  } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
             Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
             Name.starts_with("avx512.mask.pminu")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
  } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
             Name == "avx512.pmulu.dq.512" ||
             Name.starts_with("avx512.mask.pmulu.dq.")) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
  } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
             Name == "avx512.pmul.dq.512" ||
             Name.starts_with("avx512.mask.pmul.dq.")) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
  } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
             Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
    Rep = Builder.CreateSIToFP(
        CI->getArgOperand(1),
        cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "avx512.cvtusi2sd") {
    Rep = Builder.CreateUIToFP(
        CI->getArgOperand(1),
        cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  }
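// The integer-to-FP conversion upgrades that follow all reduce to plain
// sitofp/uitofp (or fpext for ps2pd); a sketch of what the sse2.cvtdq2ps
// case produces (value names illustrative):
//   %r = sitofp <4 x i32> %x to <4 x float>
// with masked variants additionally wrapped in a select on the mask operand.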
  else if (Name == "sse2.cvtss2sd") {
    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
    Rep = Builder.CreateFPExt(
        Rep, cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
             Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
             Name.starts_with("avx512.mask.cvtdq2pd.") ||
             Name.starts_with("avx512.mask.cvtudq2pd.") ||
             Name.starts_with("avx512.mask.cvtdq2ps.") ||
             Name.starts_with("avx512.mask.cvtudq2ps.") ||
             Name.starts_with("avx512.mask.cvtqq2pd.") ||
             Name.starts_with("avx512.mask.cvtuqq2pd.") ||
             Name == "avx512.mask.cvtqq2ps.256" ||
             Name == "avx512.mask.cvtqq2ps.512" ||
             Name == "avx512.mask.cvtuqq2ps.256" ||
             Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
             Name == "avx.cvt.ps2.pd.256" ||
             Name == "avx512.mask.cvtps2pd.128" ||
             Name == "avx512.mask.cvtps2pd.256") {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());

    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }

    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = Name.contains("cvtu");
    if (IsPS2PD)
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    else if (CI->arg_size() == 4 &&
             (!isa<ConstantInt>(CI->getArgOperand(3)) ||
              cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
                                    {Rep, CI->getArgOperand(3)});
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }

    if (CI->arg_size() >= 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
             Name.starts_with("vcvtph2ps.")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(
        Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
    if (CI->arg_size() >= 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  }
  else if (Name.starts_with("avx512.mask.load")) {
    // "avx512.mask.loadu." or "avx512.mask.load."
    bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
    Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                            CI->getArgOperand(2), Aligned);
  } else if (Name.starts_with("avx512.mask.expand.load.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_expandload, ResultTy,
        {CI->getOperand(0), MaskVec, CI->getOperand(1)});
  } else if (Name.starts_with("avx512.mask.compress.store.")) {
    auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
    Value *MaskVec =
        getX86MaskVec(Builder, CI->getArgOperand(2),
                      cast<FixedVectorType>(ResultTy)->getNumElements());

    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_compressstore, ResultTy,
        {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
  } else if (Name.starts_with("avx512.mask.compress.") ||
             Name.starts_with("avx512.mask.expand.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Rep = Builder.CreateIntrinsic(
        IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
  } else if (Name.starts_with("xop.vpcom")) {
    bool IsSigned;
    if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
        Name.ends_with("uq"))
      IsSigned = false;
    else if (Name.ends_with("b") || Name.ends_with("w") ||
             Name.ends_with("d") || Name.ends_with("q"))
      IsSigned = true;
    else
      llvm_unreachable("Unknown suffix");

    unsigned Imm;
    if (CI->arg_size() == 3) {
      Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    } else {
      Name = Name.substr(9); // strip off "xop.vpcom"
      if (Name.starts_with("lt"))
        Imm = 0;
      else if (Name.starts_with("le"))
        Imm = 1;
      else if (Name.starts_with("gt"))
        Imm = 2;
      else if (Name.starts_with("ge"))
        Imm = 3;
      else if (Name.starts_with("eq"))
        Imm = 4;
      else if (Name.starts_with("ne"))
        Imm = 5;
      else if (Name.starts_with("false"))
        Imm = 6;
      else if (Name.starts_with("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");
    }

    Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
  }
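// For xop.vpcom*, the comparison condition is recovered either from the
// immediate operand (the 3-argument form) or, for the older named forms,
// from the mnemonic suffix parsed above (lt/le/gt/ge/eq/ne/false/true), and
// the signedness comes from the element-type suffix (b/w/d/q vs. ub/uw/ud/uq).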
  else if (Name.starts_with("xop.vpcmov")) {
    Value *Sel = CI->getArgOperand(2);
    Value *NotSel = Builder.CreateNot(Sel);
    Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
    Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
    Rep = Builder.CreateOr(Sel0, Sel1);
  } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
             Name.starts_with("avx512.mask.prol")) {
    Rep = upgradeX86Rotate(Builder, *CI, false);
  } else if (Name.starts_with("avx512.pror") ||
             Name.starts_with("avx512.mask.pror")) {
    Rep = upgradeX86Rotate(Builder, *CI, true);
  } else if (Name.starts_with("avx512.vpshld.") ||
             Name.starts_with("avx512.mask.vpshld") ||
             Name.starts_with("avx512.maskz.vpshld")) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
  } else if (Name.starts_with("avx512.vpshrd.") ||
             Name.starts_with("avx512.mask.vpshrd") ||
             Name.starts_with("avx512.maskz.vpshrd")) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
  } else if (Name == "sse42.crc32.64.8") {
    Value *Trunc0 =
        Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
                                  {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx.vbroadcast.s") ||
             Name.starts_with("avx512.vbroadcast.s")) {
    // Replace broadcasts with a series of insertelements.
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    Type *EltTy = VecTy->getElementType();
    unsigned EltNum = VecTy->getNumElements();
    Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = PoisonValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
  }
  else if (Name.starts_with("sse41.pmovsx") ||
           Name.starts_with("sse41.pmovzx") ||
           Name.starts_with("avx2.pmovsx") ||
           Name.starts_with("avx2.pmovzx") ||
           Name.starts_with("avx512.mask.pmovsx") ||
           Name.starts_with("avx512.mask.pmovzx")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();

    // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;

    Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

    bool DoSext = Name.contains("pmovsx");
    Rep =
        DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
    // If there are 3 arguments, it's a masked intrinsic so we need a select.
    if (CI->arg_size() == 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    Type *Ty = CI->getArgOperand(1)->getType();
    Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx.vbroadcastf128") ||
             Name == "avx2.vbroadcasti128") {
    // Replace vbroadcasts with a vector load+shuffle of the 128-bit source.
    Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
    unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
    Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(Load,
                                        ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  } else if (Name.starts_with("avx512.mask.shuf.i") ||
             Name.starts_with("avx512.mask.shuf.f")) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Type *VT = CI->getType();
    unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
    unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    SmallVector<int, 8> ShuffleMask(0);

    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      // We actually need the other source.
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(1), ShuffleMask);
    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                        CI->getArgOperand(3));
  } else if (Name.starts_with("avx512.mask.broadcastf") ||
             Name.starts_with("avx512.mask.broadcasti")) {
    unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
                              ->getNumElements();
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;

    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(0), ShuffleMask);
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx2.pbroadcast") ||
             Name.starts_with("avx2.vbroadcast") ||
             Name.starts_with("avx512.pbroadcast") ||
             Name.starts_with("avx512.mask.broadcast.s")) {
    // Replace vp?broadcasts with a vector shuffle.
    Value *Op = CI->getArgOperand(0);
    ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
    Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
    SmallVector<int, 8> M;
    ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
    Rep = Builder.CreateShuffleVector(Op, M);

    if (CI->arg_size() == 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  }
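// The broadcast upgrades above all reduce to a shufflevector with an
// all-zero mask; a sketch for a 4-element pbroadcast (names illustrative):
//   %r = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer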
  else if (Name.starts_with("sse2.padds.") ||
           Name.starts_with("avx2.padds.") ||
           Name.starts_with("avx512.padds.") ||
           Name.starts_with("avx512.mask.padds.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
  } else if (Name.starts_with("sse2.psubs.") ||
             Name.starts_with("avx2.psubs.") ||
             Name.starts_with("avx512.psubs.") ||
             Name.starts_with("avx512.mask.psubs.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
  } else if (Name.starts_with("sse2.paddus.") ||
             Name.starts_with("avx2.paddus.") ||
             Name.starts_with("avx512.mask.paddus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
  } else if (Name.starts_with("sse2.psubus.") ||
             Name.starts_with("avx2.psubus.") ||
             Name.starts_with("avx512.mask.psubus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
  } else if (Name.starts_with("avx512.mask.palignr.")) {
    Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                    CI->getArgOperand(1), CI->getArgOperand(2),
                                    CI->getArgOperand(3), CI->getArgOperand(4),
                                    false);
  } else if (Name.starts_with("avx512.mask.valign.")) {
    Rep = upgradeX86ALIGNIntrinsics(
        Builder, CI->getArgOperand(0), CI->getArgOperand(1),
        CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
  }
  else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
    // 128/256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
    // 128/256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
             Name == "avx512.psll.dq.512") {
    // 128/256/512-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
             Name == "avx512.psrl.dq.512") {
    // 128/256/512-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
             Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
             Name.starts_with("avx2.pblendd.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  }
  else if (Name.starts_with("avx.vinsertf128.") ||
           Name == "avx2.vinserti128" ||
           Name.starts_with("avx512.mask.insert")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    unsigned Scale = DstNumElts / SrcNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Extend the second operand into a vector the size of the destination.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);

    // Insert the widened second operand into the first operand: start from an
    // identity shuffle of Op0, then overwrite the target lanes.
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 5)
      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
  } else if (Name.starts_with("avx.vextractf128.") ||
             Name == "avx2.vextracti128" ||
             Name.starts_with("avx512.mask.vextract")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned Scale = SrcNumElts / DstNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Get indexes for the subvector of the input vector.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != DstNumElts; ++i) {
      Idxs[i] = i + (Imm * DstNumElts);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.perm.df.") ||
           Name.starts_with("avx512.mask.perm.di.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
    // The immediate permute control byte looks like this:
    //    [1:0] - select 128 bits from sources for low half of destination
    //    [2]   - ignore
    //    [3]   - zero low half of destination
    //    [5:4] - select 128 bits from sources for high half of destination
    //    [6]   - ignore
    //    [7]   - zero high half of destination

    uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);

    // Determine which operand(s) are actually in use for this instruction.
    Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

    // Permute low half of result.
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    // Permute high half of result.
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
             Name.starts_with("avx512.mask.vpermil.p") ||
             Name.starts_with("avx512.mask.pshuf.d.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);

    SmallVector<int, 8> Idxs(NumElts);
    // Look up the bits for this element, wrapping around the immediate every
    // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need to
    // offset by the first index of each group.
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name == "sse2.pshufl.w" ||
             Name.starts_with("avx512.mask.pshufl.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name == "sse2.pshufh.w" ||
             Name.starts_with("avx512.mask.pshufh.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.shuf.p")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VT = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VT->getNumElements();
    unsigned NumLaneElts = 128 / VT->getScalarSizeInBits();
    unsigned HalfLaneElts = NumLaneElts / 2;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane, switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Now select the specific element, adding HalfLaneElts bits from the
      // immediate and wrapping around the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                        CI->getArgOperand(3));
  } else if (Name.starts_with("avx512.mask.movddup") ||
             Name.starts_with("avx512.mask.movshdup") ||
             Name.starts_with("avx512.mask.movsldup")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.punpckl") ||
             Name.starts_with("avx512.mask.unpckl.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.punpckh") ||
             Name.starts_with("avx512.mask.unpckh.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.and.") ||
           Name.starts_with("avx512.mask.pand.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.andn.") ||
             Name.starts_with("avx512.mask.pandn.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.or.") ||
             Name.starts_with("avx512.mask.por.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.xor.") ||
             Name.starts_with("avx512.mask.pxor.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  }
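// The fp logic ops above have no direct floating-point instruction, so they
// are lowered through integer bitcasts; a sketch for avx512.mask.and.ps
// (names illustrative):
//   %ia  = bitcast <4 x float> %a to <4 x i32>
//   %ib  = bitcast <4 x float> %b to <4 x i32>
//   %and = and <4 x i32> %ia, %ib
//   %r   = bitcast <4 x i32> %and to <4 x float>
// followed by the usual masked select.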
  else if (Name.starts_with("avx512.mask.padd.")) {
    Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.psub.")) {
    Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.add.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    }

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.div.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    }

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.mul.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    }

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.sub.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    }

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if ((Name.starts_with("avx512.mask.max.p") ||
              Name.starts_with("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
        {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

    Rep = Builder.CreateIntrinsic(
        IID,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.lzcnt.")) {
    Rep =
        Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
                                {CI->getArgOperand(0), Builder.getInt1(false)});
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  }
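// The psll/psrl/psra handlers below all decode the legacy name the same way:
// Name[16] distinguishes immediate ('i') and variable ('v') shifts, and the
// element-size character sits after the next '.'. For example:
//   avx512.mask.psll.d.128  -> Size == 'd', fixed-count shift
//   avx512.mask.pslli.d.512 -> Size == 'd', immediate shift
//   avx512.mask.psllv8.si   -> variable shift, 8 x i32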
  else if (Name.starts_with("avx512.mask.psll")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
        IID = Intrinsic::x86_avx512_psllv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
                           : Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
                           : Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.psrl")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
                           : Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
                           : Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
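// psll, psrl and psra differ only in the intrinsic IDs selected above; all
// three decode the name identically and funnel into upgradeX86MaskedShift
// with the chosen IID.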
  else if (Name.starts_with("avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (Name.starts_with("avx512.cvtmask2")) {
    Rep = upgradeMaskToInt(Builder, *CI);
  } else if (Name.ends_with(".movntdqa")) {
    MDNode *Node = MDNode::get(
        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), CI->getArgOperand(0),
        Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  } else if (Name.starts_with("fma.vfmadd.") ||
             Name.starts_with("fma.vfmsub.") ||
             Name.starts_with("fma.vfnmadd.") ||
             Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }

    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    if (IsScalar)
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name.starts_with("fma4.vfmadd.s")) {
    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                      Rep, (uint64_t)0);
  }
  else if (Name.starts_with("avx512.mask.vfmadd.s") ||
           Name.starts_with("avx512.maskz.vfmadd.s") ||
           Name.starts_with("avx512.mask3.vfmadd.s") ||
           Name.starts_with("avx512.mask3.vfmsub.s") ||
           Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

    if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      Value *Ops[] = {A, B, C, CI->getArgOperand(4)};

      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }

    Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
                      : IsMask3 ? C
                                : A;

    // For Mask3 with NegAcc, we need a new extractelement that avoids the
    // negation above.
    if (NegAcc && IsMask3)
      PassThru =
          Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);

    Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      (uint64_t)0);
  }
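// Masked FMA naming: plain "mask" blends into the first source, "mask3"
// blends into the third (the accumulator), and "maskz" zeroes inactive
// lanes; that is why the pass-through above is A, C, or a zero value, and
// why the final insertelement targets operand 2 for mask3 and operand 0
// otherwise.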
  else if (Name.starts_with("avx512.mask.vfmadd.p") ||
           Name.starts_with("avx512.mask.vfnmadd.p") ||
           Name.starts_with("avx512.mask.vfnmsub.p") ||
           Name.starts_with("avx512.mask3.vfmadd.p") ||
           Name.starts_with("avx512.mask3.vfmsub.p") ||
           Name.starts_with("avx512.mask3.vfnmsub.p") ||
           Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (CI->arg_size() == 5 &&
        (!isa<ConstantInt>(CI->getArgOperand(4)) ||
         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the string.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }

    Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name.starts_with("fma.vfmsubadd.p")) {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateIntrinsic(IID, Ops);
  } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmaddsub.p") ||
             Name.starts_with("avx512.maskz.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (CI->arg_size() == 5) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the string.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2)};

      Function *FMA = Intrinsic::getOrInsertDeclaration(
          CI->getModule(), Intrinsic::fma, Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }

    Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name.starts_with("avx512.mask.pternlog.") ||
           Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), CI->getArgOperand(3)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpmadd52") ||
             Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
             Name.starts_with("avx512.mask.vpermt2var.") ||
             Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
  }
  else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
           Name.starts_with("avx512.maskz.vpdpbusd.") ||
           Name.starts_with("avx512.mask.vpdpbusds.") ||
           Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    SmallVector<Value *, 4> Args(CI->args());
    Args.pop_back();

    // The second and third operands changed from <N x i32> to byte vectors;
    // bitcast if the call still uses the old types.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 16);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 32);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 64);
      else
        llvm_unreachable("Unexpected vector width");
      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
             Name.starts_with("avx512.maskz.vpdpwssd.") ||
             Name.starts_with("avx512.mask.vpdpwssds.") ||
             Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    SmallVector<Value *, 4> Args(CI->args());
    Args.pop_back();

    // Same operand-type upgrade as vpdpbusd, but to i16 vectors.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 8);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 16);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 32);
      else
        llvm_unreachable("Unexpected vector width");
      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
           Name == "addcarry.u32" || Name == "addcarry.u64" ||
           Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    else
      llvm_unreachable("Unexpected intrinsic");

    // Make a call with 3 operands.
    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Value *NewCall = Builder.CreateIntrinsic(IID, Args);

    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);

    CI->replaceAllUsesWith(CF);
    Rep = nullptr;
  }
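// addcarry/subborrow formerly wrote their data result through a pointer
// operand; the replacement intrinsics return a {carry, data} pair, so the
// upgrade extracts element 1 of the result and stores it through the old
// pointer, then substitutes element 0 for the carry flag.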
  else if (Name.starts_with("avx512.mask.") &&
           upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep will be updated by the call in the condition.
  }
  if (Name.starts_with("neon.bfcvt")) {
    // Neon bfloat conversions that used integer bit patterns now take/return
    // bfloat vectors: lower to fptrunc plus shuffles.
    if (Name.starts_with("neon.bfcvtn2")) {
      SmallVector<int, 32> LoMask(4);
      std::iota(LoMask.begin(), LoMask.end(), 0);
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
      Value *Trunc = Builder.CreateFPTrunc(
          CI->getOperand(1), FixedVectorType::get(Builder.getBFloatTy(), 4));
      return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
    } else if (Name.starts_with("neon.bfcvtn")) {
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
      LLVM_DEBUG(dbgs() << "Trunc: " << *Trunc << "\n");
      return Builder.CreateShuffleVector(
          Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
    } else {
      return Builder.CreateFPTrunc(CI->getOperand(0),
                                   Builder.getBFloatTy());
    }
  } else if (Name.starts_with("sve.fcvt")) {
    Intrinsic::ID NewID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
            .Case("sve.fcvtnt.bf16f32",
                  Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
            .Default(Intrinsic::not_intrinsic);

    SmallVector<Value *, 3> Args(CI->args());

    // The original intrinsics took a predicate based on the wrong element
    // size, so it must be retyped through svbool.
    if (Args[1]->getType() != BadPredTy)
      llvm_unreachable("Unexpected predicate type!");

    Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
                                      BadPredTy, Args[1]);
    Args[1] = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

    return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
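// The ARM handlers below deal with the MVE change from modelling 64-bit
// predicated operations on <4 x i1> to the more accurate <2 x i1>: every
// i1-vector operand is round-tripped through arm.mve.pred.v2i /
// arm.mve.pred.i2v so it can be retyped to the new predicate width.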
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0));
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
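// The AMDGCN WMMA upgrade below appends the new trailing bool operand
// (defaulted to false), re-declares the intrinsic with explicit overload
// types, and carries over operand bundles and metadata from the old call.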
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    SmallVector<Value *, 8> Args(CI->args());
    Args.push_back(Builder.getFalse());
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        F->getParent(), F->getIntrinsicID(), OverloadTys);
    SmallVector<OperandBundleDef, 1> Bundles;
    CI->getOperandBundlesAsDefs(Bundles);
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->copyMetadata(*CI);
    return NewCall;
  };

  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  bool IsVolatile = false;

  // Recover ordering and volatility from the trailing operands when present.
  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // The old bf16 variant modelled its payload as an i16 vector.
  if (VT->getElementType()->isIntegerTy(16)) {
    VectorType *AsBF16 =
        VectorType::get(Type::getBFloatTy(F->getContext()),
                        VT->getElementCount());
    Val = Builder.CreateBitCast(Val, AsBF16);
  }

  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  return Builder.CreateBitCast(RMW, RetTy);
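// A sketch of what this atomic upgrade emits for a legacy flat fadd,
// assuming float operands (names and ordering illustrative):
//   %old = atomicrmw fadd ptr %p, float %v seq_cst,
//            !amdgpu.no.fine.grained.memory !0
// plus a !noalias.addrspace range excluding the private address space when
// the pointer is flat, and a volatile flag recovered from the old bool
// operand.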
  if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
    return MAV->getMetadata();
  return nullptr;
}

static MDNode *getDebugLocSafe(const Instruction *I) {
  return I->getDebugLoc().getAsMDNode();
}

// Convert a legacy llvm.dbg.* intrinsic call into the equivalent non-
// instruction DbgRecord.
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
                                                        CI->getDebugLoc());
  } else if (Name == "assign") {
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Assign, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVOp(CI, 4), unwrapMAVOp(CI, 5), CI->getDebugLoc());
  } else if (Name == "declare") {
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Declare, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        CI->getDebugLoc());
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        CI->getDebugLoc());
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, CI->getDebugLoc());
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}

// Old experimental.vector.splice calls carried a signed offset immediate.
static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
  auto *Offset = cast<ConstantInt>(CI->getArgOperand(2));
  int64_t OffsetVal = Offset->getSExtValue();
  return Builder.CreateIntrinsic(OffsetVal >= 0
                                     ? Intrinsic::vector_splice_left
                                     : Intrinsic::vector_splice_right,
                                 CI->getType(),
                                 {CI->getArgOperand(0), CI->getArgOperand(1),
                                  Builder.getInt32(std::abs(OffsetVal))});
}
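// In the splice upgrade above, the sign of the old immediate selects the new
// left/right intrinsic and its magnitude becomes the i32 offset; e.g. an old
// offset of -3 maps to vector.splice.right with offset 3.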
  assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
  Name = Name.substr(5);

  bool IsX86 = Name.consume_front("x86.");
  bool IsNVVM = Name.consume_front("nvvm.");
  bool IsAArch64 = Name.consume_front("aarch64.");
  bool IsARM = Name.consume_front("arm.");
  bool IsAMDGCN = Name.consume_front("amdgcn.");
  bool IsDbg = Name.consume_front("dbg.");
  bool IsOldSplice =
      (Name.consume_front("experimental.vector.splice") ||
       Name.consume_front("vector.splice")) &&
      !(Name.starts_with(".left") || Name.starts_with(".right"));
  Value *Rep = nullptr;

  if (!IsX86 && Name == "stackprotectorcheck") {
    Rep = nullptr;
  } else if (IsNVVM) {
    Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
  } else if (IsX86) {
    Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
  } else if (IsAArch64) {
    Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
  } else if (IsARM) {
    Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
  } else if (IsAMDGCN) {
    Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
  } else if (IsDbg) {
    upgradeDbgIntrinsicToDbgRecord(Name, CI);
  } else if (IsOldSplice) {
    Rep = upgradeVectorSplice(CI, Builder);
  } else {
    llvm_unreachable("Unknown function for CallBase upgrade.");
  }

  if (Rep)
    CI->replaceAllUsesWith(Rep);
  CI->eraseFromParent();
  return;
}

const auto &DefaultCase = [&]() -> void {
  if (CI->getFunctionType() == NewFn->getFunctionType()) {
    // Handle generic mangling change.
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallBase upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
    return;
  }

  // This must be an upgrade from a named to a literal struct return type.
  if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
    assert(OldST != NewFn->getReturnType() &&
           "Return type must have changed");
    assert(OldST->getNumElements() ==
               cast<StructType>(NewFn->getReturnType())->getNumElements() &&
           "Must have same number of elements");

    SmallVector<Value *> Args(CI->args());
    CallInst *NewCI = Builder.CreateCall(NewFn, Args);
    Value *Res = PoisonValue::get(OldST);
    for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
      Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
      Res = Builder.CreateInsertValue(Res, Elem, Idx);
    }
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  llvm_unreachable("Unknown function for CallBase upgrade.");
};
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
default: {
  DefaultCase();
  return;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
  SmallVector<Value *, 4> Args(CI->args());
  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}
case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
case Intrinsic::aarch64_sve_bfdot_lane_v2: {
  // The lane argument of these intrinsics changed to an i32 constant.
  SmallVector<Value *, 4> Args(CI->args());
  Args[3] = ConstantInt::get(Type::getInt32Ty(C),
                             cast<ConstantInt>(Args[3])->getZExtValue());
  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}
case Intrinsic::aarch64_sve_ld3_sret:
case Intrinsic::aarch64_sve_ld4_sret:
case Intrinsic::aarch64_sve_ld2_sret: {
  StringRef Name = F->getName();
  Name = Name.substr(5);
  unsigned N = StringSwitch<unsigned>(Name)
                   .StartsWith("aarch64.sve.ld2", 2)
                   .StartsWith("aarch64.sve.ld3", 3)
                   .StartsWith("aarch64.sve.ld4", 4)
                   .Default(0);
  auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
  unsigned MinElts = RetTy->getMinNumElements() / N;
  SmallVector<Value *, 2> Args(CI->args());
  Value *NewLdCall = Builder.CreateCall(NewFn, Args);
  Value *Ret = llvm::PoisonValue::get(RetTy);
  for (unsigned I = 0; I < N; I++) {
    Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
    Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
  }
  NewCall = dyn_cast<CallInst>(Ret);
  break;
}

case Intrinsic::coro_end: {
  SmallVector<Value *, 3> Args(CI->args());
  Args.push_back(ConstantTokenNone::get(CI->getContext()));
  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}

case Intrinsic::vector_extract: {
  StringRef Name = F->getName();
  Name = Name.substr(5); // Strip llvm
  if (!Name.starts_with("aarch64.sve.tuple.get")) {
    DefaultCase();
    return;
  }
  auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
  unsigned MinElts = RetTy->getMinNumElements();
  unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
  break;
}

case Intrinsic::vector_insert: {
  StringRef Name = F->getName();
  Name = Name.substr(5);
  if (!Name.starts_with("aarch64.sve.tuple")) {
    DefaultCase();
    return;
  }
  if (Name.starts_with("aarch64.sve.tuple.set")) {
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
    Value *NewIdx =
        ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
    break;
  }
  if (Name.starts_with("aarch64.sve.tuple.create")) {
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.tuple.create2", 2)
                     .StartsWith("aarch64.sve.tuple.create3", 3)
                     .StartsWith("aarch64.sve.tuple.create4", 4)
                     .Default(0);
    assert(N > 1 && "Create is expected to be between 2-4");
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    Value *Ret = llvm::PoisonValue::get(RetTy);
    unsigned MinElts = RetTy->getMinNumElements() / N;
    for (unsigned I = 0; I < N; I++) {
      Value *V = CI->getArgOperand(I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
    }
    NewCall = dyn_cast<CallInst>(Ret);
  }
  break;
}
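// The sret loads above used to return N separate vectors in a struct; the
// upgrade rebuilds the single wide scalable vector by extracting each struct
// element and inserting it at stride MinElts via llvm.vector.insert.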
case Intrinsic::arm_neon_bfdot:
case Intrinsic::arm_neon_bfmmla:
case Intrinsic::arm_neon_bfmlalb:
case Intrinsic::arm_neon_bfmlalt:
case Intrinsic::aarch64_neon_bfdot:
case Intrinsic::aarch64_neon_bfmmla:
case Intrinsic::aarch64_neon_bfmlalb:
case Intrinsic::aarch64_neon_bfmlalt: {
  SmallVector<Value *, 3> Args;
  assert(CI->arg_size() == 3 &&
         "Mismatch between function args and call args");
  size_t OperandWidth =
      CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
  assert((OperandWidth == 64 || OperandWidth == 128) &&
         "Unexpected operand width");
  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
  auto Iter = CI->args().begin();
  Args.push_back(*Iter++);
  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}

case Intrinsic::bitreverse:
  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
  break;

case Intrinsic::ctlz:
case Intrinsic::cttz: {
  // Add the extra "is_zero_poison" argument.
  NewCall =
      Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
  break;
}

case Intrinsic::objectsize: {
  Value *NullIsUnknownSize =
      CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
  NewCall = Builder.CreateCall(
      NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
  break;
}

case Intrinsic::ctpop:
  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
  break;

case Intrinsic::convert_from_fp16:
  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
  break;

case Intrinsic::dbg_value: {
  StringRef Name = F->getName();
  Name = Name.substr(5); // Strip llvm.
  // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
  if (Name.starts_with("dbg.addr")) {
    break;
  }
  // Upgrade from the old version that had an extra offset argument; drop
  // nonzero offsets instead of attempting to upgrade them.
  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
    if (Offset->isZeroValue()) {
      NewCall = Builder.CreateCall(
          NewFn,
          {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
      break;
    }
  CI->eraseFromParent();
  return;
}

case Intrinsic::ptr_annotation:
  // Create a new call with an added null annotation attribute argument.
  NewCall = Builder.CreateCall(
      NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
              CI->getArgOperand(3), Constant::getNullValue(Builder.getPtrTy())});
  break;

case Intrinsic::var_annotation:
  // Create a new call with an added null annotation attribute argument.
  NewCall = Builder.CreateCall(
      NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
              CI->getArgOperand(3), Constant::getNullValue(Builder.getPtrTy())});
  break;

case Intrinsic::riscv_aes32dsi:
case Intrinsic::riscv_aes32dsmi:
case Intrinsic::riscv_aes32esi:
case Intrinsic::riscv_aes32esmi:
case Intrinsic::riscv_sm4ks:
case Intrinsic::riscv_sm4ed: {
  // The last argument to these intrinsics used to be i8 and changed to i32.
  // The type overload for sm4ks and sm4ed was removed.
  Value *Arg0 = CI->getArgOperand(0);
  Value *Arg1 = CI->getArgOperand(1);
  if (CI->getType()->isIntegerTy(64)) {
    Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
    Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
  }
  Value *Arg2 = ConstantInt::get(
      Type::getInt32Ty(C),
      cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue());
  NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
  Value *Res = NewCall;
  if (CI->getType()->isIntegerTy(64))
    Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return;
}
case Intrinsic::nvvm_mapa_shared_cluster: {
  // The return pointer changed address space; cast it back for old users.
  NewCall =
      Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
  Value *Res = NewCall;
  Res = Builder.CreateAddrSpaceCast(
      Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return;
}
case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
  // Create a new call with the correct address space.
  SmallVector<Value *, 4> Args(CI->args());
  Args[0] = Builder.CreateAddrSpaceCast(
      Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

  NewCall = Builder.CreateCall(NewFn, Args);
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
  return;
}
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
  SmallVector<Value *, 16> Args(CI->args());

  // Cast the destination pointer to the shared_cluster address space and add
  // the new cta_group flag operand with a default value of 0.
  Args[0] = Builder.CreateAddrSpaceCast(
      Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
  Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));

  NewCall = Builder.CreateCall(NewFn, Args);
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
  return;
}
case Intrinsic::riscv_sha256sig0:
case Intrinsic::riscv_sha256sig1:
case Intrinsic::riscv_sha256sum0:
case Intrinsic::riscv_sha256sum1:
case Intrinsic::riscv_sm3p0:
case Intrinsic::riscv_sm3p1: {
  // These intrinsics lost their i64 type overload on RV64: truncate the
  // argument to i32 and sign-extend the result back.
  Value *Arg =
      Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
  NewCall = Builder.CreateCall(NewFn, Arg);
  Value *Res =
      Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return;
}
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
  break;

case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
  SmallVector<Value *, 4> Args(CI->args());
  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}

case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {
  // The arguments for these intrinsics used to be v4f32 and changed to
  // v2i64. This is purely a nop since these are bitwise intrinsics, so all
  // that is required is a bitcast of both arguments.
  Value *Arg0 = CI->getArgOperand(0);
  Value *Arg1 = CI->getArgOperand(1);
  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
  break;
}

case Intrinsic::x86_rdtscp: {
  // This used to take a pointer operand for the second result.
  NewCall = Builder.CreateCall(NewFn);
  // Extract the second result and store it.
  Value *Data = Builder.CreateExtractValue(NewCall, 1);
  Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
  // Replace the original call result with the first result of the new call.
  Value *TSC = Builder.CreateExtractValue(NewCall, 0);

  NewCall->takeName(CI);
  CI->replaceAllUsesWith(TSC);
  CI->eraseFromParent();
  return;
}

case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
case Intrinsic::x86_avx_dp_ps_256:
case Intrinsic::x86_avx2_mpsadbw: {
  // Need to truncate the last argument from i32 to i8 -- it models an
  // inherently 8-bit immediate operand of these x86 instructions.
  SmallVector<Value *, 4> Args(CI->args());
  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}

case Intrinsic::x86_avx512_mask_cmp_pd_128:
case Intrinsic::x86_avx512_mask_cmp_pd_256:
case Intrinsic::x86_avx512_mask_cmp_pd_512:
case Intrinsic::x86_avx512_mask_cmp_ps_128:
case Intrinsic::x86_avx512_mask_cmp_ps_256:
case Intrinsic::x86_avx512_mask_cmp_ps_512: {
  SmallVector<Value *, 4> Args(CI->args());
  unsigned NumElts =
      cast<FixedVectorType>(Args[0]->getType())->getNumElements();
  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

  NewCall = Builder.CreateCall(NewFn, Args);
  Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

  NewCall->takeName(CI);
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return;
}

case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
  // These intrinsics changed from returning i16 vectors to bfloat vectors.
  SmallVector<Value *, 4> Args(CI->args());
  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  if (NewFn->getIntrinsicID() ==
      Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

  NewCall = Builder.CreateCall(NewFn, Args);
  Value *Res = Builder.CreateBitCast(
      NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

  NewCall->takeName(CI);
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return;
}
case Intrinsic::x86_avx512bf16_dpbf16ps_128:
case Intrinsic::x86_avx512bf16_dpbf16ps_256:
case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
  // The second and third operands changed from i32 vectors to bfloat vectors.
  SmallVector<Value *, 4> Args(CI->args());
  unsigned NumElts =
      cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
  Args[1] = Builder.CreateBitCast(
      Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
  Args[2] = Builder.CreateBitCast(
      Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}

case Intrinsic::thread_pointer: {
  NewCall = Builder.CreateCall(NewFn, {});
  break;
}

case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
  // Only upgrade the old 5-operand form (explicit alignment operand); the
  // new form has 4 operands.
  if (CI->arg_size() != 5)
    return;

  // Remove alignment argument (3), and add alignment attributes to the
  // dest/src pointers.
  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2), CI->getArgOperand(4)};
  NewCall = Builder.CreateCall(NewFn, Args);
  AttributeList OldAttrs = CI->getAttributes();
  AttributeList NewAttrs = AttributeList::get(
      C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
      {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
       OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
  NewCall->setAttributes(NewAttrs);

  auto *MemCI = cast<MemIntrinsic>(NewCall);
  // All mem intrinsics support dest alignment.
  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
  MemCI->setDestAlignment(Align->getMaybeAlignValue());
  // Memcpy/Memmove also support source alignment.
  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
    MTI->setSourceAlignment(Align->getMaybeAlignValue());
  break;
}
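// For the mem* upgrade above: the old 5-operand form carried alignment as an
// explicit i32 argument, which now becomes parameter alignment on the
// pointers; e.g. (illustrative)
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src,
//                                    i64 %n, i1 false)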
case Intrinsic::masked_load:
case Intrinsic::masked_gather:
case Intrinsic::masked_store:
case Intrinsic::masked_scatter: {
  // These intrinsics dropped their explicit alignment operand; recover it
  // from the old argument and pass it to the IRBuilder helpers instead.
  const DataLayout &DL = CI->getDataLayout();
  auto GetMaybeAlign = [](Value *Op) {
    return cast<ConstantInt>(Op)->getMaybeAlignValue();
  };
  auto GetAlign = [&](Value *Op) {
    return DL.getValueOrABITypeAlignment(GetMaybeAlign(Op),
                                         CI->getType()->getScalarType());
  };
  switch (NewFn->getIntrinsicID()) {
  case Intrinsic::masked_load:
    NewCall = Builder.CreateMaskedLoad(
        CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
        CI->getArgOperand(2), CI->getArgOperand(3));
    break;
  case Intrinsic::masked_gather:
    NewCall = Builder.CreateMaskedGather(
        CI->getType(), CI->getArgOperand(0),
        GetMaybeAlign(CI->getArgOperand(1)).valueOrOne(),
        CI->getArgOperand(2), CI->getArgOperand(3));
    break;
  case Intrinsic::masked_store:
    NewCall = Builder.CreateMaskedStore(
        CI->getArgOperand(0), CI->getArgOperand(1),
        GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
    break;
  case Intrinsic::masked_scatter:
    NewCall = Builder.CreateMaskedScatter(
        CI->getArgOperand(0), CI->getArgOperand(1),
        DL.getValueOrABITypeAlignment(
            GetMaybeAlign(CI->getArgOperand(2)),
            CI->getArgOperand(0)->getType()->getScalarType()),
        CI->getArgOperand(3));
    break;
  default:
    llvm_unreachable("Unexpected intrinsic ID");
  }
  break;
}

case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
  // The new form takes just the pointer; the size operand is dropped.
  Value *Ptr = CI->getArgOperand(1);
  if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
    NewCall = Builder.CreateLifetimeStart(Ptr);
  else
    NewCall = Builder.CreateLifetimeEnd(Ptr);
  break;
}

case Intrinsic::x86_avx512_vpdpbusd_128:
case Intrinsic::x86_avx512_vpdpbusd_256:
case Intrinsic::x86_avx512_vpdpbusd_512:
case Intrinsic::x86_avx512_vpdpbusds_128:
case Intrinsic::x86_avx512_vpdpbusds_256:
case Intrinsic::x86_avx512_vpdpbusds_512:
case Intrinsic::x86_avx2_vpdpbssd_128:
case Intrinsic::x86_avx2_vpdpbssd_256:
case Intrinsic::x86_avx10_vpdpbssd_512:
case Intrinsic::x86_avx2_vpdpbssds_128:
case Intrinsic::x86_avx2_vpdpbssds_256:
case Intrinsic::x86_avx10_vpdpbssds_512:
case Intrinsic::x86_avx2_vpdpbsud_128:
case Intrinsic::x86_avx2_vpdpbsud_256:
case Intrinsic::x86_avx10_vpdpbsud_512:
case Intrinsic::x86_avx2_vpdpbsuds_128:
case Intrinsic::x86_avx2_vpdpbsuds_256:
case Intrinsic::x86_avx10_vpdpbsuds_512:
case Intrinsic::x86_avx2_vpdpbuud_128:
case Intrinsic::x86_avx2_vpdpbuud_256:
case Intrinsic::x86_avx10_vpdpbuud_512:
case Intrinsic::x86_avx2_vpdpbuuds_128:
case Intrinsic::x86_avx2_vpdpbuuds_256:
case Intrinsic::x86_avx10_vpdpbuuds_512: {
  // The second and third operands changed from i32 vectors to i8 vectors.
  SmallVector<Value *, 4> Args(CI->args());
  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
  Type *NewArgType = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
  Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}
case Intrinsic::x86_avx512_vpdpwssd_128:
case Intrinsic::x86_avx512_vpdpwssd_256:
case Intrinsic::x86_avx512_vpdpwssd_512:
case Intrinsic::x86_avx512_vpdpwssds_128:
case Intrinsic::x86_avx512_vpdpwssds_256:
case Intrinsic::x86_avx512_vpdpwssds_512:
case Intrinsic::x86_avx2_vpdpwsud_128:
case Intrinsic::x86_avx2_vpdpwsud_256:
case Intrinsic::x86_avx10_vpdpwsud_512:
case Intrinsic::x86_avx2_vpdpwsuds_128:
case Intrinsic::x86_avx2_vpdpwsuds_256:
case Intrinsic::x86_avx10_vpdpwsuds_512:
case Intrinsic::x86_avx2_vpdpwusd_128:
case Intrinsic::x86_avx2_vpdpwusd_256:
case Intrinsic::x86_avx10_vpdpwusd_512:
case Intrinsic::x86_avx2_vpdpwusds_128:
case Intrinsic::x86_avx2_vpdpwusds_256:
case Intrinsic::x86_avx10_vpdpwusds_512:
case Intrinsic::x86_avx2_vpdpwuud_128:
case Intrinsic::x86_avx2_vpdpwuud_256:
case Intrinsic::x86_avx10_vpdpwuud_512:
case Intrinsic::x86_avx2_vpdpwuuds_128:
case Intrinsic::x86_avx2_vpdpwuuds_256:
case Intrinsic::x86_avx10_vpdpwuuds_512: {
  // The second and third operands changed from i32 vectors to i16 vectors.
  SmallVector<Value *, 4> Args(CI->args());
  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
  Type *NewArgType = FixedVectorType::get(Builder.getInt16Ty(), NumElts);
  Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
  Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

  NewCall = Builder.CreateCall(NewFn, Args);
  break;
}
}
assert(NewCall && "Should have either set this variable or returned through "
                  "the default case");
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions, then remove the old function from the module.
    for (User *U : make_early_inc_range(F->users()))
      if (CallBase *CB = dyn_cast<CallBase>(U))
        UpgradeIntrinsicCall(CB, NewFn);

    F->eraseFromParent();
  }
}
  if (NumOperands == 0)
    return &MD; // Malformed; leave as-is.

  if (NumOperands == 3) {
    // An old-style scalar TBAA tag: rewrite into the struct-path form with a
    // zero offset.
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(MD.getContext()))),
                         MD.getOperand(2)};
    return MDNode::get(MD.getContext(), Elts2);
  }
  return &MD;
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  return nullptr;
}

Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  return nullptr;
}

bool llvm::UpgradeDebugInfo(Module &M) {
  // Read the "Debug Info Version" module flag directly rather than through
  // the canonicalizing accessor.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

  bool BrokenDebugInfo = false;
  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
    report_fatal_error("Broken module found, compilation aborted!");
  if (!BrokenDebugInfo)
    // Everything is ok.
    return false;
  else {
    // Diagnose malformed debug info.
    DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
    M.getContext().diagnose(Diag);
  }

  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
5757 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5760 if (
F->hasFnAttribute(Attr)) {
5763 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5765 auto [Part, Rest] = S.
split(
',');
5771 const unsigned Dim = DimC -
'x';
5772 assert(Dim < 3 &&
"Unexpected dim char");
5782 F->addFnAttr(Attr, NewAttr);
5786 return S ==
"x" || S ==
"y" || S ==
"z";
5791 if (K ==
"kernel") {
5803 const unsigned Idx = (AlignIdxValuePair >> 16);
5804 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5809 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5814 if (K ==
"minctasm") {
5819 if (K ==
"maxnreg") {
5824 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5828 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5832 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5836 if (K ==
"grid_constant") {
// ...
NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
// ...
if (!SeenNodes.insert(MD).second)
  // ...
assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
// ...
for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
  // ...
  const MDOperand &V = MD->getOperand(j + 1);
  // ...
  NewOperands.append({K, V});
// ...
if (NewOperands.size() > 1)
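// Editorial aside: a self-contained sketch of the legacy layout the loop
// above walks. Each !nvvm.annotations node is (GlobalValue, key1, val1,
// key2, val2, ...); the real upgrade additionally de-duplicates nodes and
// rewrites the survivors in place. The helper name is mine.
//
//   #include "llvm/ADT/STLFunctionalExtras.h"
//   #include "llvm/IR/Metadata.h"
//   #include "llvm/IR/Module.h"
//   static void forEachNVVMAnnotation(
//       llvm::Module &M,
//       llvm::function_ref<void(llvm::GlobalValue *, llvm::StringRef,
//                               const llvm::Metadata *)> CB) {
//     llvm::NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
//     if (!NamedMD)
//       return;
//     for (llvm::MDNode *MD : NamedMD->operands()) {
//       auto *GV = llvm::mdconst::dyn_extract_or_null<llvm::GlobalValue>(
//           MD->getOperand(0));
//       if (!GV)
//         continue;
//       // Key/value pairs start at operand 1; keys are MDStrings.
//       for (unsigned J = 1, JE = MD->getNumOperands(); J + 1 < JE; J += 2)
//         if (auto *K = llvm::dyn_cast<llvm::MDString>(MD->getOperand(J)))
//           CB(GV, K->getString(), MD->getOperand(J + 1).get());
//     }
//   }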
// ...
const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
if (ModRetainReleaseMarker) {
  // ...
  ID->getString().split(ValueComp, "#");
  if (ValueComp.size() == 2) {
    std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
    // ...
  M.eraseNamedMetadata(ModRetainReleaseMarker);
// ...
auto UpgradeToIntrinsic = [&](const char *OldFunc,
                              llvm::Intrinsic::ID IntrinsicFunc) {
  // ...
  bool InvalidCast = false;
  // ...
  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
    // ...
    Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
    // ...
    Args.push_back(Arg);
  // ...
  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
  // ...
  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
// ...
UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
// ...
std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
    {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
    {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
    {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
    {"objc_autoreleaseReturnValue",
     llvm::Intrinsic::objc_autoreleaseReturnValue},
    {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
    {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
    {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
    {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
    {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
    {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
    {"objc_release", llvm::Intrinsic::objc_release},
    {"objc_retain", llvm::Intrinsic::objc_retain},
    {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
    {"objc_retainAutoreleaseReturnValue",
     llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
    {"objc_retainAutoreleasedReturnValue",
     llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
    {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
    {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
    {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
    {"objc_unsafeClaimAutoreleasedReturnValue",
     llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
    {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
    {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
    {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
    {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
    {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
    {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
    {"objc_arc_annotation_topdown_bbstart",
     llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
    {"objc_arc_annotation_topdown_bbend",
     llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
    {"objc_arc_annotation_bottomup_bbstart",
     llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
    {"objc_arc_annotation_bottomup_bbend",
     llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

for (auto &I : RuntimeFuncs)
  UpgradeToIntrinsic(I.first, I.second);
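// Editorial aside: the net effect of the table above, shown on IR:
//   before:  %0 = call ptr @objc_retain(ptr %p)
//   after:   %0 = call ptr @llvm.objc.retain(ptr %p)
// A hedged sketch of the lookup-and-declare step inside UpgradeToIntrinsic;
// the real lambda also validates the prototype and bitcasts mismatched
// argument types (the helper name is mine):
//
//   #include "llvm/IR/Intrinsics.h"
//   #include "llvm/IR/Module.h"
//   static llvm::Function *declareARCIntrinsic(llvm::Module &M,
//                                              llvm::StringRef OldName,
//                                              llvm::Intrinsic::ID IID) {
//     llvm::Function *Fn = M.getFunction(OldName);
//     if (!Fn || Fn->isIntrinsic()) // Nothing to upgrade, or already done.
//       return nullptr;
//     return llvm::Intrinsic::getOrInsertDeclaration(&M, IID);
//   }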
// ...
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
// ...
bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
bool HasSwiftVersionFlag = false;
uint8_t SwiftMajorVersion, SwiftMinorVersion;
// ...
if (Op->getNumOperands() != 3)
  // ...
if (ID->getString() == "Objective-C Image Info Version")
  // ...
if (ID->getString() == "Objective-C Class Properties")
  HasClassProperties = true;
// ...
if (ID->getString() == "PIC Level") {
  if (auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
    uint64_t V = Behavior->getLimitedValue();
    // ...
if (ID->getString() == "PIE Level")
  if (auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
    // ...
if (ID->getString() == "branch-target-enforcement" ||
    ID->getString().starts_with("sign-return-address")) {
  if (auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
    // ...
    /* ... */ Op->getOperand(1), Op->getOperand(2)};
// ...
if (ID->getString() == "Objective-C Image Info Section") {
  // ...
  Value->getString().split(ValueComp, " ");
  if (ValueComp.size() != 1) {
    std::string NewValue;
    for (auto &S : ValueComp)
      NewValue += S.str();
// ...
if (ID->getString() == "Objective-C Garbage Collection") {
  // ...
  assert(Md->getValue() && "Expected non-empty metadata");
  auto Type = Md->getValue()->getType();
  // ...
  unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
  if ((Val & 0xff) != Val) {
    HasSwiftVersionFlag = true;
    SwiftABIVersion = (Val & 0xff00) >> 8;
    SwiftMajorVersion = (Val & 0xff000000) >> 24;
    SwiftMinorVersion = (Val & 0xff0000) >> 16;
// ...
if (ID->getString() == "amdgpu_code_object_version") {
  // ...
  MDString::get(M.getContext(), "amdhsa_code_object_version"),
  // ...
// ...
if (HasObjCFlag && !HasClassProperties) {
  // ...
if (HasSwiftVersionFlag) {
  // ...
  ConstantInt::get(Int8Ty, SwiftMajorVersion));
  // ...
  ConstantInt::get(Int8Ty, SwiftMinorVersion));
// ...
auto TrimSpaces = [](StringRef Section) -> std::string {
  // ...
  Section.split(Components, ',');
  // ...
  for (auto Component : Components)
    OS << ',' << Component.trim();
  // ...
for (auto &GV : M.globals()) {
  if (!GV.hasSection())
    // ...
  if (!Section.starts_with("__DATA, __objc_catlist"))
    // ...
  GV.setSection(TrimSpaces(Section));
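// Editorial aside: a standalone equivalent of the TrimSpaces lambda above,
// which canonicalizes "__DATA, __objc_catlist, regular" to
// "__DATA,__objc_catlist,regular" (a sketch following the same shape as the
// partially elided in-tree code):
//
//   #include "llvm/ADT/SmallString.h"
//   #include "llvm/Support/raw_ostream.h"
//   static std::string trimSectionSpaces(llvm::StringRef Section) {
//     llvm::SmallVector<llvm::StringRef, 5> Components;
//     Section.split(Components, ',');
//     llvm::SmallString<32> Buffer;
//     llvm::raw_svector_ostream OS(Buffer);
//     for (llvm::StringRef Component : Components)
//       OS << ',' << Component.trim(); // Leading comma, then trimmed piece.
//     return OS.str().substr(1).str(); // Drop the bogus leading comma.
//   }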
// ...
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;
  // ...
  if (!Call.isStrictFP())
    // ...
  Call.removeFnAttr(Attribute::StrictFP);
  Call.addFnAttr(Attribute::NoBuiltin);
// ...
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
  // ...
  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
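// Editorial aside: both visitors use the CRTP InstVisitor pattern. A minimal
// self-contained example of that pattern, independent of this file (names
// are mine):
//
//   #include "llvm/IR/InstVisitor.h"
//   namespace {
//   struct CountCallsVisitor : public llvm::InstVisitor<CountCallsVisitor> {
//     unsigned NumCalls = 0;
//     // visitCallInst/visitInvokeInst delegate here by default.
//     void visitCallBase(llvm::CallBase &CB) { ++NumCalls; }
//   };
//   } // namespace
//   // Usage: CountCallsVisitor V; V.visit(F); // F is an llvm::Function.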
6241 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6256 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6257 StrictFPUpgradeVisitor SFPV;
6262 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6263 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6264 for (
auto &Arg :
F.args())
6266 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6270 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6271 A.isValid() &&
A.isStringAttribute()) {
6272 F.setSection(
A.getValueAsString());
6273 F.removeFnAttr(
"implicit-section-name");
6280 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6283 if (
A.getValueAsBool()) {
6284 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6290 F.removeFnAttr(
"amdgpu-unsafe-fp-atomics");
// ...
if (!F.hasFnAttribute(FnAttrName))
  F.addFnAttr(FnAttrName, Value);
// ...
if (!F.hasFnAttribute(FnAttrName)) {
  // ...
  F.addFnAttr(FnAttrName);
  // ...
auto A = F.getFnAttribute(FnAttrName);
if ("false" == A.getValueAsString())
  F.removeFnAttr(FnAttrName);
else if ("true" == A.getValueAsString()) {
  F.removeFnAttr(FnAttrName);
  F.addFnAttr(FnAttrName);
// ...
Triple T(M.getTargetTriple());
if (!T.isThumb() && !T.isARM() && !T.isAArch64())
  // ...
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
// ...
if (Op->getNumOperands() != 3)
  // ...
uint64_t *ValPtr = IDStr == "branch-target-enforcement"    ? &BTEValue
                   : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                   : IDStr == "guarded-control-stack"      ? &GCSValue
                   : IDStr == "sign-return-address"        ? &SRAValue
                   : IDStr == "sign-return-address-all"    ? &SRAALLValue
                   : IDStr == "sign-return-address-with-bkey"
                       ? &SRABKeyValue
                       : nullptr;
// ...
*ValPtr = CI->getZExtValue();
// ...
bool BTE = BTEValue == 1;
bool BPPLR = BPPLRValue == 1;
bool GCS = GCSValue == 1;
bool SRA = SRAValue == 1;
// ...
if (SRA && SRAALLValue == 1)
  SignTypeValue = "all";
// ...
if (SRA && SRABKeyValue == 1)
  SignKeyValue = "b_key";
// ...
for (Function &F : M.getFunctionList()) {
  if (F.isDeclaration())
    // ...
  if (auto A = F.getFnAttribute("sign-return-address");
      A.isValid() && "none" == A.getValueAsString()) {
    F.removeFnAttr("sign-return-address");
    F.removeFnAttr("sign-return-address-key");
    // ...
  if (SRAALLValue == 1)
    // ...
  if (SRABKeyValue == 1)
    // ...
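// Editorial aside, worked example for the copy-down above (a paraphrase,
// since several lines are elided): module flags
//   "sign-return-address" = 1, "sign-return-address-all" = 1,
//   "sign-return-address-with-bkey" = 1
// land on every defined function as
//   "sign-return-address"="all" "sign-return-address-key"="b_key"
// while a function-level "sign-return-address"="none" is treated as an
// explicit opt-out: the legacy attributes are simply removed and nothing is
// copied in.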
// ...
if (T->getNumOperands() < 1)
  // ...
return S->getString().starts_with("llvm.vectorizer.");
// ...
StringRef OldPrefix = "llvm.vectorizer.";
// ...
if (OldTag == "llvm.vectorizer.unroll")
  // ...
if (T->getNumOperands() < 1)
  // ...
if (!OldTag->getString().starts_with("llvm.vectorizer."))
  // ...
Ops.reserve(T->getNumOperands());
// ...
for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
  Ops.push_back(T->getOperand(I));
// ...
Ops.reserve(T->getNumOperands());
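// Editorial aside: a compact sketch of the tag rename performed by
// upgradeLoopTag above (the Twine spelling is mine):
//
//   #include "llvm/ADT/Twine.h"
//   #include "llvm/IR/Metadata.h"
//   static llvm::MDString *renameVectorizerTag(llvm::LLVMContext &C,
//                                              llvm::StringRef OldTag) {
//     // "llvm.vectorizer.unroll" historically meant the interleave count;
//     // every other "llvm.vectorizer.*" tag keeps its suffix under the
//     // "llvm.loop.vectorize." prefix.
//     if (OldTag == "llvm.vectorizer.unroll")
//       return llvm::MDString::get(C, "llvm.loop.interleave.count");
//     llvm::StringRef Suffix =
//         OldTag.drop_front(llvm::StringRef("llvm.vectorizer.").size());
//     return llvm::MDString::get(
//         C, (llvm::Twine("llvm.loop.vectorize.") + Suffix).str());
//   }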
// ...
if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
    !DL.contains("-G") && !DL.starts_with("G")) {
  return DL.empty() ? std::string("G1") : (DL + "-G1").str();
// ...
if (T.isLoongArch64() || T.isRISCV64()) {
  // ...
  auto I = DL.find("-n64-");
  // ...
  return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
// ...
std::string Res = DL.str();
// ...
if (!DL.contains("-G") && !DL.starts_with("G"))
  Res.append(Res.empty() ? "G1" : "-G1");
// ...
if (!DL.contains("-ni") && !DL.starts_with("ni"))
  Res.append("-ni:7:8:9");
if (DL.ends_with("ni:7"))
  // ...
if (DL.ends_with("ni:7:8"))
  // ...
if (!DL.contains("-p7") && !DL.starts_with("p7"))
  Res.append("-p7:160:256:256:32");
if (!DL.contains("-p8") && !DL.starts_with("p8"))
  Res.append("-p8:128:128:128:48");
constexpr StringRef OldP8("-p8:128:128-");
if (DL.contains(OldP8))
  Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
if (!DL.contains("-p9") && !DL.starts_with("p9"))
  Res.append("-p9:192:256:256:32");
// ...
if (!DL.contains("m:e"))
  Res = Res.empty() ? "m:e" : "m:e-" + Res;
// ...
if (T.isSystemZ() && !DL.empty()) {
  // ...
  if (!DL.contains("-S64"))
    return "E-S64" + DL.drop_front(1).str();
// ...
auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
  // ...
  StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
  if (!DL.contains(AddrSpaces)) {
    // ...
    Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
    if (R.match(Res, &Groups))
      // ...
// ...
if (T.isAArch64()) {
  // ...
  if (!DL.empty() && !DL.contains("-Fn32"))
    Res.append("-Fn32");
  AddPtr32Ptr64AddrSpaces();
// ...
if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
    /* ... */) {
  // ...
  std::string I64 = "-i64:64";
  std::string I128 = "-i128:128";
  // ...
  size_t Pos = Res.find(I64);
  if (Pos != size_t(-1))
    Res.insert(Pos + I64.size(), I128);
// ...
if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
  size_t Pos = Res.find("-S128");
  // ...
  Res.insert(Pos, "-f64:32:64");
// ...
AddPtr32Ptr64AddrSpaces();
// ...
if (!T.isOSIAMCU()) {
  std::string I128 = "-i128:128";
  // ...
  Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
  if (R.match(Res, &Groups))
    // ...
// ...
if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
  // ...
  auto I = Ref.find("-f80:32-");
  // ...
  Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
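// Editorial aside: UpgradeDataLayoutString is a pure string transform, so the
// reader-side call amounts to (a sketch of the usual call site, not a quote;
// TT is the module's target triple string, passed separately because the
// triple accessor's return type has varied across LLVM revisions):
//
//   #include "llvm/IR/AutoUpgrade.h"
//   #include "llvm/IR/Module.h"
//   static void upgradeModuleDataLayout(llvm::Module &M, llvm::StringRef TT) {
//     std::string DL = llvm::UpgradeDataLayoutString(M.getDataLayoutStr(), TT);
//     M.setDataLayout(DL); // e.g. appends "-ni:7:8:9" and p7/p8/p9 on amdgcn.
//   }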
// ...
Attribute A = B.getAttribute("no-frame-pointer-elim");
// ...
FramePointer = A.getValueAsString() == "true" ? "all" : "none";
B.removeAttribute("no-frame-pointer-elim");
// ...
if (B.contains("no-frame-pointer-elim-non-leaf")) {
  // ...
  if (FramePointer != "all")
    FramePointer = "non-leaf";
  B.removeAttribute("no-frame-pointer-elim-non-leaf");
// ...
if (!FramePointer.empty())
  B.addAttribute("frame-pointer", FramePointer);
// ...
A = B.getAttribute("null-pointer-is-valid");
// ...
bool NullPointerIsValid = A.getValueAsString() == "true";
B.removeAttribute("null-pointer-is-valid");
if (NullPointerIsValid)
  B.addAttribute(Attribute::NullPointerIsValid);
// ...
return OBD.getTag() == "clang.arc.attachedcall" &&
       // ...