34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
62 cl::desc(
"Disable autoupgrade of debug info"));
72 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
87 Type *LastArgType =
F->getFunctionType()->getParamType(
88 F->getFunctionType()->getNumParams() - 1);
103 if (
F->getReturnType()->isVectorTy())
116 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
117 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
134 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
135 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
149 if (
F->getReturnType()->getScalarType()->isBFloatTy())
159 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
173 if (Name.consume_front(
"avx."))
174 return (Name.starts_with(
"blend.p") ||
175 Name ==
"cvt.ps2.pd.256" ||
176 Name ==
"cvtdq2.pd.256" ||
177 Name ==
"cvtdq2.ps.256" ||
178 Name.starts_with(
"movnt.") ||
179 Name.starts_with(
"sqrt.p") ||
180 Name.starts_with(
"storeu.") ||
181 Name.starts_with(
"vbroadcast.s") ||
182 Name.starts_with(
"vbroadcastf128") ||
183 Name.starts_with(
"vextractf128.") ||
184 Name.starts_with(
"vinsertf128.") ||
185 Name.starts_with(
"vperm2f128.") ||
186 Name.starts_with(
"vpermil."));
188 if (Name.consume_front(
"avx2."))
189 return (Name ==
"movntdqa" ||
190 Name.starts_with(
"pabs.") ||
191 Name.starts_with(
"padds.") ||
192 Name.starts_with(
"paddus.") ||
193 Name.starts_with(
"pblendd.") ||
195 Name.starts_with(
"pbroadcast") ||
196 Name.starts_with(
"pcmpeq.") ||
197 Name.starts_with(
"pcmpgt.") ||
198 Name.starts_with(
"pmax") ||
199 Name.starts_with(
"pmin") ||
200 Name.starts_with(
"pmovsx") ||
201 Name.starts_with(
"pmovzx") ||
203 Name ==
"pmulu.dq" ||
204 Name.starts_with(
"psll.dq") ||
205 Name.starts_with(
"psrl.dq") ||
206 Name.starts_with(
"psubs.") ||
207 Name.starts_with(
"psubus.") ||
208 Name.starts_with(
"vbroadcast") ||
209 Name ==
"vbroadcasti128" ||
210 Name ==
"vextracti128" ||
211 Name ==
"vinserti128" ||
212 Name ==
"vperm2i128");
214 if (Name.consume_front(
"avx512.")) {
215 if (Name.consume_front(
"mask."))
217 return (Name.starts_with(
"add.p") ||
218 Name.starts_with(
"and.") ||
219 Name.starts_with(
"andn.") ||
220 Name.starts_with(
"broadcast.s") ||
221 Name.starts_with(
"broadcastf32x4.") ||
222 Name.starts_with(
"broadcastf32x8.") ||
223 Name.starts_with(
"broadcastf64x2.") ||
224 Name.starts_with(
"broadcastf64x4.") ||
225 Name.starts_with(
"broadcasti32x4.") ||
226 Name.starts_with(
"broadcasti32x8.") ||
227 Name.starts_with(
"broadcasti64x2.") ||
228 Name.starts_with(
"broadcasti64x4.") ||
229 Name.starts_with(
"cmp.b") ||
230 Name.starts_with(
"cmp.d") ||
231 Name.starts_with(
"cmp.q") ||
232 Name.starts_with(
"cmp.w") ||
233 Name.starts_with(
"compress.b") ||
234 Name.starts_with(
"compress.d") ||
235 Name.starts_with(
"compress.p") ||
236 Name.starts_with(
"compress.q") ||
237 Name.starts_with(
"compress.store.") ||
238 Name.starts_with(
"compress.w") ||
239 Name.starts_with(
"conflict.") ||
240 Name.starts_with(
"cvtdq2pd.") ||
241 Name.starts_with(
"cvtdq2ps.") ||
242 Name ==
"cvtpd2dq.256" ||
243 Name ==
"cvtpd2ps.256" ||
244 Name ==
"cvtps2pd.128" ||
245 Name ==
"cvtps2pd.256" ||
246 Name.starts_with(
"cvtqq2pd.") ||
247 Name ==
"cvtqq2ps.256" ||
248 Name ==
"cvtqq2ps.512" ||
249 Name ==
"cvttpd2dq.256" ||
250 Name ==
"cvttps2dq.128" ||
251 Name ==
"cvttps2dq.256" ||
252 Name.starts_with(
"cvtudq2pd.") ||
253 Name.starts_with(
"cvtudq2ps.") ||
254 Name.starts_with(
"cvtuqq2pd.") ||
255 Name ==
"cvtuqq2ps.256" ||
256 Name ==
"cvtuqq2ps.512" ||
257 Name.starts_with(
"dbpsadbw.") ||
258 Name.starts_with(
"div.p") ||
259 Name.starts_with(
"expand.b") ||
260 Name.starts_with(
"expand.d") ||
261 Name.starts_with(
"expand.load.") ||
262 Name.starts_with(
"expand.p") ||
263 Name.starts_with(
"expand.q") ||
264 Name.starts_with(
"expand.w") ||
265 Name.starts_with(
"fpclass.p") ||
266 Name.starts_with(
"insert") ||
267 Name.starts_with(
"load.") ||
268 Name.starts_with(
"loadu.") ||
269 Name.starts_with(
"lzcnt.") ||
270 Name.starts_with(
"max.p") ||
271 Name.starts_with(
"min.p") ||
272 Name.starts_with(
"movddup") ||
273 Name.starts_with(
"move.s") ||
274 Name.starts_with(
"movshdup") ||
275 Name.starts_with(
"movsldup") ||
276 Name.starts_with(
"mul.p") ||
277 Name.starts_with(
"or.") ||
278 Name.starts_with(
"pabs.") ||
279 Name.starts_with(
"packssdw.") ||
280 Name.starts_with(
"packsswb.") ||
281 Name.starts_with(
"packusdw.") ||
282 Name.starts_with(
"packuswb.") ||
283 Name.starts_with(
"padd.") ||
284 Name.starts_with(
"padds.") ||
285 Name.starts_with(
"paddus.") ||
286 Name.starts_with(
"palignr.") ||
287 Name.starts_with(
"pand.") ||
288 Name.starts_with(
"pandn.") ||
289 Name.starts_with(
"pavg") ||
290 Name.starts_with(
"pbroadcast") ||
291 Name.starts_with(
"pcmpeq.") ||
292 Name.starts_with(
"pcmpgt.") ||
293 Name.starts_with(
"perm.df.") ||
294 Name.starts_with(
"perm.di.") ||
295 Name.starts_with(
"permvar.") ||
296 Name.starts_with(
"pmaddubs.w.") ||
297 Name.starts_with(
"pmaddw.d.") ||
298 Name.starts_with(
"pmax") ||
299 Name.starts_with(
"pmin") ||
300 Name ==
"pmov.qd.256" ||
301 Name ==
"pmov.qd.512" ||
302 Name ==
"pmov.wb.256" ||
303 Name ==
"pmov.wb.512" ||
304 Name.starts_with(
"pmovsx") ||
305 Name.starts_with(
"pmovzx") ||
306 Name.starts_with(
"pmul.dq.") ||
307 Name.starts_with(
"pmul.hr.sw.") ||
308 Name.starts_with(
"pmulh.w.") ||
309 Name.starts_with(
"pmulhu.w.") ||
310 Name.starts_with(
"pmull.") ||
311 Name.starts_with(
"pmultishift.qb.") ||
312 Name.starts_with(
"pmulu.dq.") ||
313 Name.starts_with(
"por.") ||
314 Name.starts_with(
"prol.") ||
315 Name.starts_with(
"prolv.") ||
316 Name.starts_with(
"pror.") ||
317 Name.starts_with(
"prorv.") ||
318 Name.starts_with(
"pshuf.b.") ||
319 Name.starts_with(
"pshuf.d.") ||
320 Name.starts_with(
"pshufh.w.") ||
321 Name.starts_with(
"pshufl.w.") ||
322 Name.starts_with(
"psll.d") ||
323 Name.starts_with(
"psll.q") ||
324 Name.starts_with(
"psll.w") ||
325 Name.starts_with(
"pslli") ||
326 Name.starts_with(
"psllv") ||
327 Name.starts_with(
"psra.d") ||
328 Name.starts_with(
"psra.q") ||
329 Name.starts_with(
"psra.w") ||
330 Name.starts_with(
"psrai") ||
331 Name.starts_with(
"psrav") ||
332 Name.starts_with(
"psrl.d") ||
333 Name.starts_with(
"psrl.q") ||
334 Name.starts_with(
"psrl.w") ||
335 Name.starts_with(
"psrli") ||
336 Name.starts_with(
"psrlv") ||
337 Name.starts_with(
"psub.") ||
338 Name.starts_with(
"psubs.") ||
339 Name.starts_with(
"psubus.") ||
340 Name.starts_with(
"pternlog.") ||
341 Name.starts_with(
"punpckh") ||
342 Name.starts_with(
"punpckl") ||
343 Name.starts_with(
"pxor.") ||
344 Name.starts_with(
"shuf.f") ||
345 Name.starts_with(
"shuf.i") ||
346 Name.starts_with(
"shuf.p") ||
347 Name.starts_with(
"sqrt.p") ||
348 Name.starts_with(
"store.b.") ||
349 Name.starts_with(
"store.d.") ||
350 Name.starts_with(
"store.p") ||
351 Name.starts_with(
"store.q.") ||
352 Name.starts_with(
"store.w.") ||
353 Name ==
"store.ss" ||
354 Name.starts_with(
"storeu.") ||
355 Name.starts_with(
"sub.p") ||
356 Name.starts_with(
"ucmp.") ||
357 Name.starts_with(
"unpckh.") ||
358 Name.starts_with(
"unpckl.") ||
359 Name.starts_with(
"valign.") ||
360 Name ==
"vcvtph2ps.128" ||
361 Name ==
"vcvtph2ps.256" ||
362 Name.starts_with(
"vextract") ||
363 Name.starts_with(
"vfmadd.") ||
364 Name.starts_with(
"vfmaddsub.") ||
365 Name.starts_with(
"vfnmadd.") ||
366 Name.starts_with(
"vfnmsub.") ||
367 Name.starts_with(
"vpdpbusd.") ||
368 Name.starts_with(
"vpdpbusds.") ||
369 Name.starts_with(
"vpdpwssd.") ||
370 Name.starts_with(
"vpdpwssds.") ||
371 Name.starts_with(
"vpermi2var.") ||
372 Name.starts_with(
"vpermil.p") ||
373 Name.starts_with(
"vpermilvar.") ||
374 Name.starts_with(
"vpermt2var.") ||
375 Name.starts_with(
"vpmadd52") ||
376 Name.starts_with(
"vpshld.") ||
377 Name.starts_with(
"vpshldv.") ||
378 Name.starts_with(
"vpshrd.") ||
379 Name.starts_with(
"vpshrdv.") ||
380 Name.starts_with(
"vpshufbitqmb.") ||
381 Name.starts_with(
"xor."));
383 if (Name.consume_front(
"mask3."))
385 return (Name.starts_with(
"vfmadd.") ||
386 Name.starts_with(
"vfmaddsub.") ||
387 Name.starts_with(
"vfmsub.") ||
388 Name.starts_with(
"vfmsubadd.") ||
389 Name.starts_with(
"vfnmsub."));
391 if (Name.consume_front(
"maskz."))
393 return (Name.starts_with(
"pternlog.") ||
394 Name.starts_with(
"vfmadd.") ||
395 Name.starts_with(
"vfmaddsub.") ||
396 Name.starts_with(
"vpdpbusd.") ||
397 Name.starts_with(
"vpdpbusds.") ||
398 Name.starts_with(
"vpdpwssd.") ||
399 Name.starts_with(
"vpdpwssds.") ||
400 Name.starts_with(
"vpermt2var.") ||
401 Name.starts_with(
"vpmadd52") ||
402 Name.starts_with(
"vpshldv.") ||
403 Name.starts_with(
"vpshrdv."));
406 return (Name ==
"movntdqa" ||
407 Name ==
"pmul.dq.512" ||
408 Name ==
"pmulu.dq.512" ||
409 Name.starts_with(
"broadcastm") ||
410 Name.starts_with(
"cmp.p") ||
411 Name.starts_with(
"cvtb2mask.") ||
412 Name.starts_with(
"cvtd2mask.") ||
413 Name.starts_with(
"cvtmask2") ||
414 Name.starts_with(
"cvtq2mask.") ||
415 Name ==
"cvtusi2sd" ||
416 Name.starts_with(
"cvtw2mask.") ||
421 Name ==
"kortestc.w" ||
422 Name ==
"kortestz.w" ||
423 Name.starts_with(
"kunpck") ||
426 Name.starts_with(
"padds.") ||
427 Name.starts_with(
"pbroadcast") ||
428 Name.starts_with(
"prol") ||
429 Name.starts_with(
"pror") ||
430 Name.starts_with(
"psll.dq") ||
431 Name.starts_with(
"psrl.dq") ||
432 Name.starts_with(
"psubs.") ||
433 Name.starts_with(
"ptestm") ||
434 Name.starts_with(
"ptestnm") ||
435 Name.starts_with(
"storent.") ||
436 Name.starts_with(
"vbroadcast.s") ||
437 Name.starts_with(
"vpshld.") ||
438 Name.starts_with(
"vpshrd."));
441 if (Name.consume_front(
"fma."))
442 return (Name.starts_with(
"vfmadd.") ||
443 Name.starts_with(
"vfmsub.") ||
444 Name.starts_with(
"vfmsubadd.") ||
445 Name.starts_with(
"vfnmadd.") ||
446 Name.starts_with(
"vfnmsub."));
448 if (Name.consume_front(
"fma4."))
449 return Name.starts_with(
"vfmadd.s");
451 if (Name.consume_front(
"sse."))
452 return (Name ==
"add.ss" ||
453 Name ==
"cvtsi2ss" ||
454 Name ==
"cvtsi642ss" ||
457 Name.starts_with(
"sqrt.p") ||
459 Name.starts_with(
"storeu.") ||
462 if (Name.consume_front(
"sse2."))
463 return (Name ==
"add.sd" ||
464 Name ==
"cvtdq2pd" ||
465 Name ==
"cvtdq2ps" ||
466 Name ==
"cvtps2pd" ||
467 Name ==
"cvtsi2sd" ||
468 Name ==
"cvtsi642sd" ||
469 Name ==
"cvtss2sd" ||
472 Name.starts_with(
"padds.") ||
473 Name.starts_with(
"paddus.") ||
474 Name.starts_with(
"pcmpeq.") ||
475 Name.starts_with(
"pcmpgt.") ||
480 Name ==
"pmulu.dq" ||
481 Name.starts_with(
"pshuf") ||
482 Name.starts_with(
"psll.dq") ||
483 Name.starts_with(
"psrl.dq") ||
484 Name.starts_with(
"psubs.") ||
485 Name.starts_with(
"psubus.") ||
486 Name.starts_with(
"sqrt.p") ||
488 Name ==
"storel.dq" ||
489 Name.starts_with(
"storeu.") ||
492 if (Name.consume_front(
"sse41."))
493 return (Name.starts_with(
"blendp") ||
494 Name ==
"movntdqa" ||
504 Name.starts_with(
"pmovsx") ||
505 Name.starts_with(
"pmovzx") ||
508 if (Name.consume_front(
"sse42."))
509 return Name ==
"crc32.64.8";
511 if (Name.consume_front(
"sse4a."))
512 return Name.starts_with(
"movnt.");
514 if (Name.consume_front(
"ssse3."))
515 return (Name ==
"pabs.b.128" ||
516 Name ==
"pabs.d.128" ||
517 Name ==
"pabs.w.128");
519 if (Name.consume_front(
"xop."))
520 return (Name ==
"vpcmov" ||
521 Name ==
"vpcmov.256" ||
522 Name.starts_with(
"vpcom") ||
523 Name.starts_with(
"vprot"));
525 return (Name ==
"addcarry.u32" ||
526 Name ==
"addcarry.u64" ||
527 Name ==
"addcarryx.u32" ||
528 Name ==
"addcarryx.u64" ||
529 Name ==
"subborrow.u32" ||
530 Name ==
"subborrow.u64" ||
531 Name.starts_with(
"vcvtph2ps."));
537 if (!Name.consume_front(
"x86."))
545 if (Name ==
"rdtscp") {
547 if (
F->getFunctionType()->getNumParams() == 0)
552 Intrinsic::x86_rdtscp);
559 if (Name.consume_front(
"sse41.ptest")) {
561 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
562 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
563 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
576 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
577 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
578 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
579 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
580 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
581 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
586 if (Name.consume_front(
"avx512.")) {
587 if (Name.consume_front(
"mask.cmp.")) {
590 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
591 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
592 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
593 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
594 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
595 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
599 }
else if (Name.starts_with(
"vpdpbusd.") ||
600 Name.starts_with(
"vpdpbusds.")) {
603 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
604 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
605 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
606 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
607 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
608 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
612 }
else if (Name.starts_with(
"vpdpwssd.") ||
613 Name.starts_with(
"vpdpwssds.")) {
616 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
617 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
618 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
619 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
620 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
621 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
629 if (Name.consume_front(
"avx2.")) {
630 if (Name.consume_front(
"vpdpb")) {
633 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
634 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
635 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
636 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
637 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
638 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
639 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
640 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
641 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
642 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
643 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
644 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
648 }
else if (Name.consume_front(
"vpdpw")) {
651 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
652 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
653 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
654 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
655 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
656 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
657 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
658 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
659 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
660 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
661 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
662 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
670 if (Name.consume_front(
"avx10.")) {
671 if (Name.consume_front(
"vpdpb")) {
674 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
675 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
676 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
677 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
678 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
679 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
683 }
else if (Name.consume_front(
"vpdpw")) {
685 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
686 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
687 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
688 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
689 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
690 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
698 if (Name.consume_front(
"avx512bf16.")) {
701 .
Case(
"cvtne2ps2bf16.128",
702 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
703 .
Case(
"cvtne2ps2bf16.256",
704 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
705 .
Case(
"cvtne2ps2bf16.512",
706 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
707 .
Case(
"mask.cvtneps2bf16.128",
708 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
709 .
Case(
"cvtneps2bf16.256",
710 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
711 .
Case(
"cvtneps2bf16.512",
712 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
719 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
720 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
721 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
728 if (Name.consume_front(
"xop.")) {
730 if (Name.starts_with(
"vpermil2")) {
733 auto Idx =
F->getFunctionType()->getParamType(2);
734 if (Idx->isFPOrFPVectorTy()) {
735 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
736 unsigned EltSize = Idx->getScalarSizeInBits();
737 if (EltSize == 64 && IdxSize == 128)
738 ID = Intrinsic::x86_xop_vpermil2pd;
739 else if (EltSize == 32 && IdxSize == 128)
740 ID = Intrinsic::x86_xop_vpermil2ps;
741 else if (EltSize == 64 && IdxSize == 256)
742 ID = Intrinsic::x86_xop_vpermil2pd_256;
744 ID = Intrinsic::x86_xop_vpermil2ps_256;
746 }
else if (
F->arg_size() == 2)
749 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
750 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
761 if (Name ==
"seh.recoverfp") {
763 Intrinsic::eh_recoverfp);
775 if (Name.starts_with(
"rbit")) {
778 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
782 if (Name ==
"thread.pointer") {
785 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
789 bool Neon = Name.consume_front(
"neon.");
794 if (Name.consume_front(
"bfdot.")) {
798 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
803 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
804 assert((OperandWidth == 64 || OperandWidth == 128) &&
805 "Unexpected operand width");
807 std::array<Type *, 2> Tys{
818 if (Name.consume_front(
"bfm")) {
820 if (Name.consume_back(
".v4f32.v16i8")) {
866 F->arg_begin()->getType());
870 if (Name.consume_front(
"vst")) {
872 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
876 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
877 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
880 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
881 Intrinsic::arm_neon_vst4lane};
883 auto fArgs =
F->getFunctionType()->params();
884 Type *Tys[] = {fArgs[0], fArgs[1]};
887 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
890 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
899 if (Name.consume_front(
"mve.")) {
901 if (Name ==
"vctp64") {
911 if (Name.starts_with(
"vrintn.v")) {
913 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
918 if (Name.consume_back(
".v4i1")) {
920 if (Name.consume_back(
".predicated.v2i64.v4i32"))
922 return Name ==
"mull.int" || Name ==
"vqdmull";
924 if (Name.consume_back(
".v2i64")) {
926 bool IsGather = Name.consume_front(
"vldr.gather.");
927 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
928 if (Name.consume_front(
"base.")) {
930 Name.consume_front(
"wb.");
933 return Name ==
"predicated.v2i64";
936 if (Name.consume_front(
"offset.predicated."))
937 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
938 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
951 if (Name.consume_front(
"cde.vcx")) {
953 if (Name.consume_back(
".predicated.v2i64.v4i1"))
955 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
956 Name ==
"3q" || Name ==
"3qa";
970 F->arg_begin()->getType());
974 if (Name.starts_with(
"addp")) {
976 if (
F->arg_size() != 2)
979 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
981 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
987 if (Name.starts_with(
"bfcvt")) {
994 if (Name.consume_front(
"sve.")) {
996 if (Name.consume_front(
"bf")) {
997 if (Name.consume_back(
".lane")) {
1001 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1002 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1003 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1015 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1020 if (Name.consume_front(
"addqv")) {
1022 if (!
F->getReturnType()->isFPOrFPVectorTy())
1025 auto Args =
F->getFunctionType()->params();
1026 Type *Tys[] = {
F->getReturnType(), Args[1]};
1028 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1032 if (Name.consume_front(
"ld")) {
1034 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1035 if (LdRegex.
match(Name)) {
1042 Intrinsic::aarch64_sve_ld2_sret,
1043 Intrinsic::aarch64_sve_ld3_sret,
1044 Intrinsic::aarch64_sve_ld4_sret,
1047 LoadIDs[Name[0] -
'2'], Ty);
1053 if (Name.consume_front(
"tuple.")) {
1055 if (Name.starts_with(
"get")) {
1057 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1059 F->getParent(), Intrinsic::vector_extract, Tys);
1063 if (Name.starts_with(
"set")) {
1065 auto Args =
F->getFunctionType()->params();
1066 Type *Tys[] = {Args[0], Args[2], Args[1]};
1068 F->getParent(), Intrinsic::vector_insert, Tys);
1072 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1073 if (CreateTupleRegex.
match(Name)) {
1075 auto Args =
F->getFunctionType()->params();
1076 Type *Tys[] = {
F->getReturnType(), Args[1]};
1078 F->getParent(), Intrinsic::vector_insert, Tys);
1084 if (Name.starts_with(
"rev.nxv")) {
1087 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1099 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1103 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1105 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1107 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1108 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1109 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1110 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1111 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1112 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1121 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1135 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1136 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1146 if (Name.consume_front(
"mapa.shared.cluster"))
1147 if (
F->getReturnType()->getPointerAddressSpace() ==
1149 return Intrinsic::nvvm_mapa_shared_cluster;
1151 if (Name.consume_front(
"cp.async.bulk.")) {
1154 .
Case(
"global.to.shared.cluster",
1155 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1156 .
Case(
"shared.cta.to.cluster",
1157 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1161 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1170 if (Name.consume_front(
"fma.rn."))
1172 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1173 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1174 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1175 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1178 if (Name.consume_front(
"fmax."))
1180 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1181 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1182 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1183 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1184 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1185 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1186 .
Case(
"ftz.nan.xorsign.abs.bf16",
1187 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1188 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1189 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1190 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1191 .
Case(
"ftz.xorsign.abs.bf16x2",
1192 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1193 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1194 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1195 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1196 .
Case(
"nan.xorsign.abs.bf16x2",
1197 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1198 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1199 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1202 if (Name.consume_front(
"fmin."))
1204 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1205 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1206 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1207 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1208 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1209 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1210 .
Case(
"ftz.nan.xorsign.abs.bf16",
1211 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1212 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1213 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1214 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1215 .
Case(
"ftz.xorsign.abs.bf16x2",
1216 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1217 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1218 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1219 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1220 .
Case(
"nan.xorsign.abs.bf16x2",
1221 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1222 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1223 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1226 if (Name.consume_front(
"neg."))
1228 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1229 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1236 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1237 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1238 Name.consume_front(
"param");
1244 if (Name.starts_with(
"to.fp16")) {
1248 FuncTy->getReturnType());
1251 if (Name.starts_with(
"from.fp16")) {
1255 FuncTy->getReturnType());
1262 bool CanUpgradeDebugIntrinsicsToRecords) {
1263 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1268 if (!Name.consume_front(
"llvm.") || Name.empty())
1274 bool IsArm = Name.consume_front(
"arm.");
1275 if (IsArm || Name.consume_front(
"aarch64.")) {
1281 if (Name.consume_front(
"amdgcn.")) {
1282 if (Name ==
"alignbit") {
1285 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1289 if (Name.consume_front(
"atomic.")) {
1290 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1291 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1301 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1302 F->arg_size() == 7) {
1306 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1307 F->arg_size() == 8) {
1312 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1313 Name.consume_front(
"flat.atomic.")) {
1314 if (Name.starts_with(
"fadd") ||
1316 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1317 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1325 if (Name.starts_with(
"ldexp.")) {
1328 F->getParent(), Intrinsic::ldexp,
1329 {F->getReturnType(), F->getArg(1)->getType()});
1338 if (
F->arg_size() == 1) {
1339 if (Name.consume_front(
"convert.")) {
1353 F->arg_begin()->getType());
1358 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1361 Intrinsic::coro_end);
1368 if (Name.consume_front(
"dbg.")) {
1370 if (CanUpgradeDebugIntrinsicsToRecords) {
1371 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1372 Name ==
"declare" || Name ==
"label") {
1381 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1384 Intrinsic::dbg_value);
1391 if (Name.consume_front(
"experimental.vector.")) {
1397 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1398 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1399 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1400 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1401 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1403 Intrinsic::vector_partial_reduce_add)
1406 const auto *FT =
F->getFunctionType();
1408 if (
ID == Intrinsic::vector_extract ||
1409 ID == Intrinsic::vector_interleave2)
1412 if (
ID != Intrinsic::vector_interleave2)
1414 if (
ID == Intrinsic::vector_insert ||
1415 ID == Intrinsic::vector_partial_reduce_add)
1423 if (Name.consume_front(
"reduce.")) {
1425 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1426 if (R.match(Name, &
Groups))
1428 .
Case(
"add", Intrinsic::vector_reduce_add)
1429 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1430 .
Case(
"and", Intrinsic::vector_reduce_and)
1431 .
Case(
"or", Intrinsic::vector_reduce_or)
1432 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1433 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1434 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1435 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1436 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1437 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1438 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1443 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1448 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1449 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1454 auto Args =
F->getFunctionType()->params();
1456 {Args[V2 ? 1 : 0]});
1462 if (Name.consume_front(
"splice"))
1466 if (Name.consume_front(
"experimental.stepvector.")) {
1470 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1475 if (Name.starts_with(
"flt.rounds")) {
1478 Intrinsic::get_rounding);
1483 if (Name.starts_with(
"invariant.group.barrier")) {
1485 auto Args =
F->getFunctionType()->params();
1486 Type* ObjectPtr[1] = {Args[0]};
1489 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1494 if ((Name.starts_with(
"lifetime.start") ||
1495 Name.starts_with(
"lifetime.end")) &&
1496 F->arg_size() == 2) {
1498 ? Intrinsic::lifetime_start
1499 : Intrinsic::lifetime_end;
1502 F->getArg(0)->getType());
1511 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1512 .StartsWith(
"memmove.", Intrinsic::memmove)
1514 if (
F->arg_size() == 5) {
1518 F->getFunctionType()->params().slice(0, 3);
1524 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1527 const auto *FT =
F->getFunctionType();
1528 Type *ParamTypes[2] = {
1529 FT->getParamType(0),
1533 Intrinsic::memset, ParamTypes);
1539 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1540 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1541 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1542 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1544 if (MaskedID &&
F->arg_size() == 4) {
1546 if (MaskedID == Intrinsic::masked_load ||
1547 MaskedID == Intrinsic::masked_gather) {
1549 F->getParent(), MaskedID,
1550 {F->getReturnType(), F->getArg(0)->getType()});
1554 F->getParent(), MaskedID,
1555 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1561 if (Name.consume_front(
"nvvm.")) {
1563 if (
F->arg_size() == 1) {
1566 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1567 .Case(
"clz.i", Intrinsic::ctlz)
1568 .
Case(
"popc.i", Intrinsic::ctpop)
1572 {F->getReturnType()});
1575 }
else if (
F->arg_size() == 2) {
1578 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1579 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1580 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1581 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1585 {F->getReturnType()});
1591 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1619 bool Expand =
false;
1620 if (Name.consume_front(
"abs."))
1623 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1624 else if (Name.consume_front(
"fabs."))
1626 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1627 else if (Name.consume_front(
"ex2.approx."))
1630 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1631 else if (Name.consume_front(
"atomic.load."))
1640 else if (Name.consume_front(
"bitcast."))
1643 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1644 else if (Name.consume_front(
"rotate."))
1646 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1647 else if (Name.consume_front(
"ptr.gen.to."))
1650 else if (Name.consume_front(
"ptr."))
1653 else if (Name.consume_front(
"ldg.global."))
1655 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1656 Name.starts_with(
"p."));
1659 .
Case(
"barrier0",
true)
1660 .
Case(
"barrier.n",
true)
1661 .
Case(
"barrier.sync.cnt",
true)
1662 .
Case(
"barrier.sync",
true)
1663 .
Case(
"barrier",
true)
1664 .
Case(
"bar.sync",
true)
1665 .
Case(
"barrier0.popc",
true)
1666 .
Case(
"barrier0.and",
true)
1667 .
Case(
"barrier0.or",
true)
1668 .
Case(
"clz.ll",
true)
1669 .
Case(
"popc.ll",
true)
1671 .
Case(
"swap.lo.hi.b64",
true)
1672 .
Case(
"tanh.approx.f32",
true)
1684 if (Name.starts_with(
"objectsize.")) {
1685 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1686 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1689 Intrinsic::objectsize, Tys);
1696 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1699 F->getParent(), Intrinsic::ptr_annotation,
1700 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1706 if (Name.consume_front(
"riscv.")) {
1709 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1710 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1711 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1712 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1715 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1728 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1729 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1738 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1739 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1740 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1741 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1746 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1758 if (Name ==
"stackprotectorcheck") {
1765 if (Name ==
"thread.pointer") {
1767 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1773 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1776 F->getParent(), Intrinsic::var_annotation,
1777 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1780 if (Name.consume_front(
"vector.splice")) {
1781 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1789 if (Name.consume_front(
"wasm.")) {
1792 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1793 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1794 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1799 F->getReturnType());
1803 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1805 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1807 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1826 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1835 auto *FT =
F->getFunctionType();
1838 std::string
Name =
F->getName().str();
1841 Name,
F->getParent());
1852 if (Result != std::nullopt) {
1865 bool CanUpgradeDebugIntrinsicsToRecords) {
1885 GV->
getName() ==
"llvm.global_dtors")) ||
1900 unsigned N =
Init->getNumOperands();
1901 std::vector<Constant *> NewCtors(
N);
1902 for (
unsigned i = 0; i !=
N; ++i) {
1905 Ctor->getAggregateElement(1),
1919 unsigned NumElts = ResultTy->getNumElements() * 8;
1923 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1933 for (
unsigned l = 0; l != NumElts; l += 16)
1934 for (
unsigned i = 0; i != 16; ++i) {
1935 unsigned Idx = NumElts + i - Shift;
1937 Idx -= NumElts - 16;
1938 Idxs[l + i] = Idx + l;
1941 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1945 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1953 unsigned NumElts = ResultTy->getNumElements() * 8;
1957 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1967 for (
unsigned l = 0; l != NumElts; l += 16)
1968 for (
unsigned i = 0; i != 16; ++i) {
1969 unsigned Idx = i + Shift;
1971 Idx += NumElts - 16;
1972 Idxs[l + i] = Idx + l;
1975 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
1979 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1987 Mask = Builder.CreateBitCast(Mask, MaskTy);
1993 for (
unsigned i = 0; i != NumElts; ++i)
1995 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2006 if (
C->isAllOnesValue())
2011 return Builder.CreateSelect(Mask, Op0, Op1);
2018 if (
C->isAllOnesValue())
2022 Mask->getType()->getIntegerBitWidth());
2023 Mask = Builder.CreateBitCast(Mask, MaskTy);
2024 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2025 return Builder.CreateSelect(Mask, Op0, Op1);
2038 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2039 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2044 ShiftVal &= (NumElts - 1);
2053 if (ShiftVal > 16) {
2061 for (
unsigned l = 0; l < NumElts; l += 16) {
2062 for (
unsigned i = 0; i != 16; ++i) {
2063 unsigned Idx = ShiftVal + i;
2064 if (!IsVALIGN && Idx >= 16)
2065 Idx += NumElts - 16;
2066 Indices[l + i] = Idx + l;
2071 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2077 bool ZeroMask,
bool IndexForm) {
2080 unsigned EltWidth = Ty->getScalarSizeInBits();
2081 bool IsFloat = Ty->isFPOrFPVectorTy();
2083 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2084 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2085 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2086 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2087 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2088 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2089 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2090 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2091 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2092 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2093 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2094 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2095 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2096 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2097 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2098 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2099 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2100 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2101 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2103 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2105 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2107 else if (VecWidth == 128 && EltWidth == 16)
2108 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2109 else if (VecWidth == 256 && EltWidth == 16)
2110 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2111 else if (VecWidth == 512 && EltWidth == 16)
2112 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2113 else if (VecWidth == 128 && EltWidth == 8)
2114 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2115 else if (VecWidth == 256 && EltWidth == 8)
2116 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2117 else if (VecWidth == 512 && EltWidth == 8)
2118 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2129 Value *V = Builder.CreateIntrinsic(IID, Args);
2141 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2152 bool IsRotateRight) {
2162 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2163 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2166 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2167 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2212 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2217 bool IsShiftRight,
bool ZeroMask) {
2231 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2232 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2235 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2236 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2251 const Align Alignment =
2253 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2258 if (
C->isAllOnesValue())
2259 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2264 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2270 const Align Alignment =
2279 if (
C->isAllOnesValue())
2280 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2285 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2291 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2292 {Op0, Builder.getInt1(
false)});
2307 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2308 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2309 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2310 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2311 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2314 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2315 LHS = Builder.CreateAnd(
LHS, Mask);
2316 RHS = Builder.CreateAnd(
RHS, Mask);
2333 if (!
C || !
C->isAllOnesValue())
2334 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2339 for (
unsigned i = 0; i != NumElts; ++i)
2341 for (
unsigned i = NumElts; i != 8; ++i)
2342 Indices[i] = NumElts + i % NumElts;
2343 Vec = Builder.CreateShuffleVector(Vec,
2347 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2351 unsigned CC,
bool Signed) {
2359 }
else if (CC == 7) {
2395 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2396 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2398 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2399 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2408 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2414 Name = Name.substr(12);
2419 if (Name.starts_with(
"max.p")) {
2420 if (VecWidth == 128 && EltWidth == 32)
2421 IID = Intrinsic::x86_sse_max_ps;
2422 else if (VecWidth == 128 && EltWidth == 64)
2423 IID = Intrinsic::x86_sse2_max_pd;
2424 else if (VecWidth == 256 && EltWidth == 32)
2425 IID = Intrinsic::x86_avx_max_ps_256;
2426 else if (VecWidth == 256 && EltWidth == 64)
2427 IID = Intrinsic::x86_avx_max_pd_256;
2430 }
else if (Name.starts_with(
"min.p")) {
2431 if (VecWidth == 128 && EltWidth == 32)
2432 IID = Intrinsic::x86_sse_min_ps;
2433 else if (VecWidth == 128 && EltWidth == 64)
2434 IID = Intrinsic::x86_sse2_min_pd;
2435 else if (VecWidth == 256 && EltWidth == 32)
2436 IID = Intrinsic::x86_avx_min_ps_256;
2437 else if (VecWidth == 256 && EltWidth == 64)
2438 IID = Intrinsic::x86_avx_min_pd_256;
2441 }
else if (Name.starts_with(
"pshuf.b.")) {
2442 if (VecWidth == 128)
2443 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2444 else if (VecWidth == 256)
2445 IID = Intrinsic::x86_avx2_pshuf_b;
2446 else if (VecWidth == 512)
2447 IID = Intrinsic::x86_avx512_pshuf_b_512;
2450 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2451 if (VecWidth == 128)
2452 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2453 else if (VecWidth == 256)
2454 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2455 else if (VecWidth == 512)
2456 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2459 }
else if (Name.starts_with(
"pmulh.w.")) {
2460 if (VecWidth == 128)
2461 IID = Intrinsic::x86_sse2_pmulh_w;
2462 else if (VecWidth == 256)
2463 IID = Intrinsic::x86_avx2_pmulh_w;
2464 else if (VecWidth == 512)
2465 IID = Intrinsic::x86_avx512_pmulh_w_512;
2468 }
else if (Name.starts_with(
"pmulhu.w.")) {
2469 if (VecWidth == 128)
2470 IID = Intrinsic::x86_sse2_pmulhu_w;
2471 else if (VecWidth == 256)
2472 IID = Intrinsic::x86_avx2_pmulhu_w;
2473 else if (VecWidth == 512)
2474 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2477 }
else if (Name.starts_with(
"pmaddw.d.")) {
2478 if (VecWidth == 128)
2479 IID = Intrinsic::x86_sse2_pmadd_wd;
2480 else if (VecWidth == 256)
2481 IID = Intrinsic::x86_avx2_pmadd_wd;
2482 else if (VecWidth == 512)
2483 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2486 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2487 if (VecWidth == 128)
2488 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2489 else if (VecWidth == 256)
2490 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2491 else if (VecWidth == 512)
2492 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2495 }
else if (Name.starts_with(
"packsswb.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_sse2_packsswb_128;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_packsswb;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_packsswb_512;
2504 }
else if (Name.starts_with(
"packssdw.")) {
2505 if (VecWidth == 128)
2506 IID = Intrinsic::x86_sse2_packssdw_128;
2507 else if (VecWidth == 256)
2508 IID = Intrinsic::x86_avx2_packssdw;
2509 else if (VecWidth == 512)
2510 IID = Intrinsic::x86_avx512_packssdw_512;
2513 }
else if (Name.starts_with(
"packuswb.")) {
2514 if (VecWidth == 128)
2515 IID = Intrinsic::x86_sse2_packuswb_128;
2516 else if (VecWidth == 256)
2517 IID = Intrinsic::x86_avx2_packuswb;
2518 else if (VecWidth == 512)
2519 IID = Intrinsic::x86_avx512_packuswb_512;
2522 }
else if (Name.starts_with(
"packusdw.")) {
2523 if (VecWidth == 128)
2524 IID = Intrinsic::x86_sse41_packusdw;
2525 else if (VecWidth == 256)
2526 IID = Intrinsic::x86_avx2_packusdw;
2527 else if (VecWidth == 512)
2528 IID = Intrinsic::x86_avx512_packusdw_512;
2531 }
else if (Name.starts_with(
"vpermilvar.")) {
2532 if (VecWidth == 128 && EltWidth == 32)
2533 IID = Intrinsic::x86_avx_vpermilvar_ps;
2534 else if (VecWidth == 128 && EltWidth == 64)
2535 IID = Intrinsic::x86_avx_vpermilvar_pd;
2536 else if (VecWidth == 256 && EltWidth == 32)
2537 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2538 else if (VecWidth == 256 && EltWidth == 64)
2539 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2540 else if (VecWidth == 512 && EltWidth == 32)
2541 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2542 else if (VecWidth == 512 && EltWidth == 64)
2543 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2546 }
else if (Name ==
"cvtpd2dq.256") {
2547 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2548 }
else if (Name ==
"cvtpd2ps.256") {
2549 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2550 }
else if (Name ==
"cvttpd2dq.256") {
2551 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2552 }
else if (Name ==
"cvttps2dq.128") {
2553 IID = Intrinsic::x86_sse2_cvttps2dq;
2554 }
else if (Name ==
"cvttps2dq.256") {
2555 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2556 }
else if (Name.starts_with(
"permvar.")) {
2558 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2559 IID = Intrinsic::x86_avx2_permps;
2560 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2561 IID = Intrinsic::x86_avx2_permd;
2562 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2563 IID = Intrinsic::x86_avx512_permvar_df_256;
2564 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2565 IID = Intrinsic::x86_avx512_permvar_di_256;
2566 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2567 IID = Intrinsic::x86_avx512_permvar_sf_512;
2568 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2569 IID = Intrinsic::x86_avx512_permvar_si_512;
2570 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2571 IID = Intrinsic::x86_avx512_permvar_df_512;
2572 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2573 IID = Intrinsic::x86_avx512_permvar_di_512;
2574 else if (VecWidth == 128 && EltWidth == 16)
2575 IID = Intrinsic::x86_avx512_permvar_hi_128;
2576 else if (VecWidth == 256 && EltWidth == 16)
2577 IID = Intrinsic::x86_avx512_permvar_hi_256;
2578 else if (VecWidth == 512 && EltWidth == 16)
2579 IID = Intrinsic::x86_avx512_permvar_hi_512;
2580 else if (VecWidth == 128 && EltWidth == 8)
2581 IID = Intrinsic::x86_avx512_permvar_qi_128;
2582 else if (VecWidth == 256 && EltWidth == 8)
2583 IID = Intrinsic::x86_avx512_permvar_qi_256;
2584 else if (VecWidth == 512 && EltWidth == 8)
2585 IID = Intrinsic::x86_avx512_permvar_qi_512;
2588 }
else if (Name.starts_with(
"dbpsadbw.")) {
2589 if (VecWidth == 128)
2590 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2591 else if (VecWidth == 256)
2592 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2593 else if (VecWidth == 512)
2594 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2597 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2598 if (VecWidth == 128)
2599 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2600 else if (VecWidth == 256)
2601 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2602 else if (VecWidth == 512)
2603 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2606 }
else if (Name.starts_with(
"conflict.")) {
2607 if (Name[9] ==
'd' && VecWidth == 128)
2608 IID = Intrinsic::x86_avx512_conflict_d_128;
2609 else if (Name[9] ==
'd' && VecWidth == 256)
2610 IID = Intrinsic::x86_avx512_conflict_d_256;
2611 else if (Name[9] ==
'd' && VecWidth == 512)
2612 IID = Intrinsic::x86_avx512_conflict_d_512;
2613 else if (Name[9] ==
'q' && VecWidth == 128)
2614 IID = Intrinsic::x86_avx512_conflict_q_128;
2615 else if (Name[9] ==
'q' && VecWidth == 256)
2616 IID = Intrinsic::x86_avx512_conflict_q_256;
2617 else if (Name[9] ==
'q' && VecWidth == 512)
2618 IID = Intrinsic::x86_avx512_conflict_q_512;
2621 }
else if (Name.starts_with(
"pavg.")) {
2622 if (Name[5] ==
'b' && VecWidth == 128)
2623 IID = Intrinsic::x86_sse2_pavg_b;
2624 else if (Name[5] ==
'b' && VecWidth == 256)
2625 IID = Intrinsic::x86_avx2_pavg_b;
2626 else if (Name[5] ==
'b' && VecWidth == 512)
2627 IID = Intrinsic::x86_avx512_pavg_b_512;
2628 else if (Name[5] ==
'w' && VecWidth == 128)
2629 IID = Intrinsic::x86_sse2_pavg_w;
2630 else if (Name[5] ==
'w' && VecWidth == 256)
2631 IID = Intrinsic::x86_avx2_pavg_w;
2632 else if (Name[5] ==
'w' && VecWidth == 512)
2633 IID = Intrinsic::x86_avx512_pavg_w_512;
2642 Rep = Builder.CreateIntrinsic(IID, Args);
2653 if (AsmStr->find(
"mov\tfp") == 0 &&
2654 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2655 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2656 AsmStr->replace(Pos, 1,
";");
2662 Value *Rep =
nullptr;
2664 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2666 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2667 Value *Cmp = Builder.CreateICmpSGE(
2669 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2670 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2671 Type *Ty = (Name ==
"abs.bf16")
2675 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2676 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2677 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2678 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2679 : Intrinsic::nvvm_fabs;
2680 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2681 }
else if (Name.consume_front(
"ex2.approx.")) {
2683 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2684 : Intrinsic::nvvm_ex2_approx;
2685 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2686 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2687 Name.starts_with(
"atomic.load.add.f64.p")) {
2692 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2693 Name.starts_with(
"atomic.load.dec.32.p")) {
2698 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2700 }
else if (Name ==
"clz.ll") {
2703 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2704 {Arg, Builder.getFalse()},
2706 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2707 }
else if (Name ==
"popc.ll") {
2711 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2712 Arg,
nullptr,
"ctpop");
2713 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2714 }
else if (Name ==
"h2f") {
2716 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2717 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2718 }
else if (Name.consume_front(
"bitcast.") &&
2719 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2722 }
else if (Name ==
"rotate.b32") {
2725 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2726 {Arg, Arg, ShiftAmt});
2727 }
else if (Name ==
"rotate.b64") {
2731 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2732 {Arg, Arg, ZExtShiftAmt});
2733 }
else if (Name ==
"rotate.right.b64") {
2737 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2738 {Arg, Arg, ZExtShiftAmt});
2739 }
else if (Name ==
"swap.lo.hi.b64") {
2742 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2743 {Arg, Arg, Builder.getInt64(32)});
2744 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2747 Name.starts_with(
".to.gen"))) {
2749 }
else if (Name.consume_front(
"ldg.global")) {
2753 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2756 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2758 }
else if (Name ==
"tanh.approx.f32") {
2762 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2764 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2766 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2767 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2769 }
else if (Name ==
"barrier") {
2770 Rep = Builder.CreateIntrinsic(
2771 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2773 }
else if (Name ==
"barrier.sync") {
2774 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2776 }
else if (Name ==
"barrier.sync.cnt") {
2777 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2779 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2780 Name ==
"barrier0.or") {
2782 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2786 .
Case(
"barrier0.popc",
2787 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2788 .
Case(
"barrier0.and",
2789 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2790 .
Case(
"barrier0.or",
2791 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2792 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2793 Rep = Builder.CreateZExt(Bar, CI->
getType());
2797 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2807 ? Builder.CreateBitCast(Arg, NewType)
2810 Rep = Builder.CreateCall(NewFn, Args);
2811 if (
F->getReturnType()->isIntegerTy())
2812 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2822 Value *Rep =
nullptr;
2824 if (Name.starts_with(
"sse4a.movnt.")) {
2836 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2839 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2840 }
else if (Name.starts_with(
"avx.movnt.") ||
2841 Name.starts_with(
"avx512.storent.")) {
2853 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2854 }
else if (Name ==
"sse2.storel.dq") {
2859 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2860 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2861 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2862 }
else if (Name.starts_with(
"sse.storeu.") ||
2863 Name.starts_with(
"sse2.storeu.") ||
2864 Name.starts_with(
"avx.storeu.")) {
2867 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2868 }
else if (Name ==
"avx512.mask.store.ss") {
2872 }
else if (Name.starts_with(
"avx512.mask.store")) {
2874 bool Aligned = Name[17] !=
'u';
2877 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2880 bool CmpEq = Name[9] ==
'e';
2883 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2884 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2891 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2892 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2894 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2895 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2896 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2897 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2898 Name.starts_with(
"sse2.sqrt.p") ||
2899 Name.starts_with(
"sse.sqrt.p")) {
2900 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2901 {CI->getArgOperand(0)});
2902 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2906 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2907 : Intrinsic::x86_avx512_sqrt_pd_512;
2910 Rep = Builder.CreateIntrinsic(IID, Args);
2912 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2913 {CI->getArgOperand(0)});
2917 }
else if (Name.starts_with(
"avx512.ptestm") ||
2918 Name.starts_with(
"avx512.ptestnm")) {
2922 Rep = Builder.CreateAnd(Op0, Op1);
2928 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2930 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2933 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2936 }
else if (Name.starts_with(
"avx512.kunpck")) {
2941 for (
unsigned i = 0; i != NumElts; ++i)
2950 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2951 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2952 }
else if (Name ==
"avx512.kand.w") {
2955 Rep = Builder.CreateAnd(
LHS,
RHS);
2956 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2957 }
else if (Name ==
"avx512.kandn.w") {
2960 LHS = Builder.CreateNot(
LHS);
2961 Rep = Builder.CreateAnd(
LHS,
RHS);
2962 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2963 }
else if (Name ==
"avx512.kor.w") {
2966 Rep = Builder.CreateOr(
LHS,
RHS);
2967 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2968 }
else if (Name ==
"avx512.kxor.w") {
2971 Rep = Builder.CreateXor(
LHS,
RHS);
2972 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2973 }
else if (Name ==
"avx512.kxnor.w") {
2976 LHS = Builder.CreateNot(
LHS);
2977 Rep = Builder.CreateXor(
LHS,
RHS);
2978 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2979 }
else if (Name ==
"avx512.knot.w") {
2981 Rep = Builder.CreateNot(Rep);
2982 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2983 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
2986 Rep = Builder.CreateOr(
LHS,
RHS);
2987 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2989 if (Name[14] ==
'c')
2993 Rep = Builder.CreateICmpEQ(Rep,
C);
2994 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2995 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
2996 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
2997 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
2998 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3001 ConstantInt::get(I32Ty, 0));
3003 ConstantInt::get(I32Ty, 0));
3005 if (Name.contains(
".add."))
3006 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3007 else if (Name.contains(
".sub."))
3008 EltOp = Builder.CreateFSub(Elt0, Elt1);
3009 else if (Name.contains(
".mul."))
3010 EltOp = Builder.CreateFMul(Elt0, Elt1);
3012 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3013 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3014 ConstantInt::get(I32Ty, 0));
3015 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3017 bool CmpEq = Name[16] ==
'e';
3019 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3027 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3030 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3033 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3040 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3045 if (VecWidth == 128 && EltWidth == 32)
3046 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3047 else if (VecWidth == 256 && EltWidth == 32)
3048 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3049 else if (VecWidth == 512 && EltWidth == 32)
3050 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3051 else if (VecWidth == 128 && EltWidth == 64)
3052 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3053 else if (VecWidth == 256 && EltWidth == 64)
3054 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3055 else if (VecWidth == 512 && EltWidth == 64)
3056 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3063 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3065 Type *OpTy = Args[0]->getType();
3069 if (VecWidth == 128 && EltWidth == 32)
3070 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3071 else if (VecWidth == 256 && EltWidth == 32)
3072 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3073 else if (VecWidth == 512 && EltWidth == 32)
3074 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3075 else if (VecWidth == 128 && EltWidth == 64)
3076 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3077 else if (VecWidth == 256 && EltWidth == 64)
3078 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3079 else if (VecWidth == 512 && EltWidth == 64)
3080 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3085 if (VecWidth == 512)
3087 Args.push_back(Mask);
3089 Rep = Builder.CreateIntrinsic(IID, Args);
3090 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3094 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3097 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3098 Name.starts_with(
"avx512.cvtw2mask.") ||
3099 Name.starts_with(
"avx512.cvtd2mask.") ||
3100 Name.starts_with(
"avx512.cvtq2mask.")) {
3105 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3106 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3107 Name.starts_with(
"avx512.mask.pabs")) {
3109 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3110 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3111 Name.starts_with(
"avx512.mask.pmaxs")) {
3113 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3114 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3115 Name.starts_with(
"avx512.mask.pmaxu")) {
3117 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3118 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3119 Name.starts_with(
"avx512.mask.pmins")) {
3121 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3122 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3123 Name.starts_with(
"avx512.mask.pminu")) {
3125 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3126 Name ==
"avx512.pmulu.dq.512" ||
3127 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3129 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3130 Name ==
"avx512.pmul.dq.512" ||
3131 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3133 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3134 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3139 }
else if (Name ==
"avx512.cvtusi2sd") {
3144 }
else if (Name ==
"sse2.cvtss2sd") {
3146 Rep = Builder.CreateFPExt(
3149 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3150 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3151 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3152 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3153 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3154 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3155 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3156 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3157 Name ==
"avx512.mask.cvtqq2ps.256" ||
3158 Name ==
"avx512.mask.cvtqq2ps.512" ||
3159 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3160 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3161 Name ==
"avx.cvt.ps2.pd.256" ||
3162 Name ==
"avx512.mask.cvtps2pd.128" ||
3163 Name ==
"avx512.mask.cvtps2pd.256") {
3168 unsigned NumDstElts = DstTy->getNumElements();
3170 assert(NumDstElts == 2 &&
"Unexpected vector size");
3171 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3174 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3175 bool IsUnsigned = Name.contains(
"cvtu");
3177 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3181 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3182 : Intrinsic::x86_avx512_sitofp_round;
3183 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3186 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3187 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3193 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3194 Name.starts_with(
"vcvtph2ps.")) {
3198 unsigned NumDstElts = DstTy->getNumElements();
3199 if (NumDstElts != SrcTy->getNumElements()) {
3200 assert(NumDstElts == 4 &&
"Unexpected vector size");
3201 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3203 Rep = Builder.CreateBitCast(
3205 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3209 }
else if (Name.starts_with(
"avx512.mask.load")) {
3211 bool Aligned = Name[16] !=
'u';
3214 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3217 ResultTy->getNumElements());
3219 Rep = Builder.CreateIntrinsic(
3220 Intrinsic::masked_expandload, ResultTy,
3222 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3228 Rep = Builder.CreateIntrinsic(
3229 Intrinsic::masked_compressstore, ResultTy,
3231 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3232 Name.starts_with(
"avx512.mask.expand.")) {
3236 ResultTy->getNumElements());
3238 bool IsCompress = Name[12] ==
'c';
3239 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3240 : Intrinsic::x86_avx512_mask_expand;
3241 Rep = Builder.CreateIntrinsic(
3243 }
else if (Name.starts_with(
"xop.vpcom")) {
3245 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3246 Name.ends_with(
"uq"))
3248 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3249 Name.ends_with(
"d") || Name.ends_with(
"q"))
3258 Name = Name.substr(9);
3259 if (Name.starts_with(
"lt"))
3261 else if (Name.starts_with(
"le"))
3263 else if (Name.starts_with(
"gt"))
3265 else if (Name.starts_with(
"ge"))
3267 else if (Name.starts_with(
"eq"))
3269 else if (Name.starts_with(
"ne"))
3271 else if (Name.starts_with(
"false"))
3273 else if (Name.starts_with(
"true"))
3280 }
else if (Name.starts_with(
"xop.vpcmov")) {
3282 Value *NotSel = Builder.CreateNot(Sel);
3285 Rep = Builder.CreateOr(Sel0, Sel1);
3286 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3287 Name.starts_with(
"avx512.mask.prol")) {
3289 }
else if (Name.starts_with(
"avx512.pror") ||
3290 Name.starts_with(
"avx512.mask.pror")) {
3292 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3293 Name.starts_with(
"avx512.mask.vpshld") ||
3294 Name.starts_with(
"avx512.maskz.vpshld")) {
3295 bool ZeroMask = Name[11] ==
'z';
3297 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3298 Name.starts_with(
"avx512.mask.vpshrd") ||
3299 Name.starts_with(
"avx512.maskz.vpshrd")) {
3300 bool ZeroMask = Name[11] ==
'z';
3302 }
else if (Name ==
"sse42.crc32.64.8") {
3305 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3307 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3308 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3309 Name.starts_with(
"avx512.vbroadcast.s")) {
3312 Type *EltTy = VecTy->getElementType();
3313 unsigned EltNum = VecTy->getNumElements();
3317 for (
unsigned I = 0;
I < EltNum; ++
I)
3318 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3319 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3320 Name.starts_with(
"sse41.pmovzx") ||
3321 Name.starts_with(
"avx2.pmovsx") ||
3322 Name.starts_with(
"avx2.pmovzx") ||
3323 Name.starts_with(
"avx512.mask.pmovsx") ||
3324 Name.starts_with(
"avx512.mask.pmovzx")) {
3326 unsigned NumDstElts = DstTy->getNumElements();
3330 for (
unsigned i = 0; i != NumDstElts; ++i)
3335 bool DoSext = Name.contains(
"pmovsx");
3337 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3342 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3343 Name ==
"avx512.mask.pmov.qd.512" ||
3344 Name ==
"avx512.mask.pmov.wb.256" ||
3345 Name ==
"avx512.mask.pmov.wb.512") {
3350 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3351 Name ==
"avx2.vbroadcasti128") {
3357 if (NumSrcElts == 2)
3358 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3360 Rep = Builder.CreateShuffleVector(Load,
3362 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3363 Name.starts_with(
"avx512.mask.shuf.f")) {
3368 unsigned ControlBitsMask = NumLanes - 1;
3369 unsigned NumControlBits = NumLanes / 2;
3372 for (
unsigned l = 0; l != NumLanes; ++l) {
3373 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3375 if (l >= NumLanes / 2)
3376 LaneMask += NumLanes;
3377 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3378 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3384 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3385 Name.starts_with(
"avx512.mask.broadcasti")) {
3388 unsigned NumDstElts =
3392 for (
unsigned i = 0; i != NumDstElts; ++i)
3393 ShuffleMask[i] = i % NumSrcElts;
3399 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3400 Name.starts_with(
"avx2.vbroadcast") ||
3401 Name.starts_with(
"avx512.pbroadcast") ||
3402 Name.starts_with(
"avx512.mask.broadcast.s")) {
3409 Rep = Builder.CreateShuffleVector(
Op, M);
3414 }
else if (Name.starts_with(
"sse2.padds.") ||
3415 Name.starts_with(
"avx2.padds.") ||
3416 Name.starts_with(
"avx512.padds.") ||
3417 Name.starts_with(
"avx512.mask.padds.")) {
3419 }
else if (Name.starts_with(
"sse2.psubs.") ||
3420 Name.starts_with(
"avx2.psubs.") ||
3421 Name.starts_with(
"avx512.psubs.") ||
3422 Name.starts_with(
"avx512.mask.psubs.")) {
3424 }
else if (Name.starts_with(
"sse2.paddus.") ||
3425 Name.starts_with(
"avx2.paddus.") ||
3426 Name.starts_with(
"avx512.mask.paddus.")) {
3428 }
else if (Name.starts_with(
"sse2.psubus.") ||
3429 Name.starts_with(
"avx2.psubus.") ||
3430 Name.starts_with(
"avx512.mask.psubus.")) {
3432 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3437 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3441 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3446 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3451 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3452 Name ==
"avx512.psll.dq.512") {
3456 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3457 Name ==
"avx512.psrl.dq.512") {
3461 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3462 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3463 Name.starts_with(
"avx2.pblendd.")) {
3468 unsigned NumElts = VecTy->getNumElements();
3471 for (
unsigned i = 0; i != NumElts; ++i)
3472 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3474 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3475 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3476 Name ==
"avx2.vinserti128" ||
3477 Name.starts_with(
"avx512.mask.insert")) {
3481 unsigned DstNumElts =
3483 unsigned SrcNumElts =
3485 unsigned Scale = DstNumElts / SrcNumElts;
3492 for (
unsigned i = 0; i != SrcNumElts; ++i)
3494 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3495 Idxs[i] = SrcNumElts;
3496 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3510 for (
unsigned i = 0; i != DstNumElts; ++i)
3513 for (
unsigned i = 0; i != SrcNumElts; ++i)
3514 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3515 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3521 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3522 Name ==
"avx2.vextracti128" ||
3523 Name.starts_with(
"avx512.mask.vextract")) {
3526 unsigned DstNumElts =
3528 unsigned SrcNumElts =
3530 unsigned Scale = SrcNumElts / DstNumElts;
3537 for (
unsigned i = 0; i != DstNumElts; ++i) {
3538 Idxs[i] = i + (Imm * DstNumElts);
3540 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3546 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3547 Name.starts_with(
"avx512.mask.perm.di.")) {
3551 unsigned NumElts = VecTy->getNumElements();
3554 for (
unsigned i = 0; i != NumElts; ++i)
3555 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3557 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3562 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3574 unsigned HalfSize = NumElts / 2;
3586 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3587 for (
unsigned i = 0; i < HalfSize; ++i)
3588 ShuffleMask[i] = StartIndex + i;
3591 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3592 for (
unsigned i = 0; i < HalfSize; ++i)
3593 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3595 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3597 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3598 Name.starts_with(
"avx512.mask.vpermil.p") ||
3599 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3603 unsigned NumElts = VecTy->getNumElements();
3605 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3606 unsigned IdxMask = ((1 << IdxSize) - 1);
3612 for (
unsigned i = 0; i != NumElts; ++i)
3613 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3615 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3620 }
else if (Name ==
"sse2.pshufl.w" ||
3621 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3627 for (
unsigned l = 0; l != NumElts; l += 8) {
3628 for (
unsigned i = 0; i != 4; ++i)
3629 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3630 for (
unsigned i = 4; i != 8; ++i)
3631 Idxs[i + l] = i + l;
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3639 }
else if (Name ==
"sse2.pshufh.w" ||
3640 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3646 for (
unsigned l = 0; l != NumElts; l += 8) {
3647 for (
unsigned i = 0; i != 4; ++i)
3648 Idxs[i + l] = i + l;
3649 for (
unsigned i = 0; i != 4; ++i)
3650 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3653 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3658 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3665 unsigned HalfLaneElts = NumLaneElts / 2;
3668 for (
unsigned i = 0; i != NumElts; ++i) {
3670 Idxs[i] = i - (i % NumLaneElts);
3672 if ((i % NumLaneElts) >= HalfLaneElts)
3676 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3679 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3683 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3684 Name.starts_with(
"avx512.mask.movshdup") ||
3685 Name.starts_with(
"avx512.mask.movsldup")) {
3691 if (Name.starts_with(
"avx512.mask.movshdup."))
3695 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3696 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3697 Idxs[i + l + 0] = i + l +
Offset;
3698 Idxs[i + l + 1] = i + l +
Offset;
3701 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3705 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3706 Name.starts_with(
"avx512.mask.unpckl.")) {
3713 for (
int l = 0; l != NumElts; l += NumLaneElts)
3714 for (
int i = 0; i != NumLaneElts; ++i)
3715 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3717 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3721 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3722 Name.starts_with(
"avx512.mask.unpckh.")) {
3729 for (
int l = 0; l != NumElts; l += NumLaneElts)
3730 for (
int i = 0; i != NumLaneElts; ++i)
3731 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3733 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3737 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3738 Name.starts_with(
"avx512.mask.pand.")) {
3741 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3743 Rep = Builder.CreateBitCast(Rep, FTy);
3746 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3747 Name.starts_with(
"avx512.mask.pandn.")) {
3750 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3751 Rep = Builder.CreateAnd(Rep,
3753 Rep = Builder.CreateBitCast(Rep, FTy);
3756 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3757 Name.starts_with(
"avx512.mask.por.")) {
3760 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3762 Rep = Builder.CreateBitCast(Rep, FTy);
3765 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3766 Name.starts_with(
"avx512.mask.pxor.")) {
3769 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3771 Rep = Builder.CreateBitCast(Rep, FTy);
3774 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3778 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3782 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3786 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3787 if (Name.ends_with(
".512")) {
3789 if (Name[17] ==
's')
3790 IID = Intrinsic::x86_avx512_add_ps_512;
3792 IID = Intrinsic::x86_avx512_add_pd_512;
3794 Rep = Builder.CreateIntrinsic(
3802 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3803 if (Name.ends_with(
".512")) {
3805 if (Name[17] ==
's')
3806 IID = Intrinsic::x86_avx512_div_ps_512;
3808 IID = Intrinsic::x86_avx512_div_pd_512;
3810 Rep = Builder.CreateIntrinsic(
3818 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3819 if (Name.ends_with(
".512")) {
3821 if (Name[17] ==
's')
3822 IID = Intrinsic::x86_avx512_mul_ps_512;
3824 IID = Intrinsic::x86_avx512_mul_pd_512;
3826 Rep = Builder.CreateIntrinsic(
3834 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3835 if (Name.ends_with(
".512")) {
3837 if (Name[17] ==
's')
3838 IID = Intrinsic::x86_avx512_sub_ps_512;
3840 IID = Intrinsic::x86_avx512_sub_pd_512;
3842 Rep = Builder.CreateIntrinsic(
3850 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3851 Name.starts_with(
"avx512.mask.min.p")) &&
3852 Name.drop_front(18) ==
".512") {
3853 bool IsDouble = Name[17] ==
'd';
3854 bool IsMin = Name[13] ==
'i';
3856 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3857 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3860 Rep = Builder.CreateIntrinsic(
3865 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3867 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3868 {CI->getArgOperand(0), Builder.getInt1(false)});
3871 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3872 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3873 bool IsVariable = Name[16] ==
'v';
3874 char Size = Name[16] ==
'.' ? Name[17]
3875 : Name[17] ==
'.' ? Name[18]
3876 : Name[18] ==
'.' ? Name[19]
3880 if (IsVariable && Name[17] !=
'.') {
3881 if (
Size ==
'd' && Name[17] ==
'2')
3882 IID = Intrinsic::x86_avx2_psllv_q;
3883 else if (
Size ==
'd' && Name[17] ==
'4')
3884 IID = Intrinsic::x86_avx2_psllv_q_256;
3885 else if (
Size ==
's' && Name[17] ==
'4')
3886 IID = Intrinsic::x86_avx2_psllv_d;
3887 else if (
Size ==
's' && Name[17] ==
'8')
3888 IID = Intrinsic::x86_avx2_psllv_d_256;
3889 else if (
Size ==
'h' && Name[17] ==
'8')
3890 IID = Intrinsic::x86_avx512_psllv_w_128;
3891 else if (
Size ==
'h' && Name[17] ==
'1')
3892 IID = Intrinsic::x86_avx512_psllv_w_256;
3893 else if (Name[17] ==
'3' && Name[18] ==
'2')
3894 IID = Intrinsic::x86_avx512_psllv_w_512;
3897 }
else if (Name.ends_with(
".128")) {
3899 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3900 : Intrinsic::x86_sse2_psll_d;
3901 else if (
Size ==
'q')
3902 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3903 : Intrinsic::x86_sse2_psll_q;
3904 else if (
Size ==
'w')
3905 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3906 : Intrinsic::x86_sse2_psll_w;
3909 }
else if (Name.ends_with(
".256")) {
3911 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3912 : Intrinsic::x86_avx2_psll_d;
3913 else if (
Size ==
'q')
3914 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3915 : Intrinsic::x86_avx2_psll_q;
3916 else if (
Size ==
'w')
3917 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3918 : Intrinsic::x86_avx2_psll_w;
3923 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3924 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3925 : Intrinsic::x86_avx512_psll_d_512;
3926 else if (
Size ==
'q')
3927 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3928 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3929 : Intrinsic::x86_avx512_psll_q_512;
3930 else if (
Size ==
'w')
3931 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3932 : Intrinsic::x86_avx512_psll_w_512;
3938 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3939 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3940 bool IsVariable = Name[16] ==
'v';
3941 char Size = Name[16] ==
'.' ? Name[17]
3942 : Name[17] ==
'.' ? Name[18]
3943 : Name[18] ==
'.' ? Name[19]
3947 if (IsVariable && Name[17] !=
'.') {
3948 if (
Size ==
'd' && Name[17] ==
'2')
3949 IID = Intrinsic::x86_avx2_psrlv_q;
3950 else if (
Size ==
'd' && Name[17] ==
'4')
3951 IID = Intrinsic::x86_avx2_psrlv_q_256;
3952 else if (
Size ==
's' && Name[17] ==
'4')
3953 IID = Intrinsic::x86_avx2_psrlv_d;
3954 else if (
Size ==
's' && Name[17] ==
'8')
3955 IID = Intrinsic::x86_avx2_psrlv_d_256;
3956 else if (
Size ==
'h' && Name[17] ==
'8')
3957 IID = Intrinsic::x86_avx512_psrlv_w_128;
3958 else if (
Size ==
'h' && Name[17] ==
'1')
3959 IID = Intrinsic::x86_avx512_psrlv_w_256;
3960 else if (Name[17] ==
'3' && Name[18] ==
'2')
3961 IID = Intrinsic::x86_avx512_psrlv_w_512;
3964 }
else if (Name.ends_with(
".128")) {
3966 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3967 : Intrinsic::x86_sse2_psrl_d;
3968 else if (
Size ==
'q')
3969 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3970 : Intrinsic::x86_sse2_psrl_q;
3971 else if (
Size ==
'w')
3972 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3973 : Intrinsic::x86_sse2_psrl_w;
3976 }
else if (Name.ends_with(
".256")) {
3978 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3979 : Intrinsic::x86_avx2_psrl_d;
3980 else if (
Size ==
'q')
3981 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3982 : Intrinsic::x86_avx2_psrl_q;
3983 else if (
Size ==
'w')
3984 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3985 : Intrinsic::x86_avx2_psrl_w;
3990 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3991 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3992 : Intrinsic::x86_avx512_psrl_d_512;
3993 else if (
Size ==
'q')
3994 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3995 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3996 : Intrinsic::x86_avx512_psrl_q_512;
3997 else if (
Size ==
'w')
3998 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3999 : Intrinsic::x86_avx512_psrl_w_512;
4005 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4006 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4007 bool IsVariable = Name[16] ==
'v';
4008 char Size = Name[16] ==
'.' ? Name[17]
4009 : Name[17] ==
'.' ? Name[18]
4010 : Name[18] ==
'.' ? Name[19]
4014 if (IsVariable && Name[17] !=
'.') {
4015 if (
Size ==
's' && Name[17] ==
'4')
4016 IID = Intrinsic::x86_avx2_psrav_d;
4017 else if (
Size ==
's' && Name[17] ==
'8')
4018 IID = Intrinsic::x86_avx2_psrav_d_256;
4019 else if (
Size ==
'h' && Name[17] ==
'8')
4020 IID = Intrinsic::x86_avx512_psrav_w_128;
4021 else if (
Size ==
'h' && Name[17] ==
'1')
4022 IID = Intrinsic::x86_avx512_psrav_w_256;
4023 else if (Name[17] ==
'3' && Name[18] ==
'2')
4024 IID = Intrinsic::x86_avx512_psrav_w_512;
4027 }
else if (Name.ends_with(
".128")) {
4029 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4030 : Intrinsic::x86_sse2_psra_d;
4031 else if (
Size ==
'q')
4032 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4033 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4034 : Intrinsic::x86_avx512_psra_q_128;
4035 else if (
Size ==
'w')
4036 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4037 : Intrinsic::x86_sse2_psra_w;
4040 }
else if (Name.ends_with(
".256")) {
4042 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4043 : Intrinsic::x86_avx2_psra_d;
4044 else if (
Size ==
'q')
4045 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4046 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4047 : Intrinsic::x86_avx512_psra_q_256;
4048 else if (
Size ==
'w')
4049 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4050 : Intrinsic::x86_avx2_psra_w;
4055 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4056 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4057 : Intrinsic::x86_avx512_psra_d_512;
4058 else if (
Size ==
'q')
4059 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4060 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4061 : Intrinsic::x86_avx512_psra_q_512;
4062 else if (
Size ==
'w')
4063 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4064 : Intrinsic::x86_avx512_psra_w_512;
4070 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4072 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4074 }
else if (Name.ends_with(
".movntdqa")) {
4078 LoadInst *LI = Builder.CreateAlignedLoad(
4083 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4084 Name.starts_with(
"fma.vfmsub.") ||
4085 Name.starts_with(
"fma.vfnmadd.") ||
4086 Name.starts_with(
"fma.vfnmsub.")) {
4087 bool NegMul = Name[6] ==
'n';
4088 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4089 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4100 if (NegMul && !IsScalar)
4101 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4102 if (NegMul && IsScalar)
4103 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4105 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4107 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4111 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4119 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4123 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4124 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4125 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4126 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4127 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4128 bool IsMask3 = Name[11] ==
'3';
4129 bool IsMaskZ = Name[11] ==
'z';
4131 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4132 bool NegMul = Name[2] ==
'n';
4133 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4139 if (NegMul && (IsMask3 || IsMaskZ))
4140 A = Builder.CreateFNeg(
A);
4141 if (NegMul && !(IsMask3 || IsMaskZ))
4142 B = Builder.CreateFNeg(
B);
4144 C = Builder.CreateFNeg(
C);
4146 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4147 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4148 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4155 if (Name.back() ==
'd')
4156 IID = Intrinsic::x86_avx512_vfmadd_f64;
4158 IID = Intrinsic::x86_avx512_vfmadd_f32;
4159 Rep = Builder.CreateIntrinsic(IID,
Ops);
4161 Rep = Builder.CreateFMA(
A,
B,
C);
4170 if (NegAcc && IsMask3)
4175 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4177 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4178 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4179 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4180 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4181 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4182 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4183 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4184 bool IsMask3 = Name[11] ==
'3';
4185 bool IsMaskZ = Name[11] ==
'z';
4187 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4188 bool NegMul = Name[2] ==
'n';
4189 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4195 if (NegMul && (IsMask3 || IsMaskZ))
4196 A = Builder.CreateFNeg(
A);
4197 if (NegMul && !(IsMask3 || IsMaskZ))
4198 B = Builder.CreateFNeg(
B);
4200 C = Builder.CreateFNeg(
C);
4207 if (Name[Name.size() - 5] ==
's')
4208 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4210 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4214 Rep = Builder.CreateFMA(
A,
B,
C);
4222 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4226 if (VecWidth == 128 && EltWidth == 32)
4227 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4228 else if (VecWidth == 256 && EltWidth == 32)
4229 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4230 else if (VecWidth == 128 && EltWidth == 64)
4231 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4232 else if (VecWidth == 256 && EltWidth == 64)
4233 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4239 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4240 Rep = Builder.CreateIntrinsic(IID,
Ops);
4241 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4242 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4243 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4244 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4245 bool IsMask3 = Name[11] ==
'3';
4246 bool IsMaskZ = Name[11] ==
'z';
4248 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4249 bool IsSubAdd = Name[3] ==
's';
4253 if (Name[Name.size() - 5] ==
's')
4254 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4256 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4261 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4263 Rep = Builder.CreateIntrinsic(IID,
Ops);
4272 Value *Odd = Builder.CreateCall(FMA,
Ops);
4273 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4274 Value *Even = Builder.CreateCall(FMA,
Ops);
4280 for (
int i = 0; i != NumElts; ++i)
4281 Idxs[i] = i + (i % 2) * NumElts;
4283 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4291 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4292 Name.starts_with(
"avx512.maskz.pternlog.")) {
4293 bool ZeroMask = Name[11] ==
'z';
4297 if (VecWidth == 128 && EltWidth == 32)
4298 IID = Intrinsic::x86_avx512_pternlog_d_128;
4299 else if (VecWidth == 256 && EltWidth == 32)
4300 IID = Intrinsic::x86_avx512_pternlog_d_256;
4301 else if (VecWidth == 512 && EltWidth == 32)
4302 IID = Intrinsic::x86_avx512_pternlog_d_512;
4303 else if (VecWidth == 128 && EltWidth == 64)
4304 IID = Intrinsic::x86_avx512_pternlog_q_128;
4305 else if (VecWidth == 256 && EltWidth == 64)
4306 IID = Intrinsic::x86_avx512_pternlog_q_256;
4307 else if (VecWidth == 512 && EltWidth == 64)
4308 IID = Intrinsic::x86_avx512_pternlog_q_512;
4314 Rep = Builder.CreateIntrinsic(IID, Args);
4318 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4319 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4320 bool ZeroMask = Name[11] ==
'z';
4321 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4324 if (VecWidth == 128 && !
High)
4325 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4326 else if (VecWidth == 256 && !
High)
4327 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4328 else if (VecWidth == 512 && !
High)
4329 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4330 else if (VecWidth == 128 &&
High)
4331 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4332 else if (VecWidth == 256 &&
High)
4333 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4334 else if (VecWidth == 512 &&
High)
4335 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4341 Rep = Builder.CreateIntrinsic(IID, Args);
4345 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4346 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4347 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4348 bool ZeroMask = Name[11] ==
'z';
4349 bool IndexForm = Name[17] ==
'i';
4351 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4352 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4353 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4354 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4355 bool ZeroMask = Name[11] ==
'z';
4356 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4359 if (VecWidth == 128 && !IsSaturating)
4360 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4361 else if (VecWidth == 256 && !IsSaturating)
4362 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4363 else if (VecWidth == 512 && !IsSaturating)
4364 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4365 else if (VecWidth == 128 && IsSaturating)
4366 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4367 else if (VecWidth == 256 && IsSaturating)
4368 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4369 else if (VecWidth == 512 && IsSaturating)
4370 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4380 if (Args[1]->
getType()->isVectorTy() &&
4383 ->isIntegerTy(32) &&
4384 Args[2]->
getType()->isVectorTy() &&
4387 ->isIntegerTy(32)) {
4388 Type *NewArgType =
nullptr;
4389 if (VecWidth == 128)
4391 else if (VecWidth == 256)
4393 else if (VecWidth == 512)
4398 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4399 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4402 Rep = Builder.CreateIntrinsic(IID, Args);
4406 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4407 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4408 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4409 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4410 bool ZeroMask = Name[11] ==
'z';
4411 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4414 if (VecWidth == 128 && !IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4416 else if (VecWidth == 256 && !IsSaturating)
4417 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4418 else if (VecWidth == 512 && !IsSaturating)
4419 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4420 else if (VecWidth == 128 && IsSaturating)
4421 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4422 else if (VecWidth == 256 && IsSaturating)
4423 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4424 else if (VecWidth == 512 && IsSaturating)
4425 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4435 if (Args[1]->
getType()->isVectorTy() &&
4438 ->isIntegerTy(32) &&
4439 Args[2]->
getType()->isVectorTy() &&
4442 ->isIntegerTy(32)) {
4443 Type *NewArgType =
nullptr;
4444 if (VecWidth == 128)
4446 else if (VecWidth == 256)
4448 else if (VecWidth == 512)
4453 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4454 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4457 Rep = Builder.CreateIntrinsic(IID, Args);
4461 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4462 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4463 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4465 if (Name[0] ==
'a' && Name.back() ==
'2')
4466 IID = Intrinsic::x86_addcarry_32;
4467 else if (Name[0] ==
'a' && Name.back() ==
'4')
4468 IID = Intrinsic::x86_addcarry_64;
4469 else if (Name[0] ==
's' && Name.back() ==
'2')
4470 IID = Intrinsic::x86_subborrow_32;
4471 else if (Name[0] ==
's' && Name.back() ==
'4')
4472 IID = Intrinsic::x86_subborrow_64;
4479 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4482 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4485 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4489 }
else if (Name.starts_with(
"avx512.mask.") &&
4499 if (Name.starts_with(
"neon.bfcvt")) {
4500 if (Name.starts_with(
"neon.bfcvtn2")) {
4502 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4504 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4505 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4508 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4509 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4511 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4515 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4516 return Builder.CreateShuffleVector(
4519 return Builder.CreateFPTrunc(CI->
getOperand(0),
4522 }
else if (Name.starts_with(
"sve.fcvt")) {
4525 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4526 .
Case(
"sve.fcvtnt.bf16f32",
4527 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4539 if (Args[1]->
getType() != BadPredTy)
4542 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4543 BadPredTy, Args[1]);
4544 Args[1] = Builder.CreateIntrinsic(
4545 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4547 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4556 if (Name ==
"mve.vctp64.old") {
4559 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4562 Value *C1 = Builder.CreateIntrinsic(
4563 Intrinsic::arm_mve_pred_v2i,
4565 return Builder.CreateIntrinsic(
4566 Intrinsic::arm_mve_pred_i2v,
4568 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4569 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4570 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4571 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4573 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4574 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4575 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4576 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4578 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4579 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4580 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4581 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4582 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4583 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4584 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4585 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4586 std::vector<Type *> Tys;
4590 case Intrinsic::arm_mve_mull_int_predicated:
4591 case Intrinsic::arm_mve_vqdmull_predicated:
4592 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4595 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4596 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4597 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4601 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4605 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4609 case Intrinsic::arm_cde_vcx1q_predicated:
4610 case Intrinsic::arm_cde_vcx1qa_predicated:
4611 case Intrinsic::arm_cde_vcx2q_predicated:
4612 case Intrinsic::arm_cde_vcx2qa_predicated:
4613 case Intrinsic::arm_cde_vcx3q_predicated:
4614 case Intrinsic::arm_cde_vcx3qa_predicated:
4621 std::vector<Value *>
Ops;
4623 Type *Ty =
Op->getType();
4624 if (Ty->getScalarSizeInBits() == 1) {
4625 Value *C1 = Builder.CreateIntrinsic(
4626 Intrinsic::arm_mve_pred_v2i,
4628 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4633 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4648 auto UpgradeLegacyWMMAIUIntrinsicCall =
4653 Args.push_back(Builder.getFalse());
4657 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4664 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4669 NewCall->copyMetadata(*CI);
4673 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4674 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4675 "intrinsic should have 7 arguments");
4678 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4680 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4681 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4682 "intrinsic should have 8 arguments");
4687 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4707 if (NumOperands < 3)
4720 bool IsVolatile =
false;
4724 if (NumOperands > 3)
4729 if (NumOperands > 5) {
4731 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4745 if (VT->getElementType()->isIntegerTy(16)) {
4748 Val = Builder.CreateBitCast(Val, AsBF16);
4756 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4758 unsigned AddrSpace = PtrTy->getAddressSpace();
4761 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4763 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4768 MDNode *RangeNotPrivate =
4771 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4777 return Builder.CreateBitCast(RMW, RetTy);
4798 return MAV->getMetadata();
4805 return I->getDebugLoc().getAsMDNode();
4813 if (Name ==
"label") {
4816 }
else if (Name ==
"assign") {
4823 }
else if (Name ==
"declare") {
4828 }
else if (Name ==
"addr") {
4838 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4840 }
else if (Name ==
"value") {
4843 unsigned ExprOp = 2;
4857 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4865 int64_t OffsetVal =
Offset->getSExtValue();
4866 return Builder.CreateIntrinsic(OffsetVal >= 0
4867 ? Intrinsic::vector_splice_left
4868 : Intrinsic::vector_splice_right,
4870 {CI->getArgOperand(0), CI->getArgOperand(1),
4871 Builder.getInt32(std::abs(OffsetVal))});
4876 if (Name.starts_with(
"to.fp16")) {
4878 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4879 return Builder.CreateBitCast(Cast, CI->
getType());
4882 if (Name.starts_with(
"from.fp16")) {
4884 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4885 return Builder.CreateFPExt(Cast, CI->
getType());
4910 if (!Name.consume_front(
"llvm."))
4913 bool IsX86 = Name.consume_front(
"x86.");
4914 bool IsNVVM = Name.consume_front(
"nvvm.");
4915 bool IsAArch64 = Name.consume_front(
"aarch64.");
4916 bool IsARM = Name.consume_front(
"arm.");
4917 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
4918 bool IsDbg = Name.consume_front(
"dbg.");
4920 (Name.consume_front(
"experimental.vector.splice") ||
4921 Name.consume_front(
"vector.splice")) &&
4922 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
4923 Value *Rep =
nullptr;
4925 if (!IsX86 && Name ==
"stackprotectorcheck") {
4927 }
else if (IsNVVM) {
4931 }
else if (IsAArch64) {
4935 }
else if (IsAMDGCN) {
4939 }
else if (IsOldSplice) {
4941 }
else if (Name.consume_front(
"convert.")) {
4953 const auto &DefaultCase = [&]() ->
void {
4961 "Unknown function for CallBase upgrade and isn't just a name change");
4969 "Return type must have changed");
4970 assert(OldST->getNumElements() ==
4972 "Must have same number of elements");
4975 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4978 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4979 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4980 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4999 case Intrinsic::arm_neon_vst1:
5000 case Intrinsic::arm_neon_vst2:
5001 case Intrinsic::arm_neon_vst3:
5002 case Intrinsic::arm_neon_vst4:
5003 case Intrinsic::arm_neon_vst2lane:
5004 case Intrinsic::arm_neon_vst3lane:
5005 case Intrinsic::arm_neon_vst4lane: {
5007 NewCall = Builder.CreateCall(NewFn, Args);
5010 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5011 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5012 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5017 NewCall = Builder.CreateCall(NewFn, Args);
5020 case Intrinsic::aarch64_sve_ld3_sret:
5021 case Intrinsic::aarch64_sve_ld4_sret:
5022 case Intrinsic::aarch64_sve_ld2_sret: {
5024 Name = Name.substr(5);
5031 unsigned MinElts = RetTy->getMinNumElements() /
N;
5033 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5035 for (
unsigned I = 0;
I <
N;
I++) {
5036 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5037 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5043 case Intrinsic::coro_end: {
5046 NewCall = Builder.CreateCall(NewFn, Args);
5050 case Intrinsic::vector_extract: {
5052 Name = Name.substr(5);
5053 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5058 unsigned MinElts = RetTy->getMinNumElements();
5061 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5065 case Intrinsic::vector_insert: {
5067 Name = Name.substr(5);
5068 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5072 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5077 NewCall = Builder.CreateCall(
5081 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5087 assert(
N > 1 &&
"Create is expected to be between 2-4");
5090 unsigned MinElts = RetTy->getMinNumElements() /
N;
5091 for (
unsigned I = 0;
I <
N;
I++) {
5093 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5100 case Intrinsic::arm_neon_bfdot:
5101 case Intrinsic::arm_neon_bfmmla:
5102 case Intrinsic::arm_neon_bfmlalb:
5103 case Intrinsic::arm_neon_bfmlalt:
5104 case Intrinsic::aarch64_neon_bfdot:
5105 case Intrinsic::aarch64_neon_bfmmla:
5106 case Intrinsic::aarch64_neon_bfmlalb:
5107 case Intrinsic::aarch64_neon_bfmlalt: {
5110 "Mismatch between function args and call args");
5111 size_t OperandWidth =
5113 assert((OperandWidth == 64 || OperandWidth == 128) &&
5114 "Unexpected operand width");
5116 auto Iter = CI->
args().begin();
5117 Args.push_back(*Iter++);
5118 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5119 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5120 NewCall = Builder.CreateCall(NewFn, Args);
5124 case Intrinsic::bitreverse:
5125 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5128 case Intrinsic::ctlz:
5129 case Intrinsic::cttz: {
5136 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5140 case Intrinsic::objectsize: {
5141 Value *NullIsUnknownSize =
5145 NewCall = Builder.CreateCall(
5150 case Intrinsic::ctpop:
5151 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5153 case Intrinsic::dbg_value: {
5155 Name = Name.substr(5);
5157 if (Name.starts_with(
"dbg.addr")) {
5171 if (
Offset->isZeroValue()) {
5172 NewCall = Builder.CreateCall(
5181 case Intrinsic::ptr_annotation:
5189 NewCall = Builder.CreateCall(
5198 case Intrinsic::var_annotation:
5205 NewCall = Builder.CreateCall(
5214 case Intrinsic::riscv_aes32dsi:
5215 case Intrinsic::riscv_aes32dsmi:
5216 case Intrinsic::riscv_aes32esi:
5217 case Intrinsic::riscv_aes32esmi:
5218 case Intrinsic::riscv_sm4ks:
5219 case Intrinsic::riscv_sm4ed: {
5229 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5230 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5236 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5237 Value *Res = NewCall;
5239 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5245 case Intrinsic::nvvm_mapa_shared_cluster: {
5249 Value *Res = NewCall;
5250 Res = Builder.CreateAddrSpaceCast(
5257 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5258 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5261 Args[0] = Builder.CreateAddrSpaceCast(
5264 NewCall = Builder.CreateCall(NewFn, Args);
5270 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5271 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5272 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5273 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5274 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5275 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5276 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5277 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5284 Args[0] = Builder.CreateAddrSpaceCast(
5293 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5295 NewCall = Builder.CreateCall(NewFn, Args);
5301 case Intrinsic::riscv_sha256sig0:
5302 case Intrinsic::riscv_sha256sig1:
5303 case Intrinsic::riscv_sha256sum0:
5304 case Intrinsic::riscv_sha256sum1:
5305 case Intrinsic::riscv_sm3p0:
5306 case Intrinsic::riscv_sm3p1: {
5313 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5315 NewCall = Builder.CreateCall(NewFn, Arg);
5317 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5324 case Intrinsic::x86_xop_vfrcz_ss:
5325 case Intrinsic::x86_xop_vfrcz_sd:
5326 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5329 case Intrinsic::x86_xop_vpermil2pd:
5330 case Intrinsic::x86_xop_vpermil2ps:
5331 case Intrinsic::x86_xop_vpermil2pd_256:
5332 case Intrinsic::x86_xop_vpermil2ps_256: {
5336 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5337 NewCall = Builder.CreateCall(NewFn, Args);
5341 case Intrinsic::x86_sse41_ptestc:
5342 case Intrinsic::x86_sse41_ptestz:
5343 case Intrinsic::x86_sse41_ptestnzc: {
5357 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5358 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5360 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5364 case Intrinsic::x86_rdtscp: {
5370 NewCall = Builder.CreateCall(NewFn);
5372 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5375 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5383 case Intrinsic::x86_sse41_insertps:
5384 case Intrinsic::x86_sse41_dppd:
5385 case Intrinsic::x86_sse41_dpps:
5386 case Intrinsic::x86_sse41_mpsadbw:
5387 case Intrinsic::x86_avx_dp_ps_256:
5388 case Intrinsic::x86_avx2_mpsadbw: {
5394 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5395 NewCall = Builder.CreateCall(NewFn, Args);
5399 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5400 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5401 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5402 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5403 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5404 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5410 NewCall = Builder.CreateCall(NewFn, Args);
5419 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5420 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5421 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5422 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5423 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5424 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5428 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5429 Args[1] = Builder.CreateBitCast(
5432 NewCall = Builder.CreateCall(NewFn, Args);
5433 Value *Res = Builder.CreateBitCast(
5441 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5442 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5443 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5447 Args[1] = Builder.CreateBitCast(
5449 Args[2] = Builder.CreateBitCast(
5452 NewCall = Builder.CreateCall(NewFn, Args);
5456 case Intrinsic::thread_pointer: {
5457 NewCall = Builder.CreateCall(NewFn, {});
5461 case Intrinsic::memcpy:
5462 case Intrinsic::memmove:
5463 case Intrinsic::memset: {
5479 NewCall = Builder.CreateCall(NewFn, Args);
5481 AttributeList NewAttrs = AttributeList::get(
5482 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5483 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5484 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5489 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5492 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5496 case Intrinsic::masked_load:
5497 case Intrinsic::masked_gather:
5498 case Intrinsic::masked_store:
5499 case Intrinsic::masked_scatter: {
5505 auto GetMaybeAlign = [](
Value *
Op) {
5515 auto GetAlign = [&](
Value *
Op) {
5524 case Intrinsic::masked_load:
5525 NewCall = Builder.CreateMaskedLoad(
5529 case Intrinsic::masked_gather:
5530 NewCall = Builder.CreateMaskedGather(
5536 case Intrinsic::masked_store:
5537 NewCall = Builder.CreateMaskedStore(
5541 case Intrinsic::masked_scatter:
5542 NewCall = Builder.CreateMaskedScatter(
5544 DL.getValueOrABITypeAlignment(
5558 case Intrinsic::lifetime_start:
5559 case Intrinsic::lifetime_end: {
5571 NewCall = Builder.CreateLifetimeStart(Ptr);
5573 NewCall = Builder.CreateLifetimeEnd(Ptr);
5582 case Intrinsic::x86_avx512_vpdpbusd_128:
5583 case Intrinsic::x86_avx512_vpdpbusd_256:
5584 case Intrinsic::x86_avx512_vpdpbusd_512:
5585 case Intrinsic::x86_avx512_vpdpbusds_128:
5586 case Intrinsic::x86_avx512_vpdpbusds_256:
5587 case Intrinsic::x86_avx512_vpdpbusds_512:
5588 case Intrinsic::x86_avx2_vpdpbssd_128:
5589 case Intrinsic::x86_avx2_vpdpbssd_256:
5590 case Intrinsic::x86_avx10_vpdpbssd_512:
5591 case Intrinsic::x86_avx2_vpdpbssds_128:
5592 case Intrinsic::x86_avx2_vpdpbssds_256:
5593 case Intrinsic::x86_avx10_vpdpbssds_512:
5594 case Intrinsic::x86_avx2_vpdpbsud_128:
5595 case Intrinsic::x86_avx2_vpdpbsud_256:
5596 case Intrinsic::x86_avx10_vpdpbsud_512:
5597 case Intrinsic::x86_avx2_vpdpbsuds_128:
5598 case Intrinsic::x86_avx2_vpdpbsuds_256:
5599 case Intrinsic::x86_avx10_vpdpbsuds_512:
5600 case Intrinsic::x86_avx2_vpdpbuud_128:
5601 case Intrinsic::x86_avx2_vpdpbuud_256:
5602 case Intrinsic::x86_avx10_vpdpbuud_512:
5603 case Intrinsic::x86_avx2_vpdpbuuds_128:
5604 case Intrinsic::x86_avx2_vpdpbuuds_256:
5605 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5610 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5611 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5613 NewCall = Builder.CreateCall(NewFn, Args);
5616 case Intrinsic::x86_avx512_vpdpwssd_128:
5617 case Intrinsic::x86_avx512_vpdpwssd_256:
5618 case Intrinsic::x86_avx512_vpdpwssd_512:
5619 case Intrinsic::x86_avx512_vpdpwssds_128:
5620 case Intrinsic::x86_avx512_vpdpwssds_256:
5621 case Intrinsic::x86_avx512_vpdpwssds_512:
5622 case Intrinsic::x86_avx2_vpdpwsud_128:
5623 case Intrinsic::x86_avx2_vpdpwsud_256:
5624 case Intrinsic::x86_avx10_vpdpwsud_512:
5625 case Intrinsic::x86_avx2_vpdpwsuds_128:
5626 case Intrinsic::x86_avx2_vpdpwsuds_256:
5627 case Intrinsic::x86_avx10_vpdpwsuds_512:
5628 case Intrinsic::x86_avx2_vpdpwusd_128:
5629 case Intrinsic::x86_avx2_vpdpwusd_256:
5630 case Intrinsic::x86_avx10_vpdpwusd_512:
5631 case Intrinsic::x86_avx2_vpdpwusds_128:
5632 case Intrinsic::x86_avx2_vpdpwusds_256:
5633 case Intrinsic::x86_avx10_vpdpwusds_512:
5634 case Intrinsic::x86_avx2_vpdpwuud_128:
5635 case Intrinsic::x86_avx2_vpdpwuud_256:
5636 case Intrinsic::x86_avx10_vpdpwuud_512:
5637 case Intrinsic::x86_avx2_vpdpwuuds_128:
5638 case Intrinsic::x86_avx2_vpdpwuuds_256:
5639 case Intrinsic::x86_avx10_vpdpwuuds_512:
5644 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5645 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5647 NewCall = Builder.CreateCall(NewFn, Args);
5650 assert(NewCall &&
"Should have either set this variable or returned through "
5651 "the default case");
5658 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5672 F->eraseFromParent();
5678 if (NumOperands == 0)
5686 if (NumOperands == 3) {
5690 Metadata *Elts2[] = {ScalarType, ScalarType,
5704 if (
Opc != Instruction::BitCast)
5708 Type *SrcTy = V->getType();
5725 if (
Opc != Instruction::BitCast)
5728 Type *SrcTy =
C->getType();
5755 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5756 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5757 if (Flag->getNumOperands() < 3)
5759 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5760 return K->getString() ==
"Debug Info Version";
5763 if (OpIt != ModFlags->op_end()) {
5764 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5771 bool BrokenDebugInfo =
false;
5774 if (!BrokenDebugInfo)
5780 M.getContext().diagnose(Diag);
5787 M.getContext().diagnose(DiagVersion);
5797 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5800 if (
F->hasFnAttribute(Attr)) {
5803 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5805 auto [Part, Rest] = S.
split(
',');
5811 const unsigned Dim = DimC -
'x';
5812 assert(Dim < 3 &&
"Unexpected dim char");
5822 F->addFnAttr(Attr, NewAttr);
5826 return S ==
"x" || S ==
"y" || S ==
"z";
5831 if (K ==
"kernel") {
5843 const unsigned Idx = (AlignIdxValuePair >> 16);
5844 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5849 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5854 if (K ==
"minctasm") {
5859 if (K ==
"maxnreg") {
5864 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5868 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5872 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5876 if (K ==
"grid_constant") {
5891 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5898 if (!SeenNodes.
insert(MD).second)
5905 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5912 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5914 const MDOperand &V = MD->getOperand(j + 1);
5917 NewOperands.
append({K, V});
5920 if (NewOperands.
size() > 1)
5933 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
5934 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5935 if (ModRetainReleaseMarker) {
5941 ID->getString().split(ValueComp,
"#");
5942 if (ValueComp.
size() == 2) {
5943 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
5947 M.eraseNamedMetadata(ModRetainReleaseMarker);
5958 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
5984 bool InvalidCast =
false;
5986 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
5999 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6001 Args.push_back(Arg);
6008 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6013 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6026 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6034 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6035 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6036 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6037 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6038 {
"objc_autoreleaseReturnValue",
6039 llvm::Intrinsic::objc_autoreleaseReturnValue},
6040 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6041 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6042 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6043 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6044 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6045 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6046 {
"objc_release", llvm::Intrinsic::objc_release},
6047 {
"objc_retain", llvm::Intrinsic::objc_retain},
6048 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6049 {
"objc_retainAutoreleaseReturnValue",
6050 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6051 {
"objc_retainAutoreleasedReturnValue",
6052 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6053 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6054 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6055 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6056 {
"objc_unsafeClaimAutoreleasedReturnValue",
6057 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6058 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6059 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6060 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6061 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6062 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6063 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6064 {
"objc_arc_annotation_topdown_bbstart",
6065 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6066 {
"objc_arc_annotation_topdown_bbend",
6067 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6068 {
"objc_arc_annotation_bottomup_bbstart",
6069 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6070 {
"objc_arc_annotation_bottomup_bbend",
6071 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6073 for (
auto &
I : RuntimeFuncs)
6074 UpgradeToIntrinsic(
I.first,
I.second);
6078 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6082 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6083 bool HasSwiftVersionFlag =
false;
6084 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6091 if (
Op->getNumOperands() != 3)
6105 if (
ID->getString() ==
"Objective-C Image Info Version")
6107 if (
ID->getString() ==
"Objective-C Class Properties")
6108 HasClassProperties =
true;
6110 if (
ID->getString() ==
"PIC Level") {
6111 if (
auto *Behavior =
6113 uint64_t V = Behavior->getLimitedValue();
6119 if (
ID->getString() ==
"PIE Level")
6120 if (
auto *Behavior =
6127 if (
ID->getString() ==
"branch-target-enforcement" ||
6128 ID->getString().starts_with(
"sign-return-address")) {
6129 if (
auto *Behavior =
6135 Op->getOperand(1),
Op->getOperand(2)};
6145 if (
ID->getString() ==
"Objective-C Image Info Section") {
6148 Value->getString().split(ValueComp,
" ");
6149 if (ValueComp.
size() != 1) {
6150 std::string NewValue;
6151 for (
auto &S : ValueComp)
6152 NewValue += S.str();
6163 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6166 assert(Md->getValue() &&
"Expected non-empty metadata");
6167 auto Type = Md->getValue()->getType();
6170 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6171 if ((Val & 0xff) != Val) {
6172 HasSwiftVersionFlag =
true;
6173 SwiftABIVersion = (Val & 0xff00) >> 8;
6174 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6175 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6186 if (
ID->getString() ==
"amdgpu_code_object_version") {
6189 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6201 if (HasObjCFlag && !HasClassProperties) {
6207 if (HasSwiftVersionFlag) {
6211 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6213 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6221 auto TrimSpaces = [](
StringRef Section) -> std::string {
6223 Section.split(Components,
',');
6228 for (
auto Component : Components)
6229 OS <<
',' << Component.trim();
6234 for (
auto &GV : M.globals()) {
6235 if (!GV.hasSection())
6240 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6245 GV.setSection(TrimSpaces(Section));
6261struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6262 StrictFPUpgradeVisitor() =
default;
6265 if (!
Call.isStrictFP())
6271 Call.removeFnAttr(Attribute::StrictFP);
6272 Call.addFnAttr(Attribute::NoBuiltin);
6277struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6278 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6279 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6281 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6296 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6297 StrictFPUpgradeVisitor SFPV;
6302 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6303 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6304 for (
auto &Arg :
F.args())
6306 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6310 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6311 A.isValid() &&
A.isStringAttribute()) {
6312 F.setSection(
A.getValueAsString());
6313 F.removeFnAttr(
"implicit-section-name");
6320 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6323 if (
A.getValueAsBool()) {
6324 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6330 F.removeFnAttr(
"amdgpu-unsafe-fp-atomics");
6338 if (!
F.hasFnAttribute(FnAttrName))
6339 F.addFnAttr(FnAttrName,
Value);
6346 if (!
F.hasFnAttribute(FnAttrName)) {
6348 F.addFnAttr(FnAttrName);
6350 auto A =
F.getFnAttribute(FnAttrName);
6351 if (
"false" ==
A.getValueAsString())
6352 F.removeFnAttr(FnAttrName);
6353 else if (
"true" ==
A.getValueAsString()) {
6354 F.removeFnAttr(FnAttrName);
6355 F.addFnAttr(FnAttrName);
6361 Triple T(M.getTargetTriple());
6362 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6372 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6376 if (
Op->getNumOperands() != 3)
6385 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6386 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6387 : IDStr ==
"guarded-control-stack" ? &GCSValue
6388 : IDStr ==
"sign-return-address" ? &SRAValue
6389 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6390 : IDStr ==
"sign-return-address-with-bkey"
6396 *ValPtr = CI->getZExtValue();
6402 bool BTE = BTEValue == 1;
6403 bool BPPLR = BPPLRValue == 1;
6404 bool GCS = GCSValue == 1;
6405 bool SRA = SRAValue == 1;
6408 if (SRA && SRAALLValue == 1)
6409 SignTypeValue =
"all";
6412 if (SRA && SRABKeyValue == 1)
6413 SignKeyValue =
"b_key";
6415 for (
Function &
F : M.getFunctionList()) {
6416 if (
F.isDeclaration())
6423 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6424 A.isValid() &&
"none" ==
A.getValueAsString()) {
6425 F.removeFnAttr(
"sign-return-address");
6426 F.removeFnAttr(
"sign-return-address-key");
6442 if (SRAALLValue == 1)
6444 if (SRABKeyValue == 1)
6453 if (
T->getNumOperands() < 1)
6458 return S->getString().starts_with(
"llvm.vectorizer.");
6462 StringRef OldPrefix =
"llvm.vectorizer.";
6465 if (OldTag ==
"llvm.vectorizer.unroll")
6477 if (
T->getNumOperands() < 1)
6482 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6487 Ops.reserve(
T->getNumOperands());
6489 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6490 Ops.push_back(
T->getOperand(
I));
6504 Ops.reserve(
T->getNumOperands());
6515 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6516 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6517 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6520 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6522 auto I =
DL.find(
"-n64-");
6524 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6529 std::string Res =
DL.str();
6532 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6533 Res.append(Res.empty() ?
"G1" :
"-G1");
6541 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6542 Res.append(
"-ni:7:8:9");
6544 if (
DL.ends_with(
"ni:7"))
6546 if (
DL.ends_with(
"ni:7:8"))
6551 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6552 Res.append(
"-p7:160:256:256:32");
6553 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6554 Res.append(
"-p8:128:128:128:48");
6555 constexpr StringRef OldP8(
"-p8:128:128-");
6556 if (
DL.contains(OldP8))
6557 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6558 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6559 Res.append(
"-p9:192:256:256:32");
6563 if (!
DL.contains(
"m:e"))
6564 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6569 if (
T.isSystemZ() && !
DL.empty()) {
6571 if (!
DL.contains(
"-S64"))
6572 return "E-S64" +
DL.drop_front(1).str();
6576 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6579 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6580 if (!
DL.contains(AddrSpaces)) {
6582 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6583 if (R.match(Res, &
Groups))
6589 if (
T.isAArch64()) {
6591 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6592 Res.append(
"-Fn32");
6593 AddPtr32Ptr64AddrSpaces();
6597 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6601 std::string I64 =
"-i64:64";
6602 std::string I128 =
"-i128:128";
6604 size_t Pos = Res.find(I64);
6605 if (Pos !=
size_t(-1))
6606 Res.insert(Pos + I64.size(), I128);
6610 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6611 size_t Pos = Res.find(
"-S128");
6614 Res.insert(Pos,
"-f64:32:64");
6620 AddPtr32Ptr64AddrSpaces();
6628 if (!
T.isOSIAMCU()) {
6629 std::string I128 =
"-i128:128";
6632 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6633 if (R.match(Res, &
Groups))
6641 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6643 auto I =
Ref.find(
"-f80:32-");
6645 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6653 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6656 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6657 B.removeAttribute(
"no-frame-pointer-elim");
6659 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6661 if (FramePointer !=
"all")
6662 FramePointer =
"non-leaf";
6663 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6665 if (!FramePointer.
empty())
6666 B.addAttribute(
"frame-pointer", FramePointer);
6668 A =
B.getAttribute(
"null-pointer-is-valid");
6671 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6672 B.removeAttribute(
"null-pointer-is-valid");
6673 if (NullPointerIsValid)
6674 B.addAttribute(Attribute::NullPointerIsValid);
6684 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a parent module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the intrinsic's signature.
@ ADDRESS_SPACE_SHARED_CLUSTER
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.