34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsARM.h"
36#include "llvm/IR/IntrinsicsNVPTX.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
38#include "llvm/IR/IntrinsicsWebAssembly.h"
39#include "llvm/IR/IntrinsicsX86.h"
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
  Type *Arg0Type = F->getFunctionType()->getParamType(0);

  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);

  if (F->getReturnType()->isVectorTy())

  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);

  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);

  if (F->getReturnType()->getScalarType()->isBFloatTy())

  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
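// Old x86 intrinsic names, grouped by prefix ("avx.", "avx2.", "avx512.",
// "sse*.", "xop.", ...), that no longer exist as intrinsics and must be
// auto-upgraded.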
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||
            Name == "cvt.ps2.pd.256" ||
            Name == "cvtdq2.pd.256" ||
            Name == "cvtdq2.ps.256" ||
            Name.starts_with("movnt.") ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu.") ||
            Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vbroadcastf128") ||
            Name.starts_with("vextractf128.") ||
            Name.starts_with("vinsertf128.") ||
            Name.starts_with("vperm2f128.") ||
            Name.starts_with("vpermil."));
  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||
            Name.starts_with("pabs.") ||
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("pblendd.") ||
            Name.starts_with("pbroadcast") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            Name.starts_with("pmax") ||
            Name.starts_with("pmin") ||
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx") ||
            Name == "pmulu.dq" ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("vbroadcast") ||
            Name == "vbroadcasti128" ||
            Name == "vextracti128" ||
            Name == "vinserti128" ||
            Name == "vperm2i128");
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      return (Name.starts_with("add.p") ||
              Name.starts_with("and.") ||
              Name.starts_with("andn.") ||
              Name.starts_with("broadcast.s") ||
              Name.starts_with("broadcastf32x4.") ||
              Name.starts_with("broadcastf32x8.") ||
              Name.starts_with("broadcastf64x2.") ||
              Name.starts_with("broadcastf64x4.") ||
              Name.starts_with("broadcasti32x4.") ||
              Name.starts_with("broadcasti32x8.") ||
              Name.starts_with("broadcasti64x2.") ||
              Name.starts_with("broadcasti64x4.") ||
              Name.starts_with("cmp.b") ||
              Name.starts_with("cmp.d") ||
              Name.starts_with("cmp.q") ||
              Name.starts_with("cmp.w") ||
              Name.starts_with("compress.b") ||
              Name.starts_with("compress.d") ||
              Name.starts_with("compress.p") ||
              Name.starts_with("compress.q") ||
              Name.starts_with("compress.store.") ||
              Name.starts_with("compress.w") ||
              Name.starts_with("conflict.") ||
              Name.starts_with("cvtdq2pd.") ||
              Name.starts_with("cvtdq2ps.") ||
              Name == "cvtpd2dq.256" ||
              Name == "cvtpd2ps.256" ||
              Name == "cvtps2pd.128" ||
              Name == "cvtps2pd.256" ||
              Name.starts_with("cvtqq2pd.") ||
              Name == "cvtqq2ps.256" ||
              Name == "cvtqq2ps.512" ||
              Name == "cvttpd2dq.256" ||
              Name == "cvttps2dq.128" ||
              Name == "cvttps2dq.256" ||
              Name.starts_with("cvtudq2pd.") ||
              Name.starts_with("cvtudq2ps.") ||
              Name.starts_with("cvtuqq2pd.") ||
              Name == "cvtuqq2ps.256" ||
              Name == "cvtuqq2ps.512" ||
              Name.starts_with("dbpsadbw.") ||
              Name.starts_with("div.p") ||
              Name.starts_with("expand.b") ||
              Name.starts_with("expand.d") ||
              Name.starts_with("expand.load.") ||
              Name.starts_with("expand.p") ||
              Name.starts_with("expand.q") ||
              Name.starts_with("expand.w") ||
              Name.starts_with("fpclass.p") ||
              Name.starts_with("insert") ||
              Name.starts_with("load.") ||
              Name.starts_with("loadu.") ||
              Name.starts_with("lzcnt.") ||
              Name.starts_with("max.p") ||
              Name.starts_with("min.p") ||
              Name.starts_with("movddup") ||
              Name.starts_with("move.s") ||
              Name.starts_with("movshdup") ||
              Name.starts_with("movsldup") ||
              Name.starts_with("mul.p") ||
              Name.starts_with("or.") ||
              Name.starts_with("pabs.") ||
              Name.starts_with("packssdw.") ||
              Name.starts_with("packsswb.") ||
              Name.starts_with("packusdw.") ||
              Name.starts_with("packuswb.") ||
              Name.starts_with("padd.") ||
              Name.starts_with("padds.") ||
              Name.starts_with("paddus.") ||
              Name.starts_with("palignr.") ||
              Name.starts_with("pand.") ||
              Name.starts_with("pandn.") ||
              Name.starts_with("pavg") ||
              Name.starts_with("pbroadcast") ||
              Name.starts_with("pcmpeq.") ||
              Name.starts_with("pcmpgt.") ||
              Name.starts_with("perm.df.") ||
              Name.starts_with("perm.di.") ||
              Name.starts_with("permvar.") ||
              Name.starts_with("pmaddubs.w.") ||
              Name.starts_with("pmaddw.d.") ||
              Name.starts_with("pmax") ||
              Name.starts_with("pmin") ||
              Name == "pmov.qd.256" ||
              Name == "pmov.qd.512" ||
              Name == "pmov.wb.256" ||
              Name == "pmov.wb.512" ||
              Name.starts_with("pmovsx") ||
              Name.starts_with("pmovzx") ||
              Name.starts_with("pmul.dq.") ||
              Name.starts_with("pmul.hr.sw.") ||
              Name.starts_with("pmulh.w.") ||
              Name.starts_with("pmulhu.w.") ||
              Name.starts_with("pmull.") ||
              Name.starts_with("pmultishift.qb.") ||
              Name.starts_with("pmulu.dq.") ||
              Name.starts_with("por.") ||
              Name.starts_with("prol.") ||
              Name.starts_with("prolv.") ||
              Name.starts_with("pror.") ||
              Name.starts_with("prorv.") ||
              Name.starts_with("pshuf.b.") ||
              Name.starts_with("pshuf.d.") ||
              Name.starts_with("pshufh.w.") ||
              Name.starts_with("pshufl.w.") ||
              Name.starts_with("psll.d") ||
              Name.starts_with("psll.q") ||
              Name.starts_with("psll.w") ||
              Name.starts_with("pslli") ||
              Name.starts_with("psllv") ||
              Name.starts_with("psra.d") ||
              Name.starts_with("psra.q") ||
              Name.starts_with("psra.w") ||
              Name.starts_with("psrai") ||
              Name.starts_with("psrav") ||
              Name.starts_with("psrl.d") ||
              Name.starts_with("psrl.q") ||
              Name.starts_with("psrl.w") ||
              Name.starts_with("psrli") ||
              Name.starts_with("psrlv") ||
              Name.starts_with("psub.") ||
              Name.starts_with("psubs.") ||
              Name.starts_with("psubus.") ||
              Name.starts_with("pternlog.") ||
              Name.starts_with("punpckh") ||
              Name.starts_with("punpckl") ||
              Name.starts_with("pxor.") ||
              Name.starts_with("shuf.f") ||
              Name.starts_with("shuf.i") ||
              Name.starts_with("shuf.p") ||
              Name.starts_with("sqrt.p") ||
              Name.starts_with("store.b.") ||
              Name.starts_with("store.d.") ||
              Name.starts_with("store.p") ||
              Name.starts_with("store.q.") ||
              Name.starts_with("store.w.") ||
              Name == "store.ss" ||
              Name.starts_with("storeu.") ||
              Name.starts_with("sub.p") ||
              Name.starts_with("ucmp.") ||
              Name.starts_with("unpckh.") ||
              Name.starts_with("unpckl.") ||
              Name.starts_with("valign.") ||
              Name == "vcvtph2ps.128" ||
              Name == "vcvtph2ps.256" ||
              Name.starts_with("vextract") ||
              Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfnmadd.") ||
              Name.starts_with("vfnmsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermi2var.") ||
              Name.starts_with("vpermil.p") ||
              Name.starts_with("vpermilvar.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshld.") ||
              Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrd.") ||
              Name.starts_with("vpshrdv.") ||
              Name.starts_with("vpshufbitqmb.") ||
              Name.starts_with("xor."));
    if (Name.consume_front("mask3."))
      return (Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfmsub.") ||
              Name.starts_with("vfmsubadd.") ||
              Name.starts_with("vfnmsub."));
    if (Name.consume_front("maskz."))
      return (Name.starts_with("pternlog.") ||
              Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrdv."));
    return (Name == "movntdqa" ||
            Name == "pmul.dq.512" ||
            Name == "pmulu.dq.512" ||
            Name.starts_with("broadcastm") ||
            Name.starts_with("cmp.p") ||
            Name.starts_with("cvtb2mask.") ||
            Name.starts_with("cvtd2mask.") ||
            Name.starts_with("cvtmask2") ||
            Name.starts_with("cvtq2mask.") ||
            Name == "cvtusi2sd" ||
            Name.starts_with("cvtw2mask.") ||
            Name == "kortestc.w" ||
            Name == "kortestz.w" ||
            Name.starts_with("kunpck") ||
            Name.starts_with("padds.") ||
            Name.starts_with("pbroadcast") ||
            Name.starts_with("prol") ||
            Name.starts_with("pror") ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("ptestm") ||
            Name.starts_with("ptestnm") ||
            Name.starts_with("storent.") ||
            Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vpshld.") ||
            Name.starts_with("vpshrd."));
  }
  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||
            Name.starts_with("vfmsub.") ||
            Name.starts_with("vfmsubadd.") ||
            Name.starts_with("vfnmadd.") ||
            Name.starts_with("vfnmsub."));

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s");

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||
            Name == "cvtsi2ss" ||
            Name == "cvtsi642ss" ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu."));
  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||
            Name == "cvtdq2pd" ||
            Name == "cvtdq2ps" ||
            Name == "cvtps2pd" ||
            Name == "cvtsi2sd" ||
            Name == "cvtsi642sd" ||
            Name == "cvtss2sd" ||
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            Name == "pmulu.dq" ||
            Name.starts_with("pshuf") ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("sqrt.p") ||
            Name == "storel.dq" ||
            Name.starts_with("storeu."));
  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") ||
            Name == "movntdqa" ||
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx"));

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8";

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt.");

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" ||
            Name == "pabs.d.128" ||
            Name == "pabs.w.128");
  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||
            Name == "vpcmov.256" ||
            Name.starts_with("vpcom") ||
            Name.starts_with("vprot"));

  return (Name == "addcarry.u32" ||
          Name == "addcarry.u64" ||
          Name == "addcarryx.u32" ||
          Name == "addcarryx.u64" ||
          Name == "subborrow.u32" ||
          Name == "subborrow.u64" ||
          Name.starts_with("vcvtph2ps."));
  if (!Name.consume_front("x86."))

  if (Name == "rdtscp") {
    if (F->getFunctionType()->getNumParams() == 0)

    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
  if (Name.consume_front("sse41.ptest")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);

  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
  if (Name.consume_front("avx512bf16.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);

    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
  if (Name.consume_front("xop.")) {
    if (Name.starts_with("vpermil2")) {
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);
  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);

  if (Name.starts_with("rbit")) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());

  if (Name == "thread.pointer") {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
  bool Neon = Name.consume_front("neon.");

  if (Name.consume_front("bfdot.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Cases({"v2f32.v8i8", "v4f32.v16i8"},

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      std::array<Type *, 2> Tys{

  if (Name.consume_front("bfm")) {
    if (Name.consume_back(".v4f32.v16i8")) {

          F->arg_begin()->getType());

  if (Name.consume_front("vst")) {
    static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");

      static const Intrinsic::ID StoreInts[] = {
          Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
          Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
      static const Intrinsic::ID StoreLaneInts[] = {
          Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
          Intrinsic::arm_neon_vst4lane};
      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), StoreInts[fArgs.size() - 3], Tys);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
  if (Name.consume_front("mve.")) {
    if (Name == "vctp64") {

    if (Name.starts_with("vrintn.v")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());

    if (Name.consume_back(".v4i1")) {
      if (Name.consume_back(".predicated.v2i64.v4i32"))
        return Name == "mull.int" || Name == "vqdmull";

      if (Name.consume_back(".v2i64")) {
        bool IsGather = Name.consume_front("vldr.gather.");
        if (IsGather || Name.consume_front("vstr.scatter.")) {
          if (Name.consume_front("base.")) {
            Name.consume_front("wb.");
            return Name == "predicated.v2i64";

          if (Name.consume_front("offset.predicated."))
            return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                   Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

  if (Name.consume_front("cde.vcx")) {
    if (Name.consume_back(".predicated.v2i64.v4i1"))
      return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
             Name == "3q" || Name == "3qa";
                                            F->arg_begin()->getType());

  if (Name.starts_with("addp")) {
    if (F->arg_size() != 2)

    if (Ty && Ty->getElementType()->isFloatingPointTy()) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);

  if (Name.starts_with("bfcvt")) {

  if (Name.consume_front("sve.")) {
    if (Name.consume_front("bf")) {
      if (Name.consume_back(".lane")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                 .Default(Intrinsic::not_intrinsic);

    if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
    if (Name.consume_front("addqv")) {
      if (!F->getReturnType()->isFPOrFPVectorTy())

      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);

    if (Name.consume_front("ld")) {
      static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
      if (LdRegex.match(Name)) {
        static const Intrinsic::ID LoadIDs[] = {
            Intrinsic::aarch64_sve_ld2_sret,
            Intrinsic::aarch64_sve_ld3_sret,
            Intrinsic::aarch64_sve_ld4_sret,
        };
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  LoadIDs[Name[0] - '2'], Ty);

    if (Name.consume_front("tuple.")) {
      if (Name.starts_with("get")) {
        Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_extract, Tys);

      if (Name.starts_with("set")) {
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[0], Args[2], Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_insert, Tys);

      static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
      if (CreateTupleRegex.match(Name)) {
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_insert, Tys);

    if (Name.starts_with("rev.nxv")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
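  // NVPTX: upgrade nvvm.* intrinsics; names are matched by prefix and mapped
  // through StringSwitch tables onto the current nvvm intrinsic set.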
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("im2col.3d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
             .Case("im2col.4d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
             .Case("im2col.5d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
             .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
             .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
             .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
             .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
             .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
             .Default(Intrinsic::not_intrinsic);

    if (F->getArg(0)->getType()->getPointerAddressSpace() ==

    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);

  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
      return Intrinsic::nvvm_mapa_shared_cluster;
  if (Name.consume_front("cp.async.bulk.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("global.to.shared.cluster",
                   Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
             .Case("shared.cta.to.cluster",
                   Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
             .Default(Intrinsic::not_intrinsic);

    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
  if (Name.consume_front("fma.rn."))
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
             .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
             .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
             .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
             .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
             .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
             .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
             .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
             .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
             .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
             .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
             .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
             .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("bf16", Intrinsic::nvvm_fmax_bf16)
             .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
             .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
             .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
             .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
             .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
             .Case("ftz.nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
             .Case("ftz.nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
             .Case("ftz.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
             .Case("ftz.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
             .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
             .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
             .Case("nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
             .Case("nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
             .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
             .Case("xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
             .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("bf16", Intrinsic::nvvm_fmin_bf16)
             .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
             .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
             .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
             .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
             .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
             .Case("ftz.nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
             .Case("ftz.nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
             .Case("ftz.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
             .Case("ftz.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
             .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
             .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
             .Case("nan.xorsign.abs.bf16",
                   Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
             .Case("nan.xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
             .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
             .Case("xorsign.abs.bf16x2",
                   Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
             .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("bf16", Intrinsic::nvvm_neg_bf16)
             .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
             .Default(Intrinsic::not_intrinsic);
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  if (!Name.consume_front("llvm.") || Name.empty())

  bool IsArm = Name.consume_front("arm.");
  if (IsArm || Name.consume_front("aarch64.")) {
  if (Name.consume_front("amdgcn.")) {
    if (Name == "alignbit") {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::fshr, {F->getReturnType()});

    if (Name.consume_front("atomic.")) {
      if (Name.starts_with("inc") || Name.starts_with("dec") ||
          Name.starts_with("cond.sub") || Name.starts_with("csub")) {

    if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
        Name.consume_front("flat.atomic.")) {
      if (Name.starts_with("fadd") ||
          (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
          (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {

    if (Name.starts_with("ldexp.")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ldexp,
          {F->getReturnType(), F->getArg(1)->getType()});

  if (F->arg_size() == 1) {

        F->arg_begin()->getType());

  if (F->arg_size() == 2 && Name == "coro.end") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::coro_end);
  if (Name.consume_front("dbg.")) {
    if (CanUpgradeDebugIntrinsicsToRecords) {
      if (Name == "addr" || Name == "value" || Name == "assign" ||
          Name == "declare" || Name == "label") {

    if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::dbg_value);
  if (Name.consume_front("experimental.vector.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("extract.", Intrinsic::vector_extract)
             .StartsWith("insert.", Intrinsic::vector_insert)
             .StartsWith("splice.", Intrinsic::vector_splice)
             .StartsWith("reverse.", Intrinsic::vector_reverse)
             .StartsWith("interleave2.", Intrinsic::vector_interleave2)
             .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
             .StartsWith("partial.reduce.add.",
                         Intrinsic::vector_partial_reduce_add)
             .Default(Intrinsic::not_intrinsic);

    const auto *FT = F->getFunctionType();

    if (ID == Intrinsic::vector_extract ||
        ID == Intrinsic::vector_interleave2)

    if (ID != Intrinsic::vector_interleave2)

    if (ID == Intrinsic::vector_insert ||
        ID == Intrinsic::vector_partial_reduce_add)
    if (Name.consume_front("reduce.")) {
      static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
      if (R.match(Name, &Groups))
        ID = StringSwitch<Intrinsic::ID>(Groups[1])
                 .Case("add", Intrinsic::vector_reduce_add)
                 .Case("mul", Intrinsic::vector_reduce_mul)
                 .Case("and", Intrinsic::vector_reduce_and)
                 .Case("or", Intrinsic::vector_reduce_or)
                 .Case("xor", Intrinsic::vector_reduce_xor)
                 .Case("smax", Intrinsic::vector_reduce_smax)
                 .Case("smin", Intrinsic::vector_reduce_smin)
                 .Case("umax", Intrinsic::vector_reduce_umax)
                 .Case("umin", Intrinsic::vector_reduce_umin)
                 .Case("fmax", Intrinsic::vector_reduce_fmax)
                 .Case("fmin", Intrinsic::vector_reduce_fmin)
                 .Default(Intrinsic::not_intrinsic);

      static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");

        ID = StringSwitch<Intrinsic::ID>(Groups[1])
                 .Case("fadd", Intrinsic::vector_reduce_fadd)
                 .Case("fmul", Intrinsic::vector_reduce_fmul)
                 .Default(Intrinsic::not_intrinsic);

      auto Args = F->getFunctionType()->params();
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                {Args[V2 ? 1 : 0]});
  if (Name.consume_front("experimental.stepvector.")) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), ID, F->getFunctionType()->getReturnType());

  if (Name.starts_with("flt.rounds")) {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::get_rounding);

  if (Name.starts_with("invariant.group.barrier")) {
    auto Args = F->getFunctionType()->params();
    Type *ObjectPtr[1] = {Args[0]};
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);

  if ((Name.starts_with("lifetime.start") ||
       Name.starts_with("lifetime.end")) &&
      F->arg_size() == 2) {
    Intrinsic::ID IID = Name.starts_with("lifetime.start")
                            ? Intrinsic::lifetime_start
                            : Intrinsic::lifetime_end;
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                              F->getArg(0)->getType());
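  // Memory intrinsics: the old 5-argument forms (with an explicit alignment
  // argument) are upgraded to the current memcpy/memmove/memset signatures.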
  ID = StringSwitch<Intrinsic::ID>(Name)
           .StartsWith("memcpy.", Intrinsic::memcpy)
           .StartsWith("memmove.", Intrinsic::memmove)
           .Default(Intrinsic::not_intrinsic);

  if (F->arg_size() == 5) {

        F->getFunctionType()->params().slice(0, 3);

  if (Name.starts_with("memset.") && F->arg_size() == 5) {
    const auto *FT = F->getFunctionType();
    Type *ParamTypes[2] = {
        FT->getParamType(0),

    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::memset, ParamTypes);
  Intrinsic::ID MaskedID =
      StringSwitch<Intrinsic::ID>(Name)
          .StartsWith("masked.load", Intrinsic::masked_load)
          .StartsWith("masked.gather", Intrinsic::masked_gather)
          .StartsWith("masked.store", Intrinsic::masked_store)
          .StartsWith("masked.scatter", Intrinsic::masked_scatter)
          .Default(Intrinsic::not_intrinsic);

  if (MaskedID && F->arg_size() == 4) {
    if (MaskedID == Intrinsic::masked_load ||
        MaskedID == Intrinsic::masked_gather) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getReturnType(), F->getArg(0)->getType()});

    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), MaskedID,
        {F->getArg(0)->getType(), F->getArg(1)->getType()});
  if (Name.consume_front("nvvm.")) {
    if (F->arg_size() == 1) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
               .Case("clz.i", Intrinsic::ctlz)
               .Case("popc.i", Intrinsic::ctpop)
               .Default(Intrinsic::not_intrinsic);

        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  {F->getReturnType()});

    if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
    bool Expand = false;
    if (Name.consume_front("abs."))
      Expand =
          Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
    else if (Name.consume_front("fabs."))
      Expand = Name == "f" || Name == "ftz.f" || Name == "d";
    else if (Name.consume_front("ex2.approx."))
      Expand =
          Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
    else if (Name.consume_front("max.") || Name.consume_front("min."))
      Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
               Name == "ui" || Name == "ull";
    else if (Name.consume_front("atomic.load."))

    else if (Name.consume_front("bitcast."))
      Expand =
          Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
    else if (Name.consume_front("rotate."))
      Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
    else if (Name.consume_front("ptr.gen.to."))

    else if (Name.consume_front("ptr."))

    else if (Name.consume_front("ldg.global."))
      Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                Name.starts_with("p."));
    else
      Expand = StringSwitch<bool>(Name)
                   .Case("barrier0", true)
                   .Case("barrier.n", true)
                   .Case("barrier.sync.cnt", true)
                   .Case("barrier.sync", true)
                   .Case("barrier", true)
                   .Case("bar.sync", true)
                   .Case("barrier0.popc", true)
                   .Case("barrier0.and", true)
                   .Case("barrier0.or", true)
                   .Case("clz.ll", true)
                   .Case("popc.ll", true)
                   .Case("swap.lo.hi.b64", true)
                   .Case("tanh.approx.f32", true)
                   .Default(false);
  if (Name.starts_with("objectsize.")) {
    Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
    if (F->arg_size() == 2 || F->arg_size() == 3) {
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::objectsize, Tys);

  if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::ptr_annotation,
        {F->arg_begin()->getType(), F->getArg(1)->getType()});
  if (Name.consume_front("riscv.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
             .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
             .Case("aes32esi", Intrinsic::riscv_aes32esi)
             .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
             .Default(Intrinsic::not_intrinsic);

      if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {

    if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
        F->getFunctionType()->getReturnType()->isIntegerTy(64)) {

    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
             .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
             .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
             .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
             .Default(Intrinsic::not_intrinsic);

    if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
  if (Name == "stackprotectorcheck") {

  if (Name == "thread.pointer") {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

  if (Name == "var.annotation" && F->arg_size() == 4) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::var_annotation,
        {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
  if (Name.consume_front("wasm.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
             .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
             .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
             .Default(Intrinsic::not_intrinsic);

      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                F->getReturnType());

    if (Name.consume_front("dot.i8x16.i7x16.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
               .Case("add.signed",
                     Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
               .Default(Intrinsic::not_intrinsic);
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&

  auto *FT = F->getFunctionType();

  std::string Name = F->getName().str();

                         Name, F->getParent());

  if (Result != std::nullopt) {
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {

         GV->getName() == "llvm.global_dtors")) ||

  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {

        Ctor->getAggregateElement(1),
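// IR-building helpers: the routines below expand upgraded x86 calls inline.
// The two shufflevector loops emulate the whole-vector byte shifts
// (psll.dq/psrl.dq) that no longer exist as intrinsics.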
  unsigned NumElts = ResultTy->getNumElements() * 8;

  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = NumElts + i - Shift;
      if (Idx < NumElts)
        Idx -= NumElts - 16;
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));

  return Builder.CreateBitCast(Res, ResultTy, "cast");

  unsigned NumElts = ResultTy->getNumElements() * 8;

  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;
      if (Idx >= 16)
        Idx += NumElts - 16;
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));

  return Builder.CreateBitCast(Res, ResultTy, "cast");
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  for (unsigned i = 0; i != NumElts; ++i)

  Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),

  if (C->isAllOnesValue())

  return Builder.CreateSelect(Mask, Op0, Op1);

  if (C->isAllOnesValue())

                                   Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

    ShiftVal &= (NumElts - 1);

  if (ShiftVal > 16) {

  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16;
      Indices[l + i] = Idx + l;
    }
  }

                                    Op1, Op0, ArrayRef(Indices, NumElts),
                                    "palignr");
                                          bool ZeroMask, bool IndexForm) {

  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();

  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;

  Value *V = Builder.CreateIntrinsic(IID, Args);
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

                                bool IsRotateRight) {

  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

  Value *Ext = Builder.CreateSExt(Cmp, Ty);

                               bool IsShiftRight, bool ZeroMask) {

  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  const Align Alignment =
      ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)

  if (C->isAllOnesValue())
    return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);

  const Align Alignment =

  if (C->isAllOnesValue())
    return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);

  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});

  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  LHS = Builder.CreateShl(LHS, ShiftAmt);
  LHS = Builder.CreateAShr(LHS, ShiftAmt);
  RHS = Builder.CreateShl(RHS, ShiftAmt);
  RHS = Builder.CreateAShr(RHS, ShiftAmt);

  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  LHS = Builder.CreateAnd(LHS, Mask);
  RHS = Builder.CreateAnd(RHS, Mask);

  if (!C || !C->isAllOnesValue())
    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));

  for (unsigned i = 0; i != NumElts; ++i)

  for (unsigned i = NumElts; i != 8; ++i)
    Indices[i] = NumElts + i % NumElts;
  Vec = Builder.CreateShuffleVector(Vec,

  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));

                             unsigned CC, bool Signed) {

  } else if (CC == 7) {

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);

  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  Name = Name.substr(12);

  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;

  Rep = Builder.CreateIntrinsic(IID, Args);
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(

    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    Type *Ty = (Name == "abs.bf16")

    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.consume_front("ex2.approx.")) {
    Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
                                                : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {

  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {

    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {

    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {

    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {

    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},

    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {

    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,

  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||

  } else if (Name == "rotate.b32") {

    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {

    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {

    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {

    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              Name.starts_with(".to.gen"))) {

  } else if (Name.consume_front("ldg.global")) {

    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));

    LD->setMetadata(LLVMContext::MD_invariant_load, MD);

  } else if (Name == "tanh.approx.f32") {

    Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),

  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {

        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,

  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},

  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},

  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},

  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {

    C = Builder.CreateICmpNE(C, Builder.getInt32(0));
    Intrinsic::ID IID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("barrier0.popc",
                  Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
            .Case("barrier0.and",
                  Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
            .Case("barrier0.or",
                  Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
    Rep = Builder.CreateZExt(Bar, CI->getType());

             !F->getReturnType()->getScalarType()->isBFloatTy()) {

        ? Builder.CreateBitCast(Arg, NewType)

    Rep = Builder.CreateCall(NewFn, Args);
    if (F->getReturnType()->isIntegerTy())
      Rep = Builder.CreateBitCast(Rep, F->getReturnType());
  Value *Rep = nullptr;

  if (Name.starts_with("sse4a.movnt.")) {

        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name.starts_with("avx.movnt.") ||
             Name.starts_with("avx512.storent.")) {

    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name == "sse2.storel.dq") {

    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Builder.CreateAlignedStore(Elt, Arg0, Align(1));
  } else if (Name.starts_with("sse.storeu.") ||
             Name.starts_with("sse2.storeu.") ||
             Name.starts_with("avx.storeu.")) {

    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  } else if (Name == "avx512.mask.store.ss") {

  } else if (Name.starts_with("avx512.mask.store")) {

    bool Aligned = Name[17] != 'u';

  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {

    bool CmpEq = Name[9] == 'e';

    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx512.broadcastm")) {

    Rep = Builder.CreateVectorSplat(NumElts, Rep);
  } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {

    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  } else if (Name.starts_with("avx.sqrt.p") ||
             Name.starts_with("sse2.sqrt.p") ||
             Name.starts_with("sse.sqrt.p")) {
    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {

      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;

      Rep = Builder.CreateIntrinsic(IID, Args);

      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});

  } else if (Name.starts_with("avx512.ptestm") ||
             Name.starts_with("avx512.ptestnm")) {

    Rep = Builder.CreateAnd(Op0, Op1);

    Rep = Builder.CreateICmp(Pred, Rep, Zero);

  } else if (Name.starts_with("avx512.mask.pbroadcast")) {

    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));

  } else if (Name.starts_with("avx512.kunpck")) {

    for (unsigned i = 0; i != NumElts; ++i)

    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kand.w") {

    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {

    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {

    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {

    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {

    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {

    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {

    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());

    if (Name[14] == 'c')

    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
             Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
             Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
             Name == "sse.div.ss" || Name == "sse2.div.sd") {

    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  } else if (Name.starts_with("avx512.mask.pcmp")) {

    bool CmpEq = Name[16] == 'e';

  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {

      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;

      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;

      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;

  } else if (Name.starts_with("avx512.mask.fpclass.p")) {

    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;

  } else if (Name.starts_with("avx512.cmp.p")) {

    Type *OpTy = Args[0]->getType();
3039 if (VecWidth == 128 && EltWidth == 32)
3040 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3041 else if (VecWidth == 256 && EltWidth == 32)
3042 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3043 else if (VecWidth == 512 && EltWidth == 32)
3044 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3045 else if (VecWidth == 128 && EltWidth == 64)
3046 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3047 else if (VecWidth == 256 && EltWidth == 64)
3048 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3049 else if (VecWidth == 512 && EltWidth == 64)
3050 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3055 if (VecWidth == 512)
3057 Args.push_back(Mask);
3059 Rep = Builder.CreateIntrinsic(IID, Args);
3060 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3064 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3067 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3068 Name.starts_with(
"avx512.cvtw2mask.") ||
3069 Name.starts_with(
"avx512.cvtd2mask.") ||
3070 Name.starts_with(
"avx512.cvtq2mask.")) {
3075 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3076 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3077 Name.starts_with(
"avx512.mask.pabs")) {
3079 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3080 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3081 Name.starts_with(
"avx512.mask.pmaxs")) {
3083 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3084 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3085 Name.starts_with(
"avx512.mask.pmaxu")) {
3087 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3088 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3089 Name.starts_with(
"avx512.mask.pmins")) {
3091 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3092 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3093 Name.starts_with(
"avx512.mask.pminu")) {
3095 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3096 Name ==
"avx512.pmulu.dq.512" ||
3097 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3099 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3100 Name ==
"avx512.pmul.dq.512" ||
3101 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3103 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3104 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3109 }
else if (Name ==
"avx512.cvtusi2sd") {
3114 }
else if (Name ==
"sse2.cvtss2sd") {
3116 Rep = Builder.CreateFPExt(
3119 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3120 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3121 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3122 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3123 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3124 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3125 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3126 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3127 Name ==
"avx512.mask.cvtqq2ps.256" ||
3128 Name ==
"avx512.mask.cvtqq2ps.512" ||
3129 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3130 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3131 Name ==
"avx.cvt.ps2.pd.256" ||
3132 Name ==
"avx512.mask.cvtps2pd.128" ||
3133 Name ==
"avx512.mask.cvtps2pd.256") {
3138 unsigned NumDstElts = DstTy->getNumElements();
3140 assert(NumDstElts == 2 &&
"Unexpected vector size");
3141 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3144 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3145 bool IsUnsigned = Name.contains(
"cvtu");
3147 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3151 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3152 : Intrinsic::x86_avx512_sitofp_round;
3153 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3156 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3157 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3163 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3164 Name.starts_with(
"vcvtph2ps.")) {
3168 unsigned NumDstElts = DstTy->getNumElements();
3169 if (NumDstElts != SrcTy->getNumElements()) {
3170 assert(NumDstElts == 4 &&
"Unexpected vector size");
3171 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3173 Rep = Builder.CreateBitCast(
3175 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3179 }
else if (Name.starts_with(
"avx512.mask.load")) {
3181 bool Aligned = Name[16] !=
'u';
3184 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3187 ResultTy->getNumElements());
3189 Rep = Builder.CreateIntrinsic(
3190 Intrinsic::masked_expandload, ResultTy,
3192 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3198 Rep = Builder.CreateIntrinsic(
3199 Intrinsic::masked_compressstore, ResultTy,
3201 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3202 Name.starts_with(
"avx512.mask.expand.")) {
3206 ResultTy->getNumElements());
3208 bool IsCompress = Name[12] ==
'c';
3209 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3210 : Intrinsic::x86_avx512_mask_expand;
3211 Rep = Builder.CreateIntrinsic(
3213 }
else if (Name.starts_with(
"xop.vpcom")) {
3215 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3216 Name.ends_with(
"uq"))
3218 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3219 Name.ends_with(
"d") || Name.ends_with(
"q"))
3228 Name = Name.substr(9);
3229 if (Name.starts_with(
"lt"))
3231 else if (Name.starts_with(
"le"))
3233 else if (Name.starts_with(
"gt"))
3235 else if (Name.starts_with(
"ge"))
3237 else if (Name.starts_with(
"eq"))
3239 else if (Name.starts_with(
"ne"))
3241 else if (Name.starts_with(
"false"))
3243 else if (Name.starts_with(
"true"))
3250 }
else if (Name.starts_with(
"xop.vpcmov")) {
3252 Value *NotSel = Builder.CreateNot(Sel);
3255 Rep = Builder.CreateOr(Sel0, Sel1);
3256 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3257 Name.starts_with(
"avx512.mask.prol")) {
3259 }
else if (Name.starts_with(
"avx512.pror") ||
3260 Name.starts_with(
"avx512.mask.pror")) {
3262 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3263 Name.starts_with(
"avx512.mask.vpshld") ||
3264 Name.starts_with(
"avx512.maskz.vpshld")) {
3265 bool ZeroMask = Name[11] ==
'z';
3267 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3268 Name.starts_with(
"avx512.mask.vpshrd") ||
3269 Name.starts_with(
"avx512.maskz.vpshrd")) {
3270 bool ZeroMask = Name[11] ==
'z';
3272 }
else if (Name ==
"sse42.crc32.64.8") {
3275 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3277 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3278 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3279 Name.starts_with(
"avx512.vbroadcast.s")) {
3282 Type *EltTy = VecTy->getElementType();
3283 unsigned EltNum = VecTy->getNumElements();
3287 for (
unsigned I = 0;
I < EltNum; ++
I)
3288 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3289 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3290 Name.starts_with(
"sse41.pmovzx") ||
3291 Name.starts_with(
"avx2.pmovsx") ||
3292 Name.starts_with(
"avx2.pmovzx") ||
3293 Name.starts_with(
"avx512.mask.pmovsx") ||
3294 Name.starts_with(
"avx512.mask.pmovzx")) {
3296 unsigned NumDstElts = DstTy->getNumElements();
3300 for (
unsigned i = 0; i != NumDstElts; ++i)
3305 bool DoSext = Name.contains(
"pmovsx");
3307 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3312 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3313 Name ==
"avx512.mask.pmov.qd.512" ||
3314 Name ==
"avx512.mask.pmov.wb.256" ||
3315 Name ==
"avx512.mask.pmov.wb.512") {
3320 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3321 Name ==
"avx2.vbroadcasti128") {
3327 if (NumSrcElts == 2)
3328 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3330 Rep = Builder.CreateShuffleVector(Load,
3332 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3333 Name.starts_with(
"avx512.mask.shuf.f")) {
3338 unsigned ControlBitsMask = NumLanes - 1;
3339 unsigned NumControlBits = NumLanes / 2;
3342 for (
unsigned l = 0; l != NumLanes; ++l) {
3343 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3345 if (l >= NumLanes / 2)
3346 LaneMask += NumLanes;
3347 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3348 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3354 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3355 Name.starts_with(
"avx512.mask.broadcasti")) {
3358 unsigned NumDstElts =
3362 for (
unsigned i = 0; i != NumDstElts; ++i)
3363 ShuffleMask[i] = i % NumSrcElts;
3369 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3370 Name.starts_with(
"avx2.vbroadcast") ||
3371 Name.starts_with(
"avx512.pbroadcast") ||
3372 Name.starts_with(
"avx512.mask.broadcast.s")) {
3379 Rep = Builder.CreateShuffleVector(
Op, M);
3384 }
else if (Name.starts_with(
"sse2.padds.") ||
3385 Name.starts_with(
"avx2.padds.") ||
3386 Name.starts_with(
"avx512.padds.") ||
3387 Name.starts_with(
"avx512.mask.padds.")) {
3389 }
else if (Name.starts_with(
"sse2.psubs.") ||
3390 Name.starts_with(
"avx2.psubs.") ||
3391 Name.starts_with(
"avx512.psubs.") ||
3392 Name.starts_with(
"avx512.mask.psubs.")) {
3394 }
else if (Name.starts_with(
"sse2.paddus.") ||
3395 Name.starts_with(
"avx2.paddus.") ||
3396 Name.starts_with(
"avx512.mask.paddus.")) {
3398 }
else if (Name.starts_with(
"sse2.psubus.") ||
3399 Name.starts_with(
"avx2.psubus.") ||
3400 Name.starts_with(
"avx512.mask.psubus.")) {
3402 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3407 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3411 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3416 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3421 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3422 Name ==
"avx512.psll.dq.512") {
3426 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3427 Name ==
"avx512.psrl.dq.512") {
3431 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3432 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3433 Name.starts_with(
"avx2.pblendd.")) {
3438 unsigned NumElts = VecTy->getNumElements();
3441 for (
unsigned i = 0; i != NumElts; ++i)
3442 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3444 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3445 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3446 Name ==
"avx2.vinserti128" ||
3447 Name.starts_with(
"avx512.mask.insert")) {
3451 unsigned DstNumElts =
3453 unsigned SrcNumElts =
3455 unsigned Scale = DstNumElts / SrcNumElts;
3462 for (
unsigned i = 0; i != SrcNumElts; ++i)
3464 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3465 Idxs[i] = SrcNumElts;
3466 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3480 for (
unsigned i = 0; i != DstNumElts; ++i)
3483 for (
unsigned i = 0; i != SrcNumElts; ++i)
3484 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3485 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3491 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3492 Name ==
"avx2.vextracti128" ||
3493 Name.starts_with(
"avx512.mask.vextract")) {
3496 unsigned DstNumElts =
3498 unsigned SrcNumElts =
3500 unsigned Scale = SrcNumElts / DstNumElts;
3507 for (
unsigned i = 0; i != DstNumElts; ++i) {
3508 Idxs[i] = i + (Imm * DstNumElts);
3510 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3516 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3517 Name.starts_with(
"avx512.mask.perm.di.")) {
3521 unsigned NumElts = VecTy->getNumElements();
3524 for (
unsigned i = 0; i != NumElts; ++i)
3525 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3527 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3532 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3544 unsigned HalfSize = NumElts / 2;
3556 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3557 for (
unsigned i = 0; i < HalfSize; ++i)
3558 ShuffleMask[i] = StartIndex + i;
3561 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3562 for (
unsigned i = 0; i < HalfSize; ++i)
3563 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3565 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3567 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3568 Name.starts_with(
"avx512.mask.vpermil.p") ||
3569 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3573 unsigned NumElts = VecTy->getNumElements();
3575 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3576 unsigned IdxMask = ((1 << IdxSize) - 1);
3582 for (
unsigned i = 0; i != NumElts; ++i)
3583 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3585 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3590 }
else if (Name ==
"sse2.pshufl.w" ||
3591 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3597 for (
unsigned l = 0; l != NumElts; l += 8) {
3598 for (
unsigned i = 0; i != 4; ++i)
3599 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3600 for (
unsigned i = 4; i != 8; ++i)
3601 Idxs[i + l] = i + l;
3604 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3609 }
else if (Name ==
"sse2.pshufh.w" ||
3610 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3616 for (
unsigned l = 0; l != NumElts; l += 8) {
3617 for (
unsigned i = 0; i != 4; ++i)
3618 Idxs[i + l] = i + l;
3619 for (
unsigned i = 0; i != 4; ++i)
3620 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3623 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3628 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3635 unsigned HalfLaneElts = NumLaneElts / 2;
3638 for (
unsigned i = 0; i != NumElts; ++i) {
3640 Idxs[i] = i - (i % NumLaneElts);
3642 if ((i % NumLaneElts) >= HalfLaneElts)
3646 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3649 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3653 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3654 Name.starts_with(
"avx512.mask.movshdup") ||
3655 Name.starts_with(
"avx512.mask.movsldup")) {
3661 if (Name.starts_with(
"avx512.mask.movshdup."))
3665 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3666 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3667 Idxs[i + l + 0] = i + l +
Offset;
3668 Idxs[i + l + 1] = i + l +
Offset;
3671 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3675 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3676 Name.starts_with(
"avx512.mask.unpckl.")) {
3683 for (
int l = 0; l != NumElts; l += NumLaneElts)
3684 for (
int i = 0; i != NumLaneElts; ++i)
3685 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3687 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3691 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3692 Name.starts_with(
"avx512.mask.unpckh.")) {
3699 for (
int l = 0; l != NumElts; l += NumLaneElts)
3700 for (
int i = 0; i != NumLaneElts; ++i)
3701 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3703 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3707 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3708 Name.starts_with(
"avx512.mask.pand.")) {
3711 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3713 Rep = Builder.CreateBitCast(Rep, FTy);
3716 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3717 Name.starts_with(
"avx512.mask.pandn.")) {
3720 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3721 Rep = Builder.CreateAnd(Rep,
3723 Rep = Builder.CreateBitCast(Rep, FTy);
3726 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3727 Name.starts_with(
"avx512.mask.por.")) {
3730 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3732 Rep = Builder.CreateBitCast(Rep, FTy);
3735 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3736 Name.starts_with(
"avx512.mask.pxor.")) {
3739 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3741 Rep = Builder.CreateBitCast(Rep, FTy);
3744 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3748 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3752 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3756 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3757 if (Name.ends_with(
".512")) {
3759 if (Name[17] ==
's')
3760 IID = Intrinsic::x86_avx512_add_ps_512;
3762 IID = Intrinsic::x86_avx512_add_pd_512;
3764 Rep = Builder.CreateIntrinsic(
3772 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3773 if (Name.ends_with(
".512")) {
3775 if (Name[17] ==
's')
3776 IID = Intrinsic::x86_avx512_div_ps_512;
3778 IID = Intrinsic::x86_avx512_div_pd_512;
3780 Rep = Builder.CreateIntrinsic(
3788 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3789 if (Name.ends_with(
".512")) {
3791 if (Name[17] ==
's')
3792 IID = Intrinsic::x86_avx512_mul_ps_512;
3794 IID = Intrinsic::x86_avx512_mul_pd_512;
3796 Rep = Builder.CreateIntrinsic(
3804 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3805 if (Name.ends_with(
".512")) {
3807 if (Name[17] ==
's')
3808 IID = Intrinsic::x86_avx512_sub_ps_512;
3810 IID = Intrinsic::x86_avx512_sub_pd_512;
3812 Rep = Builder.CreateIntrinsic(
3820 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3821 Name.starts_with(
"avx512.mask.min.p")) &&
3822 Name.drop_front(18) ==
".512") {
3823 bool IsDouble = Name[17] ==
'd';
3824 bool IsMin = Name[13] ==
'i';
3826 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3827 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3830 Rep = Builder.CreateIntrinsic(
3835 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3837 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3838 {CI->getArgOperand(0), Builder.getInt1(false)});
3841 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3842 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3843 bool IsVariable = Name[16] ==
'v';
3844 char Size = Name[16] ==
'.' ? Name[17]
3845 : Name[17] ==
'.' ? Name[18]
3846 : Name[18] ==
'.' ? Name[19]
3850 if (IsVariable && Name[17] !=
'.') {
3851 if (
Size ==
'd' && Name[17] ==
'2')
3852 IID = Intrinsic::x86_avx2_psllv_q;
3853 else if (
Size ==
'd' && Name[17] ==
'4')
3854 IID = Intrinsic::x86_avx2_psllv_q_256;
3855 else if (
Size ==
's' && Name[17] ==
'4')
3856 IID = Intrinsic::x86_avx2_psllv_d;
3857 else if (
Size ==
's' && Name[17] ==
'8')
3858 IID = Intrinsic::x86_avx2_psllv_d_256;
3859 else if (
Size ==
'h' && Name[17] ==
'8')
3860 IID = Intrinsic::x86_avx512_psllv_w_128;
3861 else if (
Size ==
'h' && Name[17] ==
'1')
3862 IID = Intrinsic::x86_avx512_psllv_w_256;
3863 else if (Name[17] ==
'3' && Name[18] ==
'2')
3864 IID = Intrinsic::x86_avx512_psllv_w_512;
3867 }
else if (Name.ends_with(
".128")) {
3869 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3870 : Intrinsic::x86_sse2_psll_d;
3871 else if (
Size ==
'q')
3872 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3873 : Intrinsic::x86_sse2_psll_q;
3874 else if (
Size ==
'w')
3875 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3876 : Intrinsic::x86_sse2_psll_w;
3879 }
else if (Name.ends_with(
".256")) {
3881 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3882 : Intrinsic::x86_avx2_psll_d;
3883 else if (
Size ==
'q')
3884 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3885 : Intrinsic::x86_avx2_psll_q;
3886 else if (
Size ==
'w')
3887 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3888 : Intrinsic::x86_avx2_psll_w;
3893 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3894 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3895 : Intrinsic::x86_avx512_psll_d_512;
3896 else if (
Size ==
'q')
3897 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3898 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3899 : Intrinsic::x86_avx512_psll_q_512;
3900 else if (
Size ==
'w')
3901 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3902 : Intrinsic::x86_avx512_psll_w_512;
3908 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3909 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3910 bool IsVariable = Name[16] ==
'v';
3911 char Size = Name[16] ==
'.' ? Name[17]
3912 : Name[17] ==
'.' ? Name[18]
3913 : Name[18] ==
'.' ? Name[19]
3917 if (IsVariable && Name[17] !=
'.') {
3918 if (
Size ==
'd' && Name[17] ==
'2')
3919 IID = Intrinsic::x86_avx2_psrlv_q;
3920 else if (
Size ==
'd' && Name[17] ==
'4')
3921 IID = Intrinsic::x86_avx2_psrlv_q_256;
3922 else if (
Size ==
's' && Name[17] ==
'4')
3923 IID = Intrinsic::x86_avx2_psrlv_d;
3924 else if (
Size ==
's' && Name[17] ==
'8')
3925 IID = Intrinsic::x86_avx2_psrlv_d_256;
3926 else if (
Size ==
'h' && Name[17] ==
'8')
3927 IID = Intrinsic::x86_avx512_psrlv_w_128;
3928 else if (
Size ==
'h' && Name[17] ==
'1')
3929 IID = Intrinsic::x86_avx512_psrlv_w_256;
3930 else if (Name[17] ==
'3' && Name[18] ==
'2')
3931 IID = Intrinsic::x86_avx512_psrlv_w_512;
3934 }
else if (Name.ends_with(
".128")) {
3936 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3937 : Intrinsic::x86_sse2_psrl_d;
3938 else if (
Size ==
'q')
3939 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3940 : Intrinsic::x86_sse2_psrl_q;
3941 else if (
Size ==
'w')
3942 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3943 : Intrinsic::x86_sse2_psrl_w;
3946 }
else if (Name.ends_with(
".256")) {
3948 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3949 : Intrinsic::x86_avx2_psrl_d;
3950 else if (
Size ==
'q')
3951 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3952 : Intrinsic::x86_avx2_psrl_q;
3953 else if (
Size ==
'w')
3954 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3955 : Intrinsic::x86_avx2_psrl_w;
3960 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3961 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3962 : Intrinsic::x86_avx512_psrl_d_512;
3963 else if (
Size ==
'q')
3964 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3965 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3966 : Intrinsic::x86_avx512_psrl_q_512;
3967 else if (
Size ==
'w')
3968 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3969 : Intrinsic::x86_avx512_psrl_w_512;
3975 }
else if (Name.starts_with(
"avx512.mask.psra")) {
3976 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3977 bool IsVariable = Name[16] ==
'v';
3978 char Size = Name[16] ==
'.' ? Name[17]
3979 : Name[17] ==
'.' ? Name[18]
3980 : Name[18] ==
'.' ? Name[19]
3984 if (IsVariable && Name[17] !=
'.') {
3985 if (
Size ==
's' && Name[17] ==
'4')
3986 IID = Intrinsic::x86_avx2_psrav_d;
3987 else if (
Size ==
's' && Name[17] ==
'8')
3988 IID = Intrinsic::x86_avx2_psrav_d_256;
3989 else if (
Size ==
'h' && Name[17] ==
'8')
3990 IID = Intrinsic::x86_avx512_psrav_w_128;
3991 else if (
Size ==
'h' && Name[17] ==
'1')
3992 IID = Intrinsic::x86_avx512_psrav_w_256;
3993 else if (Name[17] ==
'3' && Name[18] ==
'2')
3994 IID = Intrinsic::x86_avx512_psrav_w_512;
3997 }
else if (Name.ends_with(
".128")) {
3999 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4000 : Intrinsic::x86_sse2_psra_d;
4001 else if (
Size ==
'q')
4002 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4003 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4004 : Intrinsic::x86_avx512_psra_q_128;
4005 else if (
Size ==
'w')
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4007 : Intrinsic::x86_sse2_psra_w;
4010 }
else if (Name.ends_with(
".256")) {
4012 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4013 : Intrinsic::x86_avx2_psra_d;
4014 else if (
Size ==
'q')
4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4016 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4017 : Intrinsic::x86_avx512_psra_q_256;
4018 else if (
Size ==
'w')
4019 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4020 : Intrinsic::x86_avx2_psra_w;
4025 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4026 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4027 : Intrinsic::x86_avx512_psra_d_512;
4028 else if (
Size ==
'q')
4029 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4030 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4031 : Intrinsic::x86_avx512_psra_q_512;
4032 else if (
Size ==
'w')
4033 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4034 : Intrinsic::x86_avx512_psra_w_512;
4040 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4042 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4044 }
else if (Name.ends_with(
".movntdqa")) {
4048 LoadInst *LI = Builder.CreateAlignedLoad(
4053 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4054 Name.starts_with(
"fma.vfmsub.") ||
4055 Name.starts_with(
"fma.vfnmadd.") ||
4056 Name.starts_with(
"fma.vfnmsub.")) {
4057 bool NegMul = Name[6] ==
'n';
4058 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4059 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4070 if (NegMul && !IsScalar)
4071 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4072 if (NegMul && IsScalar)
4073 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4075 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4077 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4081 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4089 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4093 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4094 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4095 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4096 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4097 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4098 bool IsMask3 = Name[11] ==
'3';
4099 bool IsMaskZ = Name[11] ==
'z';
4101 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4102 bool NegMul = Name[2] ==
'n';
4103 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4109 if (NegMul && (IsMask3 || IsMaskZ))
4110 A = Builder.CreateFNeg(
A);
4111 if (NegMul && !(IsMask3 || IsMaskZ))
4112 B = Builder.CreateFNeg(
B);
4114 C = Builder.CreateFNeg(
C);
4116 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4117 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4118 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4125 if (Name.back() ==
'd')
4126 IID = Intrinsic::x86_avx512_vfmadd_f64;
4128 IID = Intrinsic::x86_avx512_vfmadd_f32;
4129 Rep = Builder.CreateIntrinsic(IID,
Ops);
4131 Rep = Builder.CreateFMA(
A,
B,
C);
4140 if (NegAcc && IsMask3)
4145 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4147 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4148 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4149 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4150 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4151 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4152 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4153 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4154 bool IsMask3 = Name[11] ==
'3';
4155 bool IsMaskZ = Name[11] ==
'z';
4157 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4158 bool NegMul = Name[2] ==
'n';
4159 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4165 if (NegMul && (IsMask3 || IsMaskZ))
4166 A = Builder.CreateFNeg(
A);
4167 if (NegMul && !(IsMask3 || IsMaskZ))
4168 B = Builder.CreateFNeg(
B);
4170 C = Builder.CreateFNeg(
C);
4177 if (Name[Name.size() - 5] ==
's')
4178 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4180 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4184 Rep = Builder.CreateFMA(
A,
B,
C);
4192 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4196 if (VecWidth == 128 && EltWidth == 32)
4197 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4198 else if (VecWidth == 256 && EltWidth == 32)
4199 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4200 else if (VecWidth == 128 && EltWidth == 64)
4201 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4202 else if (VecWidth == 256 && EltWidth == 64)
4203 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4209 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4210 Rep = Builder.CreateIntrinsic(IID,
Ops);
4211 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4212 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4213 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4214 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4215 bool IsMask3 = Name[11] ==
'3';
4216 bool IsMaskZ = Name[11] ==
'z';
4218 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4219 bool IsSubAdd = Name[3] ==
's';
4223 if (Name[Name.size() - 5] ==
's')
4224 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4226 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4231 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4233 Rep = Builder.CreateIntrinsic(IID,
Ops);
4242 Value *Odd = Builder.CreateCall(FMA,
Ops);
4243 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4244 Value *Even = Builder.CreateCall(FMA,
Ops);
4250 for (
int i = 0; i != NumElts; ++i)
4251 Idxs[i] = i + (i % 2) * NumElts;
4253 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4261 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4262 Name.starts_with(
"avx512.maskz.pternlog.")) {
4263 bool ZeroMask = Name[11] ==
'z';
4267 if (VecWidth == 128 && EltWidth == 32)
4268 IID = Intrinsic::x86_avx512_pternlog_d_128;
4269 else if (VecWidth == 256 && EltWidth == 32)
4270 IID = Intrinsic::x86_avx512_pternlog_d_256;
4271 else if (VecWidth == 512 && EltWidth == 32)
4272 IID = Intrinsic::x86_avx512_pternlog_d_512;
4273 else if (VecWidth == 128 && EltWidth == 64)
4274 IID = Intrinsic::x86_avx512_pternlog_q_128;
4275 else if (VecWidth == 256 && EltWidth == 64)
4276 IID = Intrinsic::x86_avx512_pternlog_q_256;
4277 else if (VecWidth == 512 && EltWidth == 64)
4278 IID = Intrinsic::x86_avx512_pternlog_q_512;
4284 Rep = Builder.CreateIntrinsic(IID, Args);
4288 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4289 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4290 bool ZeroMask = Name[11] ==
'z';
4291 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4294 if (VecWidth == 128 && !
High)
4295 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4296 else if (VecWidth == 256 && !
High)
4297 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4298 else if (VecWidth == 512 && !
High)
4299 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4300 else if (VecWidth == 128 &&
High)
4301 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4302 else if (VecWidth == 256 &&
High)
4303 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4304 else if (VecWidth == 512 &&
High)
4305 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4311 Rep = Builder.CreateIntrinsic(IID, Args);
4315 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4316 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4317 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4318 bool ZeroMask = Name[11] ==
'z';
4319 bool IndexForm = Name[17] ==
'i';
4321 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4322 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4323 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4324 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4325 bool ZeroMask = Name[11] ==
'z';
4326 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4329 if (VecWidth == 128 && !IsSaturating)
4330 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4331 else if (VecWidth == 256 && !IsSaturating)
4332 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4333 else if (VecWidth == 512 && !IsSaturating)
4334 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4335 else if (VecWidth == 128 && IsSaturating)
4336 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4337 else if (VecWidth == 256 && IsSaturating)
4338 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4339 else if (VecWidth == 512 && IsSaturating)
4340 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4350 if (Args[1]->
getType()->isVectorTy() &&
4353 ->isIntegerTy(32) &&
4354 Args[2]->
getType()->isVectorTy() &&
4357 ->isIntegerTy(32)) {
4358 Type *NewArgType =
nullptr;
4359 if (VecWidth == 128)
4361 else if (VecWidth == 256)
4363 else if (VecWidth == 512)
4368 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4369 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4372 Rep = Builder.CreateIntrinsic(IID, Args);
4376 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4377 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4378 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4379 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4380 bool ZeroMask = Name[11] ==
'z';
4381 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4384 if (VecWidth == 128 && !IsSaturating)
4385 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4386 else if (VecWidth == 256 && !IsSaturating)
4387 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4388 else if (VecWidth == 512 && !IsSaturating)
4389 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4390 else if (VecWidth == 128 && IsSaturating)
4391 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4392 else if (VecWidth == 256 && IsSaturating)
4393 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4394 else if (VecWidth == 512 && IsSaturating)
4395 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4405 if (Args[1]->
getType()->isVectorTy() &&
4408 ->isIntegerTy(32) &&
4409 Args[2]->
getType()->isVectorTy() &&
4412 ->isIntegerTy(32)) {
4413 Type *NewArgType =
nullptr;
4414 if (VecWidth == 128)
4416 else if (VecWidth == 256)
4418 else if (VecWidth == 512)
4423 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4424 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4427 Rep = Builder.CreateIntrinsic(IID, Args);
4431 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4432 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4433 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4435 if (Name[0] ==
'a' && Name.back() ==
'2')
4436 IID = Intrinsic::x86_addcarry_32;
4437 else if (Name[0] ==
'a' && Name.back() ==
'4')
4438 IID = Intrinsic::x86_addcarry_64;
4439 else if (Name[0] ==
's' && Name.back() ==
'2')
4440 IID = Intrinsic::x86_subborrow_32;
4441 else if (Name[0] ==
's' && Name.back() ==
'4')
4442 IID = Intrinsic::x86_subborrow_64;
4449 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4452 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4455 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4459 }
else if (Name.starts_with(
"avx512.mask.") &&
4469 if (Name.starts_with(
"neon.bfcvt")) {
4470 if (Name.starts_with(
"neon.bfcvtn2")) {
4472 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4474 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4475 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4478 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4479 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4481 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4485 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4486 return Builder.CreateShuffleVector(
4489 return Builder.CreateFPTrunc(CI->
getOperand(0),
4492 }
else if (Name.starts_with(
"sve.fcvt")) {
4495 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4496 .
Case(
"sve.fcvtnt.bf16f32",
4497 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4509 if (Args[1]->
getType() != BadPredTy)
4512 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4513 BadPredTy, Args[1]);
4514 Args[1] = Builder.CreateIntrinsic(
4515 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4517 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4526 if (Name ==
"mve.vctp64.old") {
4529 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4532 Value *C1 = Builder.CreateIntrinsic(
4533 Intrinsic::arm_mve_pred_v2i,
4535 return Builder.CreateIntrinsic(
4536 Intrinsic::arm_mve_pred_i2v,
4538 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4539 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4540 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4541 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4543 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4544 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4545 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4546 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4548 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4549 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4550 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4551 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4552 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4553 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4554 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4555 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4556 std::vector<Type *> Tys;
4560 case Intrinsic::arm_mve_mull_int_predicated:
4561 case Intrinsic::arm_mve_vqdmull_predicated:
4562 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4565 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4566 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4567 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4571 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4575 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4579 case Intrinsic::arm_cde_vcx1q_predicated:
4580 case Intrinsic::arm_cde_vcx1qa_predicated:
4581 case Intrinsic::arm_cde_vcx2q_predicated:
4582 case Intrinsic::arm_cde_vcx2qa_predicated:
4583 case Intrinsic::arm_cde_vcx3q_predicated:
4584 case Intrinsic::arm_cde_vcx3qa_predicated:
4591 std::vector<Value *>
Ops;
4593 Type *Ty =
Op->getType();
4594 if (Ty->getScalarSizeInBits() == 1) {
4595 Value *C1 = Builder.CreateIntrinsic(
4596 Intrinsic::arm_mve_pred_v2i,
4598 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4603 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4633 if (NumOperands < 3)
4646 bool IsVolatile =
false;
4650 if (NumOperands > 3)
4655 if (NumOperands > 5) {
4657 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4671 if (VT->getElementType()->isIntegerTy(16)) {
4674 Val = Builder.CreateBitCast(Val, AsBF16);
4682 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4684 unsigned AddrSpace = PtrTy->getAddressSpace();
4687 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4689 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4694 MDNode *RangeNotPrivate =
4697 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4703 return Builder.CreateBitCast(RMW, RetTy);
4724 return MAV->getMetadata();
4731 return I->getDebugLoc().getAsMDNode();
4739 if (Name ==
"label") {
4742 }
else if (Name ==
"assign") {
4749 }
else if (Name ==
"declare") {
4754 }
else if (Name ==
"addr") {
4764 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4766 }
else if (Name ==
"value") {
4769 unsigned ExprOp = 2;
4783 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4805 assert(Name.starts_with(
"llvm.") &&
"Intrinsic doesn't start with 'llvm.'");
4806 Name = Name.substr(5);
4808 bool IsX86 = Name.consume_front(
"x86.");
4809 bool IsNVVM = Name.consume_front(
"nvvm.");
4810 bool IsAArch64 = Name.consume_front(
"aarch64.");
4811 bool IsARM = Name.consume_front(
"arm.");
4812 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
4813 bool IsDbg = Name.consume_front(
"dbg.");
4814 Value *Rep =
nullptr;
4816 if (!IsX86 && Name ==
"stackprotectorcheck") {
4818 }
else if (IsNVVM) {
4822 }
else if (IsAArch64) {
4826 }
else if (IsAMDGCN) {
4840 const auto &DefaultCase = [&]() ->
void {
4848 "Unknown function for CallBase upgrade and isn't just a name change");
4856 "Return type must have changed");
4857 assert(OldST->getNumElements() ==
4859 "Must have same number of elements");
4862 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4865 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4866 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4867 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4886 case Intrinsic::arm_neon_vst1:
4887 case Intrinsic::arm_neon_vst2:
4888 case Intrinsic::arm_neon_vst3:
4889 case Intrinsic::arm_neon_vst4:
4890 case Intrinsic::arm_neon_vst2lane:
4891 case Intrinsic::arm_neon_vst3lane:
4892 case Intrinsic::arm_neon_vst4lane: {
4894 NewCall = Builder.CreateCall(NewFn, Args);
4897 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4898 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4899 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4904 NewCall = Builder.CreateCall(NewFn, Args);
4907 case Intrinsic::aarch64_sve_ld3_sret:
4908 case Intrinsic::aarch64_sve_ld4_sret:
4909 case Intrinsic::aarch64_sve_ld2_sret: {
4911 Name = Name.substr(5);
4918 unsigned MinElts = RetTy->getMinNumElements() /
N;
4920 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4922 for (
unsigned I = 0;
I <
N;
I++) {
4923 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
4924 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
4930 case Intrinsic::coro_end: {
4933 NewCall = Builder.CreateCall(NewFn, Args);
4937 case Intrinsic::vector_extract: {
4939 Name = Name.substr(5);
4940 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
4945 unsigned MinElts = RetTy->getMinNumElements();
4948 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
4952 case Intrinsic::vector_insert: {
4954 Name = Name.substr(5);
4955 if (!Name.starts_with(
"aarch64.sve.tuple")) {
4959 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
4964 NewCall = Builder.CreateCall(
4968 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
4974 assert(
N > 1 &&
"Create is expected to be between 2-4");
4977 unsigned MinElts = RetTy->getMinNumElements() /
N;
4978 for (
unsigned I = 0;
I <
N;
I++) {
4980 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
4987 case Intrinsic::arm_neon_bfdot:
4988 case Intrinsic::arm_neon_bfmmla:
4989 case Intrinsic::arm_neon_bfmlalb:
4990 case Intrinsic::arm_neon_bfmlalt:
4991 case Intrinsic::aarch64_neon_bfdot:
4992 case Intrinsic::aarch64_neon_bfmmla:
4993 case Intrinsic::aarch64_neon_bfmlalb:
4994 case Intrinsic::aarch64_neon_bfmlalt: {
4997 "Mismatch between function args and call args");
4998 size_t OperandWidth =
5000 assert((OperandWidth == 64 || OperandWidth == 128) &&
5001 "Unexpected operand width");
5003 auto Iter = CI->
args().begin();
5004 Args.push_back(*Iter++);
5005 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5006 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5007 NewCall = Builder.CreateCall(NewFn, Args);
5011 case Intrinsic::bitreverse:
5012 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5015 case Intrinsic::ctlz:
5016 case Intrinsic::cttz: {
5023 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5027 case Intrinsic::objectsize: {
5028 Value *NullIsUnknownSize =
5032 NewCall = Builder.CreateCall(
5037 case Intrinsic::ctpop:
5038 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5041 case Intrinsic::convert_from_fp16:
5042 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5045 case Intrinsic::dbg_value: {
5047 Name = Name.substr(5);
5049 if (Name.starts_with(
"dbg.addr")) {
5063 if (
Offset->isZeroValue()) {
5064 NewCall = Builder.CreateCall(
5073 case Intrinsic::ptr_annotation:
5081 NewCall = Builder.CreateCall(
5090 case Intrinsic::var_annotation:
5097 NewCall = Builder.CreateCall(
5106 case Intrinsic::riscv_aes32dsi:
5107 case Intrinsic::riscv_aes32dsmi:
5108 case Intrinsic::riscv_aes32esi:
5109 case Intrinsic::riscv_aes32esmi:
5110 case Intrinsic::riscv_sm4ks:
5111 case Intrinsic::riscv_sm4ed: {
5121 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5122 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5128 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5129 Value *Res = NewCall;
5131 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5137 case Intrinsic::nvvm_mapa_shared_cluster: {
5141 Value *Res = NewCall;
5142 Res = Builder.CreateAddrSpaceCast(
5149 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5150 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5153 Args[0] = Builder.CreateAddrSpaceCast(
5156 NewCall = Builder.CreateCall(NewFn, Args);
5162 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5163 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5164 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5165 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5166 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5167 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5168 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5169 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5176 Args[0] = Builder.CreateAddrSpaceCast(
5185 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5187 NewCall = Builder.CreateCall(NewFn, Args);
5193 case Intrinsic::riscv_sha256sig0:
5194 case Intrinsic::riscv_sha256sig1:
5195 case Intrinsic::riscv_sha256sum0:
5196 case Intrinsic::riscv_sha256sum1:
5197 case Intrinsic::riscv_sm3p0:
5198 case Intrinsic::riscv_sm3p1: {
5205 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5207 NewCall = Builder.CreateCall(NewFn, Arg);
5209 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5216 case Intrinsic::x86_xop_vfrcz_ss:
5217 case Intrinsic::x86_xop_vfrcz_sd:
5218 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5221 case Intrinsic::x86_xop_vpermil2pd:
5222 case Intrinsic::x86_xop_vpermil2ps:
5223 case Intrinsic::x86_xop_vpermil2pd_256:
5224 case Intrinsic::x86_xop_vpermil2ps_256: {
5228 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5229 NewCall = Builder.CreateCall(NewFn, Args);
5233 case Intrinsic::x86_sse41_ptestc:
5234 case Intrinsic::x86_sse41_ptestz:
5235 case Intrinsic::x86_sse41_ptestnzc: {
5249 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5250 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5252 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5256 case Intrinsic::x86_rdtscp: {
5262 NewCall = Builder.CreateCall(NewFn);
5264 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5267 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5275 case Intrinsic::x86_sse41_insertps:
5276 case Intrinsic::x86_sse41_dppd:
5277 case Intrinsic::x86_sse41_dpps:
5278 case Intrinsic::x86_sse41_mpsadbw:
5279 case Intrinsic::x86_avx_dp_ps_256:
5280 case Intrinsic::x86_avx2_mpsadbw: {
5286 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5287 NewCall = Builder.CreateCall(NewFn, Args);
5291 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5292 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5293 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5294 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5295 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5296 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5302 NewCall = Builder.CreateCall(NewFn, Args);
5311 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5312 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5313 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5314 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5315 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5316 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5320 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5321 Args[1] = Builder.CreateBitCast(
5324 NewCall = Builder.CreateCall(NewFn, Args);
5325 Value *Res = Builder.CreateBitCast(
5333 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5334 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5335 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5339 Args[1] = Builder.CreateBitCast(
5341 Args[2] = Builder.CreateBitCast(
5344 NewCall = Builder.CreateCall(NewFn, Args);
5348 case Intrinsic::thread_pointer: {
5349 NewCall = Builder.CreateCall(NewFn, {});
5353 case Intrinsic::memcpy:
5354 case Intrinsic::memmove:
5355 case Intrinsic::memset: {
5371 NewCall = Builder.CreateCall(NewFn, Args);
5373 AttributeList NewAttrs = AttributeList::get(
5374 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5375 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5376 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5381 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5384 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5388 case Intrinsic::masked_load:
5389 case Intrinsic::masked_gather:
5390 case Intrinsic::masked_store:
5391 case Intrinsic::masked_scatter: {
5397 auto GetMaybeAlign = [](
Value *
Op) {
5407 auto GetAlign = [&](
Value *
Op) {
5416 case Intrinsic::masked_load:
5417 NewCall = Builder.CreateMaskedLoad(
5421 case Intrinsic::masked_gather:
5422 NewCall = Builder.CreateMaskedGather(
5428 case Intrinsic::masked_store:
5429 NewCall = Builder.CreateMaskedStore(
5433 case Intrinsic::masked_scatter:
5434 NewCall = Builder.CreateMaskedScatter(
5436 DL.getValueOrABITypeAlignment(
5450 case Intrinsic::lifetime_start:
5451 case Intrinsic::lifetime_end: {
5463 NewCall = Builder.CreateLifetimeStart(Ptr);
5465 NewCall = Builder.CreateLifetimeEnd(Ptr);
5474 case Intrinsic::x86_avx512_vpdpbusd_128:
5475 case Intrinsic::x86_avx512_vpdpbusd_256:
5476 case Intrinsic::x86_avx512_vpdpbusd_512:
5477 case Intrinsic::x86_avx512_vpdpbusds_128:
5478 case Intrinsic::x86_avx512_vpdpbusds_256:
5479 case Intrinsic::x86_avx512_vpdpbusds_512:
5480 case Intrinsic::x86_avx2_vpdpbssd_128:
5481 case Intrinsic::x86_avx2_vpdpbssd_256:
5482 case Intrinsic::x86_avx10_vpdpbssd_512:
5483 case Intrinsic::x86_avx2_vpdpbssds_128:
5484 case Intrinsic::x86_avx2_vpdpbssds_256:
5485 case Intrinsic::x86_avx10_vpdpbssds_512:
5486 case Intrinsic::x86_avx2_vpdpbsud_128:
5487 case Intrinsic::x86_avx2_vpdpbsud_256:
5488 case Intrinsic::x86_avx10_vpdpbsud_512:
5489 case Intrinsic::x86_avx2_vpdpbsuds_128:
5490 case Intrinsic::x86_avx2_vpdpbsuds_256:
5491 case Intrinsic::x86_avx10_vpdpbsuds_512:
5492 case Intrinsic::x86_avx2_vpdpbuud_128:
5493 case Intrinsic::x86_avx2_vpdpbuud_256:
5494 case Intrinsic::x86_avx10_vpdpbuud_512:
5495 case Intrinsic::x86_avx2_vpdpbuuds_128:
5496 case Intrinsic::x86_avx2_vpdpbuuds_256:
5497 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5502 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5503 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5505 NewCall = Builder.CreateCall(NewFn, Args);
5508 case Intrinsic::x86_avx512_vpdpwssd_128:
5509 case Intrinsic::x86_avx512_vpdpwssd_256:
5510 case Intrinsic::x86_avx512_vpdpwssd_512:
5511 case Intrinsic::x86_avx512_vpdpwssds_128:
5512 case Intrinsic::x86_avx512_vpdpwssds_256:
5513 case Intrinsic::x86_avx512_vpdpwssds_512:
5514 case Intrinsic::x86_avx2_vpdpwsud_128:
5515 case Intrinsic::x86_avx2_vpdpwsud_256:
5516 case Intrinsic::x86_avx10_vpdpwsud_512:
5517 case Intrinsic::x86_avx2_vpdpwsuds_128:
5518 case Intrinsic::x86_avx2_vpdpwsuds_256:
5519 case Intrinsic::x86_avx10_vpdpwsuds_512:
5520 case Intrinsic::x86_avx2_vpdpwusd_128:
5521 case Intrinsic::x86_avx2_vpdpwusd_256:
5522 case Intrinsic::x86_avx10_vpdpwusd_512:
5523 case Intrinsic::x86_avx2_vpdpwusds_128:
5524 case Intrinsic::x86_avx2_vpdpwusds_256:
5525 case Intrinsic::x86_avx10_vpdpwusds_512:
5526 case Intrinsic::x86_avx2_vpdpwuud_128:
5527 case Intrinsic::x86_avx2_vpdpwuud_256:
5528 case Intrinsic::x86_avx10_vpdpwuud_512:
5529 case Intrinsic::x86_avx2_vpdpwuuds_128:
5530 case Intrinsic::x86_avx2_vpdpwuuds_256:
5531 case Intrinsic::x86_avx10_vpdpwuuds_512:
5536 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5537 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5539 NewCall = Builder.CreateCall(NewFn, Args);
5542 assert(NewCall &&
"Should have either set this variable or returned through "
5543 "the default case");
5550 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5564 F->eraseFromParent();
5570 if (NumOperands == 0)
5578 if (NumOperands == 3) {
5582 Metadata *Elts2[] = {ScalarType, ScalarType,
5596 if (
Opc != Instruction::BitCast)
5600 Type *SrcTy = V->getType();
5617 if (
Opc != Instruction::BitCast)
5620 Type *SrcTy =
C->getType();
5647 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5648 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5649 if (Flag->getNumOperands() < 3)
5651 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5652 return K->getString() ==
"Debug Info Version";
5655 if (OpIt != ModFlags->op_end()) {
5656 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5663 bool BrokenDebugInfo =
false;
5666 if (!BrokenDebugInfo)
5672 M.getContext().diagnose(Diag);
5679 M.getContext().diagnose(DiagVersion);
5689 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5692 if (
F->hasFnAttribute(Attr)) {
5695 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5697 auto [Part, Rest] = S.
split(
',');
5703 const unsigned Dim = DimC -
'x';
5704 assert(Dim < 3 &&
"Unexpected dim char");
5714 F->addFnAttr(Attr, NewAttr);
5718 return S ==
"x" || S ==
"y" || S ==
"z";
5723 if (K ==
"kernel") {
5735 const unsigned Idx = (AlignIdxValuePair >> 16);
5736 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5741 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5746 if (K ==
"minctasm") {
5751 if (K ==
"maxnreg") {
5756 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5760 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5764 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5768 if (K ==
"grid_constant") {
5783 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5790 if (!SeenNodes.
insert(MD).second)
5797 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5804 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5806 const MDOperand &V = MD->getOperand(j + 1);
5809 NewOperands.
append({K, V});
5812 if (NewOperands.
size() > 1)
5825 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
5826 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5827 if (ModRetainReleaseMarker) {
5833 ID->getString().split(ValueComp,
"#");
5834 if (ValueComp.
size() == 2) {
5835 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
5839 M.eraseNamedMetadata(ModRetainReleaseMarker);
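// UpgradeARCRuntime: rewrite calls to ObjC ARC runtime entry points as calls
// to the equivalent LLVM intrinsics, bitcasting arguments and return values
// wherever the old declaration's types do not match the intrinsic's.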
5850 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5876 bool InvalidCast = false;
5878 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5891 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5893 Args.push_back(Arg);
5900 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5905 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5918 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5926 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5927 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5928 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5929 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5930 {"objc_autoreleaseReturnValue",
5931 llvm::Intrinsic::objc_autoreleaseReturnValue},
5932 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5933 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5934 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5935 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5936 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5937 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5938 {"objc_release", llvm::Intrinsic::objc_release},
5939 {"objc_retain", llvm::Intrinsic::objc_retain},
5940 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5941 {"objc_retainAutoreleaseReturnValue",
5942 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5943 {"objc_retainAutoreleasedReturnValue",
5944 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5945 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5946 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5947 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5948 {"objc_unsafeClaimAutoreleasedReturnValue",
5949 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5950 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5951 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5952 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5953 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5954 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5955 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5956 {"objc_arc_annotation_topdown_bbstart",
5957 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5958 {"objc_arc_annotation_topdown_bbend",
5959 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5960 {"objc_arc_annotation_bottomup_bbstart",
5961 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5962 {"objc_arc_annotation_bottomup_bbend",
5963 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5965 for (auto &I : RuntimeFuncs)
5966 UpgradeToIntrinsic(I.first, I.second);
5970 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5974 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5975 bool HasSwiftVersionFlag = false;
5976 uint8_t SwiftMajorVersion, SwiftMinorVersion;
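// UpgradeModuleFlags: walk the flags once, noting which ObjC/Swift flags are
// present and rewriting any whose key, behavior, or value has changed.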
5983 if (Op->getNumOperands() != 3)
5997 if (ID->getString() == "Objective-C Image Info Version")
5999 if (ID->getString() == "Objective-C Class Properties")
6000 HasClassProperties = true;
6002 if (ID->getString() == "PIC Level") {
6003 if (auto *Behavior =
6005 uint64_t V = Behavior->getLimitedValue();
6011 if (ID->getString() == "PIE Level")
6012 if (auto *Behavior =
6019 if (ID->getString() == "branch-target-enforcement" ||
6020 ID->getString().starts_with("sign-return-address")) {
6021 if (auto *Behavior =
6027 Op->getOperand(1), Op->getOperand(2)};
6037 if (ID->getString() == "Objective-C Image Info Section") {
6040 Value->getString().split(ValueComp, " ");
6041 if (ValueComp.size() != 1) {
6042 std::string NewValue;
6043 for (auto &S : ValueComp)
6044 NewValue += S.str();
6055 if (ID->getString() == "Objective-C Garbage Collection") {
6058 assert(Md->getValue() && "Expected non-empty metadata");
6059 auto Type = Md->getValue()->getType();
6062 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6063 if ((Val & 0xff) != Val) {
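// The legacy flag packs the Swift version into one integer: the ABI version
// in bits 8-15, the minor version in bits 16-23, and the major version in
// bits 24-31; the fields are unpacked here and re-emitted as separate flags.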
6064 HasSwiftVersionFlag = true;
6065 SwiftABIVersion = (Val & 0xff00) >> 8;
6066 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6067 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6078 if (ID->getString() == "amdgpu_code_object_version") {
6081 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6093 if (HasObjCFlag && !HasClassProperties) {
6099 if (HasSwiftVersionFlag) {
6103 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6105 ConstantInt::get(Int8Ty, SwiftMinorVersion));
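// UpgradeSectionAttributes: normalize the spelling of __DATA,__objc_catlist
// section names by trimming the whitespace around each comma component.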
6113 auto TrimSpaces = [](StringRef Section) -> std::string {
6115 Section.split(Components, ',');
6120 for (auto Component : Components)
6121 OS << ',' << Component.trim();
6126 for (auto &GV : M.globals()) {
6127 if (!GV.hasSection())
6132 if (!Section.starts_with("__DATA, __objc_catlist"))
6137 GV.setSection(TrimSpaces(Section));
6153 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6154 StrictFPUpgradeVisitor() = default;
6157 if (!Call.isStrictFP())
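// A call site inside a non-strictfp function may no longer carry strictfp,
// so the attribute is swapped for nobuiltin below, which roughly preserves
// the "do not fold this call" intent without the strict-FP semantics.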
6163 Call.removeFnAttr(Attribute::StrictFP);
6164 Call.addFnAttr(Attribute::NoBuiltin);
6169 struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6170 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6171 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6173 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6188 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6189 StrictFPUpgradeVisitor SFPV;
6194 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6195 F.getReturnType(), F.getAttributes().getRetAttrs()));
6196 for (auto &Arg : F.args())
6198 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6202 if (Attribute A = F.getFnAttribute("implicit-section-name");
6203 A.isValid() && A.isStringAttribute()) {
6204 F.setSection(A.getValueAsString());
6205 F.removeFnAttr("implicit-section-name");
6212 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6215 if (A.getValueAsBool()) {
6216 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6222 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6230 if (!F.hasFnAttribute(FnAttrName))
6231 F.addFnAttr(FnAttrName, Value);
6238 if (!F.hasFnAttribute(FnAttrName)) {
6240 F.addFnAttr(FnAttrName);
6242 auto A = F.getFnAttribute(FnAttrName);
6243 if ("false" == A.getValueAsString())
6244 F.removeFnAttr(FnAttrName);
6245 else if ("true" == A.getValueAsString()) {
6246 F.removeFnAttr(FnAttrName);
6247 F.addFnAttr(FnAttrName);
6253 Triple T(M.getTargetTriple());
6254 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6264 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6268 if (Op->getNumOperands() != 3)
6277 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6278 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6279 : IDStr == "guarded-control-stack" ? &GCSValue
6280 : IDStr == "sign-return-address" ? &SRAValue
6281 : IDStr == "sign-return-address-all" ? &SRAALLValue
6282 : IDStr == "sign-return-address-with-bkey"
6288 *ValPtr = CI->getZExtValue();
6294 bool BTE = BTEValue == 1;
6295 bool BPPLR = BPPLRValue == 1;
6296 bool GCS = GCSValue == 1;
6297 bool SRA = SRAValue == 1;
6300 if (SRA && SRAALLValue == 1)
6301 SignTypeValue = "all";
6304 if (SRA && SRABKeyValue == 1)
6305 SignKeyValue = "b_key";
6307 for (Function &F : M.getFunctionList()) {
6308 if (F.isDeclaration())
6315 if (auto A = F.getFnAttribute("sign-return-address");
6316 A.isValid() && "none" == A.getValueAsString()) {
6317 F.removeFnAttr("sign-return-address");
6318 F.removeFnAttr("sign-return-address-key");
6334 if (SRAALLValue == 1)
6336 if (SRABKeyValue == 1)
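// Loop metadata upgrade: tags in the retired "llvm.vectorizer." namespace
// are renamed into the "llvm.loop.vectorize." family (with
// "llvm.vectorizer.unroll" becoming "llvm.loop.interleave.count") and the
// remaining operands are copied through unchanged.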
6345 if (T->getNumOperands() < 1)
6350 return S->getString().starts_with("llvm.vectorizer.");
6354 StringRef OldPrefix = "llvm.vectorizer.";
6357 if (OldTag == "llvm.vectorizer.unroll")
6369 if (T->getNumOperands() < 1)
6374 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6379 Ops.reserve(T->getNumOperands());
6381 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6382 Ops.push_back(T->getOperand(I));
6396 Ops.reserve(T->getNumOperands());
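// UpgradeDataLayoutString: the target-specific fixups below splice
// components that newer releases require (address-space sizes, i128
// alignment, f80 width, Fn32) into datalayout strings produced by older
// toolchains.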
6407 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6408 !DL.contains("-G") && !DL.starts_with("G")) {
6409 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6412 if (T.isLoongArch64() || T.isRISCV64()) {
6414 auto I = DL.find("-n64-");
6416 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6421 std::string Res = DL.str();
6424 if (!DL.contains("-G") && !DL.starts_with("G"))
6425 Res.append(Res.empty() ? "G1" : "-G1");
6433 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6434 Res.append("-ni:7:8:9");
6436 if (DL.ends_with("ni:7"))
6438 if (DL.ends_with("ni:7:8"))
6443 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6444 Res.append("-p7:160:256:256:32");
6445 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6446 Res.append("-p8:128:128:128:48");
6447 constexpr StringRef OldP8("-p8:128:128-");
6448 if (DL.contains(OldP8))
6449 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6450 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6451 Res.append("-p9:192:256:256:32");
6455 if (!DL.contains("m:e"))
6456 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6461 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6464 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6465 if (!DL.contains(AddrSpaces)) {
6467 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6468 if (R.match(Res, &Groups))
6474 if (T.isAArch64()) {
6476 if (!DL.empty() && !DL.contains("-Fn32"))
6477 Res.append("-Fn32");
6478 AddPtr32Ptr64AddrSpaces();
6482 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6486 std::string I64 = "-i64:64";
6487 std::string I128 = "-i128:128";
6489 size_t Pos = Res.find(I64);
6490 if (Pos != size_t(-1))
6491 Res.insert(Pos + I64.size(), I128);
6495 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6496 size_t Pos = Res.find("-S128");
6499 Res.insert(Pos, "-f64:32:64");
6505 AddPtr32Ptr64AddrSpaces();
6513 if (!T.isOSIAMCU()) {
6514 std::string I128 = "-i128:128";
6517 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6518 if (R.match(Res, &Groups))
6526 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6528 auto I = Ref.find("-f80:32-");
6530 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
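// UpgradeAttributes: fold the old boolean "no-frame-pointer-elim" and
// "no-frame-pointer-elim-non-leaf" string attributes into the single
// "frame-pointer" attribute, and turn "null-pointer-is-valid" into the enum
// attribute of the same name.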
6538 Attribute A = B.getAttribute("no-frame-pointer-elim");
6541 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6542 B.removeAttribute("no-frame-pointer-elim");
6544 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6546 if (FramePointer != "all")
6547 FramePointer = "non-leaf";
6548 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6550 if (!FramePointer.empty())
6551 B.addAttribute("frame-pointer", FramePointer);
6553 A = B.getAttribute("null-pointer-is-valid");
6556 bool NullPointerIsValid = A.getValueAsString() == "true";
6557 B.removeAttribute("null-pointer-is-valid");
6558 if (NullPointerIsValid)
6559 B.addAttribute(Attribute::NullPointerIsValid);
6569 return OBD.getTag() == "clang.arc.attachedcall" &&