Created attachment 20856 [details] preprocessed source for lib/raid6/vpermxor1.c When building lib/raid6/vpermxor1.c from the Linux kernel, clang fails with: lib/raid6/vpermxor1.c:64:8: error: couldn't allocate output register for constraint 'v' asm(VPERMXOR(%0,%1,%2,%3) : "=v"(wq0) : "v"(gf_high), "v"(gf_low), "v"(wq0)); ^ clang-8 -Wp,-MD,lib/raid6/.vpermxor1.o.d -nostdinc -isystem /scratch/joel/llvm-build/lib/clang/8.0.0/include -I./arch/powerpc/include -I./arch/powerpc/include/generated -I./include -I./arch/powerpc/include/uapi -I./arch/powerpc/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/kconfig.h -include ./include/linux/compiler_types.h -D__KERNEL__ -Iarch/powerpc -DHAVE_AS_ATHIGH=1 -Qunused-arguments -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -Werror-implicit-function-declaration -Wno-format-security -std=gnu89 -no-integrated-as -fno-PIE -mlittle-endian -m64 -msoft-float -pipe -Iarch/powerpc -mabi=elfv2 -mcmodel=medium -mcpu=power8 -mtune=power9 -mno-altivec -mno-vsx -funit-at-a-time -fno-dwarf2-cfi-asm -Wa,-maltivec -Wa,-mpower8 -mlittle-endian -fno-delete-null-pointer-checks -O2 -Wframe-larger-than=2048 -fno-stack-protector -Wno-format-invalid-specifier -Wno-gnu -Wno-address-of-packed-member -Wno-tautological-compare -mno-global-merge -Wno-unused-const-variable -fomit-frame-pointer -Wdeclaration-after-statement -Wno-pointer-sign -fno-strict-overflow -fno-merge-all-constants -fno-stack-check -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time -Werror=incompatible-pointer-types -Wno-initializer-overrides -Wno-unused-value -Wno-format -Wno-sign-compare -Wno-format-zero-length -Wno-uninitialized -maltivec -mabi=altivec -DMODULE -mcmodel=large -DKBUILD_BASENAME='"vpermxor1"' -DKBUILD_MODNAME='"raid6_pq"' -c -o vpermxor1.o vpermxor1.i GCC can build the same code. 
clang can compile a simple program that uses vpermxor, so I suspect the issue is related to the other operations in the for loop.
This also looks like a duplicate of (or at least part of the cleanup work for) https://bugs.llvm.org/show_bug.cgi?id=31177. Reduced case: $ cat reduced.ll ; ModuleID = 'bugpoint-reduced-simplified.ll' source_filename = "t.c" target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" ; Function Attrs: noinline nounwind define void @raid6_vpermxor1_gen_syndrome_real() local_unnamed_addr #0 { %1 = tail call <16 x i8> asm ".long (0x1000002d $| ((($0) & 0x1f) << 21) $| ((($1) & 0x1f) << 16) $| ((($2) & 0x1f) << 11) $| ((($3) & 0x1f) << 6)) ", "=v,v,v,v"(<16 x i8> <i8 -3, i8 -35, i8 -67, i8 -99, i8 125, i8 93, i8 61, i8 29, i8 -32, i8 -64, i8 -96, i8 -128, i8 96, i8 64, i8 32, i8 0>, <16 x i8> <i8 30, i8 28, i8 26, i8 24, i8 22, i8 20, i8 18, i8 16, i8 14, i8 12, i8 10, i8 8, i8 6, i8 4, i8 2, i8 0>, <16 x i8> undef) #1, !srcloc !1 %2 = xor <16 x i8> %1, undef unreachable } attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" } attributes #1 = { nounwind readnone } !llvm.ident = !{!0} !0 = !{!"clang version 8.0.0 "} !1 = !{i32 923} $ llc reduced.ll -mattr=-hard-float error: couldn't allocate output register for constraint 'v' at line 923 $ llc reduced.ll
This seems to have dropped off the radar. Is this still an issue? If not, can we close the PR?