void i32(long long x, int* p) { int a = x; int b = x >> 32; p[0] = a; p[1] = b; } define void @i32(i64 %0, i32* nocapture %1) { %3 = trunc i64 %0 to i32 %4 = lshr i64 %0, 32 %5 = trunc i64 %4 to i32 store i32 %3, i32* %1, align 4 %6 = getelementptr inbounds i32, i32* %1, i64 1 store i32 %5, i32* %6, align 4 ret void } -------------------------------------------------------------------------- Compiles to x86-64: movl %edi, (%rsi) shrq $32, %rdi movl %edi, 4(%rsi) retq -------------------------------------------------------------------------- But that could be only: movq %rdi, (%rsi) In DAGCombiner, we have MatchStoreCombine(), but it only handles 8-bit; we have mergeConsecutiveStores(), but it doesn't look at trunc patterns. https://godbolt.org/z/xMPxT9
Here's a more involved IR example (aggregate type + extra use of the loaded value), but hopefully no harder to match in SDAG: %struct.TypeSize = type { i16, i16 } @s_typeSizeLookup_1 = external dso_local local_unnamed_addr constant [0 x i32], align 4 define dso_local zeroext i1 @_Z11loadToken_1hP8TypeSize(i8 zeroext %0, %struct.TypeSize* nocapture %1) local_unnamed_addr #0 { %3 = zext i8 %0 to i64 %4 = getelementptr inbounds [0 x i32], [0 x i32]* @s_typeSizeLookup_1, i64 0, i64 %3 %5 = load i32, i32* %4, align 4, !tbaa !2 %6 = trunc i32 %5 to i16 %7 = getelementptr inbounds %struct.TypeSize, %struct.TypeSize* %1, i64 0, i32 0 store i16 %6, i16* %7, align 2, !tbaa !6 %8 = lshr i32 %5, 16 %9 = trunc i32 %8 to i16 %10 = getelementptr inbounds %struct.TypeSize, %struct.TypeSize* %1, i64 0, i32 1 store i16 %9, i16* %10, align 2, !tbaa !9 %11 = and i32 %5, 255 %12 = icmp ne i32 %11, 11 ret i1 %12 }
(In reply to Sanjay Patel from comment #1) > Here's a more involved IR example (aggregate type + extra use of the loaded > value), but hopefully no harder to match in SDAG: > > %struct.TypeSize = type { i16, i16 } > > @s_typeSizeLookup_1 = external dso_local local_unnamed_addr constant [0 x > i32], align 4 > > define dso_local zeroext i1 @_Z11loadToken_1hP8TypeSize(i8 zeroext %0, > %struct.TypeSize* nocapture %1) local_unnamed_addr #0 { > %3 = zext i8 %0 to i64 > %4 = getelementptr inbounds [0 x i32], [0 x i32]* @s_typeSizeLookup_1, i64 > 0, i64 %3 > %5 = load i32, i32* %4, align 4, !tbaa !2 > %6 = trunc i32 %5 to i16 > %7 = getelementptr inbounds %struct.TypeSize, %struct.TypeSize* %1, i64 0, > i32 0 > store i16 %6, i16* %7, align 2, !tbaa !6 > %8 = lshr i32 %5, 16 > %9 = trunc i32 %8 to i16 > %10 = getelementptr inbounds %struct.TypeSize, %struct.TypeSize* %1, i64 > 0, i32 1 > store i16 %9, i16* %10, align 2, !tbaa !9 > %11 = and i32 %5, 255 > %12 = icmp ne i32 %11, 11 > ret i1 %12 > } For reference, this asm is currently: movl %edi, %eax leaq _s_typeSizeLookup_1(%rip), %rcx movl (%rcx,%rax,4), %eax movw %ax, (%rsi) movl %eax, %ecx shrl $16, %ecx movw %cx, 2(%rsi) cmpb $11, %al setne %al retq With the change proposed in https://reviews.llvm.org/D86420, we merge to 32-bit store: movl %edi, %eax leaq _s_typeSizeLookup_1(%rip), %rcx movl (%rcx,%rax,4), %eax movl %eax, (%rsi) cmpb $11, %al setne %al retq
D86420 landed at rG54a5dd485c4d04d142a58c9349ada0c897cbeae6 - followed by D87112 and rG7a06b166b1afb457a7df6ad73a6710b4dde4db68 OK to resolve?
Thanks for the reminder. There are likely other merge patterns that we still don't get, but I just forgot to mark this example as fixed.