-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve store merging #46367
Comments
struct T { char a[1024]; }; void bar (struct T *); void Clang: GCC: Can we merge to ? |
struct T { void bar(struct T *t); void bad6() {
=>
|
@spatel: something for DAGCombiner? |
That's the only place that I know of where we do store merging. But there may be multiple problems to fix. We have this IR: ...with 5 non-zero constant stores ahead of that. So that means we are asking DAGCombiner to merge constant stores of different sizes, and I don't think we try that. If I run that IR back through the -O1 pipeline, I get the optimal: |
SROA seems to undo the unnecessary memory ops, so this might be similar to bug 42794. |
Seems to be fixed in LLVM 12 (probably second run of SROA) |
Yes, it was likely the pipeline change that took care of these examples. I added all of these as IR phase ordering tests, so we won't break these specific tests again: So I'll mark this as fixed. x86 codegen is not identical to gcc's, but let's open another report if that is a concern. |
Extended Description
struct data_t {
char data[16];
};
void process(data_t);
void process3(data_t, data_t, data_t);
void bad1() { process({1, 2, 3, 4, 5}); }
void bad2() { process({1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); }
void bad3() {
process3({1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{11, 12, 13, 14, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
{21, 22, 23, 24, 25, 20, 20, 20, 10, 20, 20, 20, 20, 20, 20});
}
void bad4() {
process3({1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{11, 12, 13, 14, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
{21, 22, 23, 24, 25, 20, 20, 20, 10, 20, 20, 20, 20, 20, 20, 11});
}
Clang:
bad1(): # @bad1()
sub rsp, 24
mov dword ptr [rsp + 8], 67305985
mov byte ptr [rsp + 12], 5
mov qword ptr [rsp + 13], 0
mov dword ptr [rsp + 20], 0
mov rdi, qword ptr [rsp + 8]
xor esi, esi
call process(data_t)
add rsp, 24
ret
.LCPI1_0:
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 0 # 0x0
bad2(): # @bad2()
sub rsp, 24
movaps xmm0, xmmword ptr [rip + .LCPI1_0] # xmm0 = [1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,0]
movaps xmmword ptr [rsp], xmm0
mov rdi, qword ptr [rsp]
mov rsi, qword ptr [rsp + 8]
call process(data_t)
add rsp, 24
ret
bad3(): # @bad3()
sub rsp, 24
movabs r8, 1446803478303675925
mov qword ptr [rsp + 8], r8
mov byte ptr [rsp + 16], 10
mov dword ptr [rsp + 17], 336860180
mov word ptr [rsp + 21], 5140
mov byte ptr [rsp + 23], 0
mov r9, qword ptr [rsp + 16]
movabs rdi, 21542142465
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
mov esi, 0
call process3(data_t, data_t, data_t)
add rsp, 24
ret
bad4(): # @bad4()
movabs rdi, 21542142465
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
movabs r8, 1446803478303675925
movabs r9, 798285110420182026
mov esi, 0
jmp process3(data_t, data_t, data_t)
GCC:
bad1():
movabs rdi, 21542142465
xor esi, esi
jmp process(data_t)
bad2():
mov rdi, QWORD PTR .LC0[rip]
movabs rsi, 1411785848587524
jmp process(data_t)
bad3():
mov r8, QWORD PTR .LC1[rip]
xor esi, esi
movabs r9, 5651576002974730
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
movabs rdi, 21542142465
jmp process3(data_t, data_t, data_t)
bad4():
movabs r8, 1446803478303675925
xor esi, esi
movabs r9, 798285110420182026
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
movabs rdi, 21542142465
jmp process3(data_t, data_t, data_t)
.LC0:
.byte 1
.byte 2
.byte 3
.byte 4
.byte 5
.byte 1
.byte 2
.byte 3
.LC1:
.byte 21
.byte 22
.byte 23
.byte 24
.byte 25
.byte 20
.byte 20
.byte 20
GCC's codegen is better here..
Godbolt: https://godbolt.org/z/xGY79q
The text was updated successfully, but these errors were encountered: