Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve store merging #46367

Closed
davidbolvansky opened this issue Aug 6, 2020 · 7 comments
Closed

Improve store merging #46367

davidbolvansky opened this issue Aug 6, 2020 · 7 comments
Labels
bugzilla Issues migrated from bugzilla llvm:codegen

Comments

@davidbolvansky
Copy link
Collaborator

Bugzilla Link 47023
Resolution FIXED
Resolved on Sep 15, 2021 05:49
Version trunk
OS Linux
CC @topperc,@efriedma-quic,@fhahn,@nikic,@rotateright

Extended Description

struct data_t {
char data[16];
};

void process(data_t);
void process3(data_t, data_t, data_t);

void bad1() { process({1, 2, 3, 4, 5}); }

void bad2() { process({1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); }

void bad3() {
process3({1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{11, 12, 13, 14, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
{21, 22, 23, 24, 25, 20, 20, 20, 10, 20, 20, 20, 20, 20, 20});
}

void bad4() {
process3({1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{11, 12, 13, 14, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
{21, 22, 23, 24, 25, 20, 20, 20, 10, 20, 20, 20, 20, 20, 20, 11});
}

Clang:
bad1(): # @​bad1()
sub rsp, 24
mov dword ptr [rsp + 8], 67305985
mov byte ptr [rsp + 12], 5
mov qword ptr [rsp + 13], 0
mov dword ptr [rsp + 20], 0
mov rdi, qword ptr [rsp + 8]
xor esi, esi
call process(data_t)
add rsp, 24
ret
.LCPI1_0:
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 0 # 0x0
bad2(): # @​bad2()
sub rsp, 24
movaps xmm0, xmmword ptr [rip + .LCPI1_0] # xmm0 = [1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,0]
movaps xmmword ptr [rsp], xmm0
mov rdi, qword ptr [rsp]
mov rsi, qword ptr [rsp + 8]
call process(data_t)
add rsp, 24
ret
bad3(): # @​bad3()
sub rsp, 24
movabs r8, 1446803478303675925
mov qword ptr [rsp + 8], r8
mov byte ptr [rsp + 16], 10
mov dword ptr [rsp + 17], 336860180
mov word ptr [rsp + 21], 5140
mov byte ptr [rsp + 23], 0
mov r9, qword ptr [rsp + 16]
movabs rdi, 21542142465
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
mov esi, 0
call process3(data_t, data_t, data_t)
add rsp, 24
ret
bad4(): # @​bad4()
movabs rdi, 21542142465
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
movabs r8, 1446803478303675925
movabs r9, 798285110420182026
mov esi, 0
jmp process3(data_t, data_t, data_t)

GCC:
bad1():
movabs rdi, 21542142465
xor esi, esi
jmp process(data_t)
bad2():
mov rdi, QWORD PTR .LC0[rip]
movabs rsi, 1411785848587524
jmp process(data_t)
bad3():
mov r8, QWORD PTR .LC1[rip]
xor esi, esi
movabs r9, 5651576002974730
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
movabs rdi, 21542142465
jmp process3(data_t, data_t, data_t)
bad4():
movabs r8, 1446803478303675925
xor esi, esi
movabs r9, 798285110420182026
movabs rdx, 723401749922909195
movabs rcx, 723401728380766730
movabs rdi, 21542142465
jmp process3(data_t, data_t, data_t)
.LC0:
.byte 1
.byte 2
.byte 3
.byte 4
.byte 5
.byte 1
.byte 2
.byte 3
.LC1:
.byte 21
.byte 22
.byte 23
.byte 24
.byte 25
.byte 20
.byte 20
.byte 20

GCC's codegen is better here..

Godbolt: https://godbolt.org/z/xGY79q

@davidbolvansky
Copy link
Collaborator Author

struct T { char a[1024]; };

void bar (struct T *);

void
bad5 (void)
{
struct T t = {};
t.a[50] = 1;
t.a[54] = 2;
bar (&t);
}

Clang:
bad5(): # @​bad5()
push rbx
sub rsp, 1024
mov rbx, rsp
mov edx, 1024
mov rdi, rbx
xor esi, esi
call memset
mov byte ptr [rsp + 52], 1
mov byte ptr [rsp + 54], 2
mov rdi, rbx
call bar(T*)
add rsp, 1024
pop rbx
ret

GCC:
bad5():
sub rsp, 1032
xor eax, eax
mov ecx, 128
mov rdi, rsp
rep stosq
mov rdi, rsp
mov DWORD PTR [rsp+52], 131073
call bar(T*)
add rsp, 1032
ret

Can we merge
mov byte ptr [rsp + 52], 1
mov byte ptr [rsp + 54], 2

to
mov DWORD PTR [rsp + 52], 131073

?

@davidbolvansky
Copy link
Collaborator Author

struct T {
char a[1024];
};

void bar(struct T *t);

void bad6() {
struct T t = {};
t.a[54] = 1;
t.a[52] = 2;
t.a[95] = 3;
t.a[96] = 4;
int x = 0x506;
__builtin_memcpy(&t.a[97], &x, sizeof(x));
bar(&t);
}

    mov     byte ptr [rsp + 54], 1
    mov     byte ptr [rsp + 52], 2
    mov     word ptr [rsp + 95], 1027
    mov     dword ptr [rsp + 97], 1286

=>

    mov     DWORD PTR [rsp+52], 65538
    mov     DWORD PTR [rsp+95], 84280323

@davidbolvansky
Copy link
Collaborator Author

@​spatel: something for DAGCombiner?

@rotateright
Copy link
Contributor

@​spatel: something for DAGCombiner?

That's the only place that I know of where we do store merging. But there may be multiple problems to fix.

We have this IR:
call void @​llvm.memset.p0i8.i64(i8* nonnull align 1 dereferenceable(11) %uglygep, i8 0, i64 11, i1 false)

...with 5 non-zero constant stores ahead of that.

So that means we are asking DAGCombiner to merge constant stores of different sizes, and I don't think we try that.

If I run that IR back through the -O1 pipeline, I get the optimal:
call void @​_Z7process6data_t(i64 21542142465, i64 0)

@rotateright
Copy link
Contributor

SROA seems to undo the unnecessary memory ops, so this might be similar to bug 42794.

@davidbolvansky
Copy link
Collaborator Author

Seems to be fixed in LLVM 12 (probably second run of SROA)

@rotateright
Copy link
Contributor

Seems to be fixed in LLVM 12 (probably second run of SROA)

Yes, it was likely the pipeline change that took care of these examples.

I added all of these as IR phase ordering tests, so we won't break these specific tests again:
https://reviews.llvm.org/rGbe1028053e93

So I'll mark this as fixed. x86 codegen is not identical to gcc's, but let's open another report if that is a concern.

@llvmbot llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 10, 2021
This issue was closed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bugzilla Issues migrated from bugzilla llvm:codegen
Projects
None yet
Development

No branches or pull requests

2 participants