Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LLVM has no clue how to lower memcpy, memset, or memmove intrinsics with 0 alignment #15720

Closed
chandlerc opened this issue Feb 25, 2013 · 5 comments
Labels
backend:X86 bugzilla Issues migrated from bugzilla

Comments

@chandlerc
Copy link
Member

Bugzilla Link 15348
Resolution FIXED
Resolved on Feb 27, 2013 02:27
Version trunk
OS All
CC @lhames

Extended Description

My test case is currently C++, and triggered due to r175389 emitting memcpy more often. It was reduced (very greatly) from TargetInfo in a bootstrap...

% cat bitfield.cpp
// Helper type for the reproducer. It declares its own copy constructor and
// destructor, so copying an object that contains an S has to go through
// S(const S&); the disassembly in this report shows X's copy constructor
// making exactly that call.
struct S {
// Leaves ptr1, ptr2 and x uninitialized.
S() {}
// Empty body: nothing is copied from the source object.
S(const S&) {}
~S() {}
void *ptr1, *ptr2;
unsigned long x;
};

// Reduced reproducer type: runs of 1-bit bitfields interleaved with plain
// unsigned members, followed by an S member and two enum members.
// X declares no copy constructor of its own; the crash reported below is
// inside the compiler-generated X::X(X const&).
struct X {
// First run: sixteen 1-bit fields.
unsigned b00 : 1;
unsigned b01 : 1;
unsigned b02 : 1;
unsigned b03 : 1;
unsigned b04 : 1;
unsigned b05 : 1;
unsigned b06 : 1;
unsigned b07 : 1;
unsigned b08 : 1;
unsigned b09 : 1;
unsigned b10 : 1;
unsigned b11 : 1;
unsigned b12 : 1;
unsigned b13 : 1;
unsigned b14 : 1;
unsigned b15 : 1;
unsigned u;
// Second run: four 1-bit fields.
unsigned b16 : 1;
unsigned b17 : 1;
unsigned b18 : 1;
unsigned b19 : 1;
unsigned u2;
// Third run: four 1-bit fields.
unsigned b20 : 1;
unsigned b21 : 1;
unsigned b22 : 1;
unsigned b23 : 1;

// In the -m32 disassembly in this report, the bytes before this member are
// copied as one 16-byte movaps plus one byte at offset 0x10, and S's copy
// constructor is then called on offset 0x14 -- presumably the address of `s`.
S s;

// Two members of distinct unnamed enum types. The disassembly copies two
// 4-byte values at offsets 0x20 and 0x24 after the S copy -- presumably these.
enum { A, B, C } e1;
enum { AA, BB, CC } e2;
};

// Returns a copy of x by value, which copy-constructs an X from x.
X f(const X &x) { return x; }

// Driver for the reproducer: builds an X and copies it through f().
int main() {
// No member of x is given a value before it is copied.
X x;
// Initializes y from the X returned by f(x).
X y = f(x);
}

% ./bin/clang++ -m32 -march=pentium3 -msse2 -O0 -std=c++03 -o bitfield bitfield.cpp

% ./bitfield
zsh: segmentation fault ./bitfield

% gdb --args ./bitfield
(gdb) r
Starting program: ./bitfield

Program received signal SIGSEGV, Segmentation fault.
0x08048774 in X::X(X const&) ()
(gdb) disass
Dump of assembler code for function _ZN1XC2ERKS_:
0x08048750 <+0>: push %ebp
0x08048751 <+1>: mov %esp,%ebp
0x08048753 <+3>: push %ebx
0x08048754 <+4>: sub $0x14,%esp
0x08048757 <+7>: mov 0xc(%ebp),%eax
0x0804875a <+10>: mov 0x8(%ebp),%ecx
0x0804875d <+13>: mov %ecx,-0x8(%ebp)
0x08048760 <+16>: mov %eax,-0xc(%ebp)
0x08048763 <+19>: mov -0x8(%ebp),%eax
0x08048766 <+22>: mov %eax,%ecx
0x08048768 <+24>: mov -0xc(%ebp),%edx
0x0804876b <+27>: mov 0x10(%edx),%bl
0x0804876e <+30>: mov %bl,0x10(%ecx)
0x08048771 <+33>: movaps (%edx),%xmm0
=> 0x08048774 <+36>: movaps %xmm0,(%ecx)
0x08048777 <+39>: mov %eax,%ecx
0x08048779 <+41>: add $0x14,%ecx
0x0804877f <+47>: mov -0xc(%ebp),%edx
0x08048782 <+50>: add $0x14,%edx
0x08048788 <+56>: mov %ecx,(%esp)
0x0804878b <+59>: mov %edx,0x4(%esp)
0x0804878f <+63>: mov %eax,-0x10(%ebp)
0x08048792 <+66>: call 0x80487b0 <_ZN1SC1ERKS_>
0x08048797 <+71>: mov -0xc(%ebp),%eax
0x0804879a <+74>: mov 0x20(%eax),%ecx
0x0804879d <+77>: mov -0x10(%ebp),%edx
0x080487a0 <+80>: mov %ecx,0x20(%edx)
0x080487a3 <+83>: mov 0x24(%eax),%eax
0x080487a6 <+86>: mov %eax,0x24(%edx)
0x080487a9 <+89>: add $0x14,%esp
0x080487ac <+92>: pop %ebx
0x080487ad <+93>: pop %ebp
0x080487ae <+94>: ret
End of assembler dump.

@chandlerc
Copy link
Member Author

Reduced fully. This one goes into my "how did this ever work???" category:

% cat memcpy.ll
; ModuleID = 'bitfield.cpp'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

; Reduced reproducer: copies 17 bytes from %b to %a.
; The i32 operand of the memcpy call (the alignment) is 0, and neither pointer
; argument carries any alignment information of its own.
define void @f(i8* %a, i8* %b) #12 {
entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)

; Target attributes taken from the original -march=pentium3 -msse2 build.
attributes #12 = { inlinehint "target-cpu"="pentium3" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,-sse3" }

% ./bin/llc -o - memcpy.ll
.file "memcpy.ll"
.text
.globl f
.align 16, 0x90
.type f,@function
f: # @f
.cfi_startproc

# BB#0: # %entry

    movl    8(%esp), %eax
    movb    16(%eax), %dl
    movl    4(%esp), %ecx
    movb    %dl, 16(%ecx)
    movaps  (%eax), %xmm0
    movaps  %xmm0, (%ecx)
    ret

.Ltmp0:
.size f, .Ltmp0-f
.cfi_endproc

    .section        ".note.GNU-stack","",@progbits

Magically, we know that this raw i8* out of nowhere is 16-byte aligned. Special that...

@chandlerc
Copy link
Member Author

This is a totally generic goof:

% cat memcpy.ll
; Target-independent reproducer: a 17-byte memcpy whose i32 alignment operand
; is 0, with no target triple, datalayout, or function attributes.
define void @f(i8* %a, i8* %b) {
entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)

% ./bin/llc -mtriple x86_64-unknown-linux-gnu -o - memcpy.ll
.file "memcpy.ll"
.text
.globl f
.align 16, 0x90
.type f,@function
f: # @f
.cfi_startproc

# BB#0: # %entry

    movb    16(%rsi), %al
    movb    %al, 16(%rdi)
    movaps  (%rsi), %xmm0
    movaps  %xmm0, (%rdi)
    ret

.Ltmp0:
.size f, .Ltmp0-f
.cfi_endproc

    .section        ".note.GNU-stack","",@progbits

The SelectionDAG (SDAG) does exactly the wrong thing with alignment-0 mem* intrinsics... fix incoming...

@chandlerc
Copy link
Member Author

Should be fixed in r176022.

@lhames
Copy link
Contributor

lhames commented Feb 27, 2013

The related issue of clang emitting underaligned memcpys has been fixed in r176163.

@llvmbot
Copy link
Collaborator

llvmbot commented Feb 27, 2013

I have to wonder why mem intrinsics allow an alignment of zero in the first
place... Can't it just be declared illegal and caught by the verifier?

@llvmbot llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 4, 2021
This issue was closed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 bugzilla Issues migrated from bugzilla
Projects
None yet
Development

No branches or pull requests

3 participants