LLVM has no clue how to lower memcpy, memset, or memmove intrinsics with 0 alignment #15720

chandlerc · 2013-02-25T15:02:38Z


Bugzilla Link	15348
Resolution	FIXED
Resolved on	Feb 27, 2013 02:27
Version	trunk
OS	All
CC	@lhames

Extended Description

My test case is currently C++, and triggered due to r175389 emitting memcpy more often. It was reduced (very greatly) from TargetInfo in a bootstrap...

% cat bitfield.cpp
struct S {
S() {}
S(const S&) {}
~S() {}
void *ptr1, *ptr2;
unsigned long x;
};

struct X {
unsigned b00 : 1;
unsigned b01 : 1;
unsigned b02 : 1;
unsigned b03 : 1;
unsigned b04 : 1;
unsigned b05 : 1;
unsigned b06 : 1;
unsigned b07 : 1;
unsigned b08 : 1;
unsigned b09 : 1;
unsigned b10 : 1;
unsigned b11 : 1;
unsigned b12 : 1;
unsigned b13 : 1;
unsigned b14 : 1;
unsigned b15 : 1;
unsigned u;
unsigned b16 : 1;
unsigned b17 : 1;
unsigned b18 : 1;
unsigned b19 : 1;
unsigned u2;
unsigned b20 : 1;
unsigned b21 : 1;
unsigned b22 : 1;
unsigned b23 : 1;

S s;

enum { A, B, C } e1;
enum { AA, BB, CC } e2;
};

X f(const X &x) { return x; }

int main() {
X x;
X y = f(x);
}

% ./bin/clang++ -m32 -march=pentium3 -msse2 -O0 -std=c++03 -o bitfield bitfield.cpp

% ./bitfield
zsh: segmentation fault ./bitfield

% gdb --args ./bitfield
(gdb) r
Starting program: ./bitfield

Program received signal SIGSEGV, Segmentation fault.
0x08048774 in X::X(X const&) ()
(gdb) disass
Dump of assembler code for function _ZN1XC2ERKS_:
0x08048750 <+0>: push %ebp
0x08048751 <+1>: mov %esp,%ebp
0x08048753 <+3>: push %ebx
0x08048754 <+4>: sub $0x14,%esp
0x08048757 <+7>: mov 0xc(%ebp),%eax
0x0804875a <+10>: mov 0x8(%ebp),%ecx
0x0804875d <+13>: mov %ecx,-0x8(%ebp)
0x08048760 <+16>: mov %eax,-0xc(%ebp)
0x08048763 <+19>: mov -0x8(%ebp),%eax
0x08048766 <+22>: mov %eax,%ecx
0x08048768 <+24>: mov -0xc(%ebp),%edx
0x0804876b <+27>: mov 0x10(%edx),%bl
0x0804876e <+30>: mov %bl,0x10(%ecx)
0x08048771 <+33>: movaps (%edx),%xmm0
=> 0x08048774 <+36>: movaps %xmm0,(%ecx)
0x08048777 <+39>: mov %eax,%ecx
0x08048779 <+41>: add $0x14,%ecx
0x0804877f <+47>: mov -0xc(%ebp),%edx
0x08048782 <+50>: add $0x14,%edx
0x08048788 <+56>: mov %ecx,(%esp)
0x0804878b <+59>: mov %edx,0x4(%esp)
0x0804878f <+63>: mov %eax,-0x10(%ebp)
0x08048792 <+66>: call 0x80487b0 <_ZN1SC1ERKS_>
0x08048797 <+71>: mov -0xc(%ebp),%eax
0x0804879a <+74>: mov 0x20(%eax),%ecx
0x0804879d <+77>: mov -0x10(%ebp),%edx
0x080487a0 <+80>: mov %ecx,0x20(%edx)
0x080487a3 <+83>: mov 0x24(%eax),%eax
0x080487a6 <+86>: mov %eax,0x24(%edx)
0x080487a9 <+89>: add $0x14,%esp
0x080487ac <+92>: pop %ebx
0x080487ad <+93>: pop %ebp
0x080487ae <+94>: ret
End of assembler dump.

chandlerc · 2013-02-25T15:19:13Z

Reduced fully. This one goes into my "how did this ever work???" category:

% cat memcpy.ll
; ModuleID = 'bitfield.cpp'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

define void @f(i8* %a, i8* %b) #12 {
entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)

attributes #12 = { inlinehint "target-cpu"="pentium3" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,-sse3" }

% ./bin/llc -o - memcpy.ll
.file "memcpy.ll"
.text
.globl f
.align 16, 0x90
.type f,@function
f: # @f
.cfi_startproc

# BB#0: # %entry

    movl    8(%esp), %eax
    movb    16(%eax), %dl
    movl    4(%esp), %ecx
    movb    %dl, 16(%ecx)
    movaps  (%eax), %xmm0
    movaps  %xmm0, (%ecx)
    ret

.Ltmp0:
.size f, .Ltmp0-f
.cfi_endproc

    .section        ".note.GNU-stack","",@progbits

Magically, we know that this raw i8* out of nowhere is 16-byte aligned. Special that...

chandlerc · 2013-02-25T15:42:53Z

This is a totally generic goof:

% cat memcpy.ll
define void @f(i8* %a, i8* %b) {
entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)

% ./bin/llc -mtriple x86_64-unknown-linux-gnu -o - memcpy.ll
.file "memcpy.ll"
.text
.globl f
.align 16, 0x90
.type f,@function
f: # @f
.cfi_startproc

# BB#0: # %entry

    movb    16(%rsi), %al
    movb    %al, 16(%rdi)
    movaps  (%rsi), %xmm0
    movaps  %xmm0, (%rdi)
    ret

.Ltmp0:
.size f, .Ltmp0-f
.cfi_endproc

    .section        ".note.GNU-stack","",@progbits

The SelectionDAG (SDAG) does exactly the wrong thing with alignment-0 mem* intrinsics... fix incoming...

chandlerc · 2013-02-25T16:21:25Z

Should be fixed in r176022.

lhames · 2013-02-27T06:28:25Z

The related issue of clang emitting underaligned memcpys has been fixed in r176163.

llvmbot · 2013-02-27T10:27:56Z

I have to wonder why mem intrinsics allow an alignment of zero in the first
place... Can't it just be declared illegal and caught by the verifier?

llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 4, 2021

This issue was closed.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

LLVM has no clue how to lower memcpy, memset, or memmove intrinsics with 0 alignment #15720

LLVM has no clue how to lower memcpy, memset, or memmove intrinsics with 0 alignment #15720

chandlerc commented Feb 25, 2013

chandlerc commented Feb 25, 2013

chandlerc commented Feb 25, 2013

chandlerc commented Feb 25, 2013

lhames commented Feb 27, 2013

llvmbot commented Feb 27, 2013

LLVM has no clue how to lower memcpy, memset, or memmove intrinsics with 0 alignment #15720

LLVM has no clue how to lower memcpy, memset, or memmove intrinsics with 0 alignment #15720

Comments

chandlerc commented Feb 25, 2013

Extended Description

chandlerc commented Feb 25, 2013

BB#0: # %entry

chandlerc commented Feb 25, 2013

BB#0: # %entry

chandlerc commented Feb 25, 2013

lhames commented Feb 27, 2013

llvmbot commented Feb 27, 2013