Miscompile of fabs due to vectorization #20728

nico · 2014-07-18T06:42:06Z


Bugzilla Link	20354
Resolution	FIXED
Resolved on	Aug 05, 2014 12:55
Version	trunk
OS	All
CC	@chandlerc,@hfinkel,@isanbard,@rotateright

Extended Description

Nicos-MacBook-Pro:clang thakis$ cat test.cc
#include <math.h>
#include <stdio.h>

int main() {
float v[2] = {-1, -1};
printf("%p\n", v); // (needed to trigger the bug, else not needed)
float f = fabs(v[0]);
printf("%f\n", f < fabs(v[1]) ? f : 0);
return 0;
}
Nicos-MacBook-Pro:clang thakis$ ~/src/llvm-build/bin/clang -o foo -O2 test.cc -isysroot $(xcrun --show-sdk-path)
Nicos-MacBook-Pro:clang thakis$ ./foo
0x7fff53f3aaf8
-1.000000

What! Works fine with minor changes to the code.

Generated llvm looks fine:
Nicos-MacBook-Pro:clang thakis$ ~/src/llvm-build/bin/clang -o foo -O2 test.cc -isysroot $(xcrun --show-sdk-path) -S -o - -emit-llvm
; ModuleID = 'test.cc'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"

@.str = private unnamed_addr constant [4 x i8] c"%p\0A\00", align 1
@.str1 = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1

; Function Attrs: nounwind ssp uwtable
define i32 @main() #0 {
entry:
%v = alloca i64, align 8
store i64 -4647714812233515008, i64* %v, align 8
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i64* %v)
%0 = load i64* %v, align 8
%1 = bitcast i64 %0 to <2 x float>
%2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %1)
%3 = extractelement <2 x float> %2, i32 0
%4 = extractelement <2 x float> %2, i32 1
%cmp = fcmp olt float %3, %4
%5 = fpext float %3 to double
%conv7 = select i1 %cmp, double %5, double 0.000000e+00
%call8 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), double %conv7)
ret i32 0
}

; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...) #1

; Function Attrs: nounwind readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #2

attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }

!llvm.ident = !{#0}

!0 = metadata !{metadata !"clang version 3.5.0 (213220)"}

Generated asm:
Nicos-MacBook-Pro:clang thakis$ ~/src/llvm-build/bin/clang -o foo -O2 test.cc -isysroot $(xcrun --show-sdk-path) -S -o -
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 9
.globl _main
.align 4, 0x90
_main: ## @main
.cfi_startproc

BB#0: ## %entry

pushq	%rbp

Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
subq $16, %rsp
movabsq $-4647714812233515008, %rax ## imm = 0xBF800000BF800000
movq %rax, -8(%rbp)
leaq L_.str(%rip), %rdi
leaq -8(%rbp), %rsi
xorl %eax, %eax
callq _printf
movabsq $9223372036854775807, %rax ## imm = 0x7FFFFFFFFFFFFFFF
andq -8(%rbp), %rax
movd %rax, %xmm0
pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0]
ucomiss %xmm0, %xmm1
ja LBB0_1

BB#2: ## %entry

pxor	%xmm0, %xmm0
jmp	LBB0_3

LBB0_1:
cvtss2sd %xmm0, %xmm0
LBB0_3: ## %entry
leaq L_.str1(%rip), %rdi
movb $1, %al
callq _printf
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc

.section	__TEXT,__cstring,cstring_literals

L_.str: ## @.str
.asciz "%p\n"

L_.str1: ## @.str1
.asciz "%f\n"

.subsections_via_symbols

The text was updated successfully, but these errors were encountered:

nico · 2014-07-18T06:43:12Z

Chandler, svn log lib/Target/X86/X86ISelLowering.cpp shows a recent commit from you mentioning "pshufd" – do you know anything about this, or do you know who knows about this?

Is there some flag to disable vectorization? This is affecting production code, and I'd like to have some easy-to-deploy workaround.

nico · 2014-07-18T07:01:10Z

majnemer told me that -fno-slp-vectorize disables vectorization, so we (chromium) will build with that until this is resolved.

rotateright · 2014-07-19T00:09:26Z

movabsq $9223372036854775807, %rax ## imm = 0x7FFFFFFFFFFFFFFF

That's a clue that this isn't a shuffling problem.

$ cat bad_abs.ll
define i64 @foo() #0 {
%high_bits = bitcast i64 9223372039002259456 to <2 x float> ; 0x8000000080000000
%zero = call <2 x float> @llvm.fabs.v2f32(<2 x float> %high_bits)
%ret = bitcast <2 x float> %zero to i64
ret i64 %ret
}

; Function Attrs: nounwind readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #2

$ ./llc bad_abs.ll -o -
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 13, 3
.globl _foo
.align 4, 0x90
_foo: ## @foo
.cfi_startproc

BB#0:

movl	$2147483648, %eax       ## imm = 0x80000000
retq

Oops...that's not the zero we were looking for!

rotateright · 2014-07-19T02:07:55Z

Here's a patch to fix this bug. If nobody else gets to it first, I'll add the testcase and submit to the commits list sometime next week.

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp

--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 213097)
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp (working copy)
@@ -7354,17 +7354,22 @@
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
if (!TLI.isFAbsFree(VT) &&

 N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&

 N0.getOperand(0).getValueType().isInteger() &&

 !N0.getOperand(0).getValueType().isVector()) {

```
 N0.getOpcode() == ISD::BITCAST &&
```
```
 N0.getNode()->hasOneUse()) {
```
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
```
 APInt SignMask;
```
```
 if (N0.getValueType().isVector()) {
```

   SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());

   SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);

```
 } else {
```

   SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());

 }
 Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,

        DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));

                   DAG.getConstant(SignMask, IntVT));
 AddToWorkList(Int.getNode());

 return DAG.getNode(ISD::BITCAST, SDLoc(N),

                    N->getValueType(0), Int);

 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);

}
}

rotateright · 2014-07-22T19:37:31Z

I was expecting the FNEG case to have the same bug, but that case has a different vector check:
!VT.isVector()

visitFABS is doing:
!N0.getOperand(0).getValueType().isVector()

So if we want to generate better code for vectors in visitFNEG, I think we have to remove that check and handle the vector case similarly to my proposed patch in comment 4.

rotateright · 2014-07-23T00:02:16Z

Patch submitted for review:
http://reviews.llvm.org/D4633

nico · 2014-08-04T00:17:47Z

Is it possible to get the fix merged into 3.5, given that this miscompile is affecting real applications in practice? (I see that you're planning to split your patch into several; merging just the fix and not the optimizations would be good.)

rotateright · 2014-08-04T00:35:17Z

I abandoned the previous patch proposal because it was trying to do too much at once.

New patch that just fixes the miscompile here:
http://reviews.llvm.org/D4770

rotateright · 2014-08-04T00:37:15Z

Hi Nico -

Yes, I think we should certainly try to get this fixed for 3.5 if it's not too late.

Cc'ing Bill Wendling for approval (assuming the patch itself is reviewed soon).

rotateright · 2014-08-04T00:50:08Z

Fixed in trunk with r214670 (LLVM):
http://llvm.org/viewvc/llvm-project?view=revision&revision=214670

Bill if you can merge this into the 3.5 branch, I'd appreciate it very much. Thanks!

rotateright · 2014-08-04T01:28:02Z

The test case broke some build bots (leading '.' when using the const pool labels). I hope to have fixed that with:
http://llvm.org/viewvc/llvm-project?view=revision&revision=214674

chandlerc · 2014-08-04T02:49:20Z

The test case broke some build bots (leading '.' when using the const pool
labels). I hope to have fixed that with:
http://llvm.org/viewvc/llvm-project?view=revision&revision=214674

The test case is still broken. I'm trying to fix it but it is quite unclear. Give me a bit to sift through what this test case is actually doing.

rotateright · 2014-08-04T03:47:39Z

The test case broke some build bots (leading '.' when using the const pool
labels). I hope to have fixed that with:
http://llvm.org/viewvc/llvm-project?view=revision&revision=214674

The test case is still broken. I'm trying to fix it but it is quite unclear.
Give me a bit to sift through what this test case is actually doing.

I'm not getting any fail mails since r214674. I see other failures for test cases unrelated to the one that I added.

rotateright · 2014-08-04T17:31:41Z

The test case was improved again with:
http://llvm.org/viewvc/llvm-project?view=revision&revision=214679

Thanks, Chandler!

rotateright · 2014-08-05T19:55:09Z

r214892 gets trunk to generate the optimized code; it's basically the same patch as shown in comment 4.

http://llvm.org/viewvc/llvm-project?view=revision&revision=214892

llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 9, 2021

This issue was closed.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Miscompile of fabs due to vectorization #20728

Miscompile of fabs due to vectorization #20728

nico commented Jul 18, 2014

nico commented Jul 18, 2014

nico commented Jul 18, 2014

rotateright commented Jul 19, 2014

rotateright commented Jul 19, 2014

rotateright commented Jul 22, 2014

rotateright commented Jul 23, 2014

nico commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

chandlerc commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 5, 2014

Miscompile of fabs due to vectorization #20728

Miscompile of fabs due to vectorization #20728

Comments

nico commented Jul 18, 2014

Extended Description

BB#0: ## %entry

BB#2: ## %entry

nico commented Jul 18, 2014

nico commented Jul 18, 2014

rotateright commented Jul 19, 2014

BB#0:

rotateright commented Jul 19, 2014

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp

rotateright commented Jul 22, 2014

rotateright commented Jul 23, 2014

nico commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

chandlerc commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 4, 2014

rotateright commented Aug 5, 2014