SimpleLoopUnswitch hoists branches on poison/undef (Wrong code at -O3 on x86_64-linux-gnu) #50387

zhendongsu · 2021-07-09T21:50:57Z


Bugzilla Link	51043
Version	trunk
OS	All
CC	@aeubanks,@alinas,@aqjune,@nunoplopes,@regehr,@sjoerdmeijer,@rotateright

Extended Description

[706] % clangtk -v
clang version 13.0.0 (https://github.com/llvm/llvm-project.git 4a3b055)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /local/suz-local/opfuzz/bin
Found candidate GCC installation: /usr/lib/gcc/i686-linux-gnu/8
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/6
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/6.5.0
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/7
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/7.5.0
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8
Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/7.5.0
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Candidate multilib: x32;@MX32
Selected multilib: .;@m64
[707] %
[707] % clangtk -O2 small.c; ./a.out
[708] %
[708] % clangtk -O3 small.c
[709] % ./a.out
Aborted
[710] %
[710] % cat small.c
int a = 1, b = 1;
int main() {
short d, g, i;
int e = 34000, h;
d = b;
g = 100 | b;
L1:
i = g;
L2:
g = ~(d / e);
e = ~((2 / g) & d);
h = a;
while (!e) {
a = b;
e = ~(1L << i);
}
if (g > 0)
goto L2;
if (!g)
goto L1;
if (h < e)
__builtin_abort ();
return 0;
}

The text was updated successfully, but these errors were encountered:

zhendongsu · 2021-07-09T21:54:26Z

Compiler Explorer: https://godbolt.org/z/oao731Ksc

rotateright · 2021-07-12T23:21:08Z

I think the bug is in or uncovered by the SimpleLoopUnswitchPass. At -O3, the pass differs because it takes a parameter to allow "non-trivial" unswitching.

This can be controlled with a debug flag, so we can avoid the bug by changing the clang invocation with:
-mllvm -enable-npm-O3-nontrivial-unswitch=0

rotateright · 2021-07-12T23:25:52Z

I don't know anything about the SimpleLoopUnswitch pass, so cc'ing some people based on where this probably became visible:
https://reviews.llvm.org/D94559

rotateright · 2021-07-12T23:27:09Z

I managed to show the bug in a reduction for Alive2 by pasting the before/after IR with SimpleLoopUnswitch:

https://alive2.llvm.org/ce/z/kUAyji

aeubanks · 2021-07-13T18:07:50Z

Managed to compile alive locally and use llvm-reduce:

$ cat /tmp/reduce.sh
#!/bin/bash
$H O M E / r e p o s / l l v m - p r o j e c t 2 / b u i l d / r e l / b i n / o p t - p a s s e s =^{'} s i m p l e - l o o p - u n s w i t c h^{'}$ @ -S -o /tmp/b.ll
$H O M E / r e p o s / a l i v e 2 / b u i l d / a l i v e - t v$ @ /tmp/b.ll | rg -F "Transformation doesn't verify!"

$ ./build/rel/bin/llvm-reduce --test=/tmp/reduce.sh /tmp/a.ll
$ cat reduce.ll
define i32 @src() {
entry:
%conv1 = shl i32 undef, 16
%sext31 = ashr exact i32 %conv1, 16
br label %L1

L1: ; preds = %if.end, %entry
%e.0 = phi i32 [ 34000, %entry ], [ undef, %if.end ]
%tobool.not = icmp eq i32 undef, -1
br label %L2

L2: ; preds = %while.end, %L1
%e.1 = phi i32 [ %e.0, %L1 ], [ undef, %while.end ]
%div = sdiv i32 %sext31, %e.1
%0 = trunc i32 %div to i16
%1 = trunc i32 %div to i16
%div5.rhs.trunc = xor i16 %1, -1
%div534 = sdiv i16 2, %div5.rhs.trunc
%div5.sext = sext i16 %div534 to i32
%and = and i32 %div5.sext, %sext31
%tobool.not36 = icmp eq i32 %and, -1
br i1 %tobool.not36, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %L2
br i1 %tobool.not, label %while.body.lr.ph.split, label %while.cond.while.end_crit_edge

while.body.lr.ph.split: ; preds = %while.body.lr.ph
ret i32 undef

while.cond.while.end_crit_edge: ; preds = %while.body.lr.ph
br label %while.end

while.end: ; preds = %while.cond.while.end_crit_edge, %L2
%cmp = icmp slt i16 %0, -1
br i1 %cmp, label %L2, label %if.end

if.end: ; preds = %while.end
%div.lcssa = phi i32 [ %div, %while.end ]
%2 = trunc i32 %div.lcssa to i16
%tobool13.not = icmp eq i16 %2, -1
br i1 %tobool13.not, label %L1, label %if.end15

if.end15: ; preds = %if.end
ret i32 undef
}

aeubanks · 2021-07-13T22:10:08Z

Manually reduced further to

define i32 @foo(i1 %arg) {
bb:
br label %bb1

bb1: ; preds = %bb
br label %bb2

bb2: ; preds = %bb5, %bb4, %bb1
br i1 false, label %bb3, label %bb4

bb3: ; preds = %bb2
br i1 %arg, label %bb6, label %bb4

bb4: ; preds = %bb3, %bb2
br i1 false, label %bb2, label %bb5

bb5: ; preds = %bb4
br i1 false, label %bb2, label %bb6

bb6: ; preds = %bb5, %bb3
ret i32 1
}

========================>

define i32 @foo(i1 %arg) {
bb:
br label %bb1

bb1: ; preds = %bb
br i1 %arg, label %bb1.split.us, label %bb1.split

bb1.split.us: ; preds = %bb1
br label %bb2.us

bb2.us: ; preds = %bb2.backedge.us, %bb1.split.us
br i1 false, label %bb3.us, label %bb4.us

bb3.us: ; preds = %bb2.us
br label %bb6.split.us

bb4.us: ; preds = %bb2.us
br i1 false, label %bb2.backedge.us, label %bb5.us

bb5.us: ; preds = %bb4.us
br i1 false, label %bb2.backedge.us, label %bb6.split.us.loopexit

bb2.backedge.us: ; preds = %bb5.us, %bb4.us
br label %bb2.us

bb6.split.us.loopexit: ; preds = %bb5.us
br label %bb6.split.us

bb6.split.us: ; preds = %bb6.split.us.loopexit, %bb3.us
br label %bb6

bb1.split: ; preds = %bb1
br label %bb2

bb2: ; preds = %bb2.backedge, %bb1.split
br i1 false, label %bb3, label %bb4

bb3: ; preds = %bb2
br label %bb4

bb4: ; preds = %bb3, %bb2
br i1 false, label %bb2.backedge, label %bb5

bb2.backedge: ; preds = %bb4, %bb5
br label %bb2

bb5: ; preds = %bb4
br i1 false, label %bb2.backedge, label %bb6.split

bb6.split: ; preds = %bb5
br label %bb6

bb6: ; preds = %bb6.split.us, %bb6.split
ret i32 1
}

aeubanks · 2021-07-13T22:14:28Z

Alive complains when %arg is undef, adding noundef makes alive happy.

aeubanks · 2021-07-13T22:21:14Z

I'm having trouble understanding the reduced test case and Alive's issue with it.

simplifycfg simplifies both the pre and post unswitching cases to just ret i32 1.

nunoplopes · 2021-07-14T11:30:23Z

The issue is a known bug in loop unswitching.

The original code (the last reduced case) doesn't reach bb3, hence it never branches on %arg.
However, the optimized code hoists the branch on %arg such that bb1 is reachable.

Since branching on undef/poison is UB, the optimization introduces UB if %arg happens to be undef/poison.

The fix is easy: add freeze on any hoisted branches. We've been working on reducing potential perf regressions around freeze as last time Juneyoung tried to fix loop unswitch someone complained about a perf regression. We have a GSoC student focused just on freeze perf, so I guess we can move forward with the fix now.

llvmbot · 2021-07-14T11:37:55Z

I think I can solve it without difficulty.
Is it okay if I upload a patch that solves this problem?

rotateright · 2021-07-14T12:48:57Z

I think I can solve it without difficulty.
Is it okay if I upload a patch that solves this problem?

That sounds great to me. I don't know enough details, so I'm not the best reviewer.

Some links I dug up for reference:
https://reviews.llvm.org/D29015 [SimpleLoopUnswitch] Fix introduction of UB when hoisted condition may be undef or poison
https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20200224/748416.html - commit and revert discussion
bug 31652
bug 27506

regehr · 2021-07-14T16:57:54Z

hi I just want to throw in that I have a fork of llvm-reduce that is specifically intended to do a better job on inputs to Alive2. just build this branch:

https://github.com/regehr/llvm-project/tree/new-llvm-reduce-pass

I didn't yet have time to try it out on this example.

llvmbot · 2021-07-16T11:24:56Z

While investigating regressions after a loop-unswitch patch, I found that scalar evolution can be improved in this case.

define void @f(i32 %n) {
entry:
%n.fr = freeze i32 %n
%nonzero = icmp slt i32 0, %n.fr
br i1 %nonzero, label %loop, label %exit
exit:
ret void
loop:
%i = phi i32 [%i.next, %loop], [0, %entry]
call void @g() willreturn
%i.next = add i32 %i, 1
%cond_next = icmp slt i32 %i, %n
br i1 %cond_next, label %loop, label %exit2
exit2:
ret void
}

declare void @g() willreturn

Currently, scalar evolution says backedge-taken count is (0 smax %n), but this can be %n.

https://alive2.llvm.org/ce/z/EamykF

If %n is poison, it will anyway raise UB because there is br i1 %cond_next.
If %n is undef, icmp slt i32 %i, %n is also undef, so it is UB.
I'm not sure whether this will still work if %i starts from a non-constant value though.

llvmbot · 2021-07-16T11:28:30Z

To check that the backedge count is correct, I mimiced the alive2 test that is linked at https://reviews.llvm.org/D93882.

aqjune · 2021-07-17T05:24:59Z

(I found that the Alive2 test needed small correction: -src-unroll and -tgt-unroll had to be given, and minor fixes should have been done in the test. https://alive2.llvm.org/ce/z/7bHWAZ is the right one.
The previous test was actually written by myself, so this is my mistake.)

Not sure whether it can be generalized further (modeling freeze in SCEV is a hard problem), but at least for the simple case, the loop iteration count seems correct to me.
This can be explained without relying on complex reasoning about undef/poison values.

I'm going to show that the backedge-taken count result of the loop must be equivalent to the result of a loop without freeze.

To reach the exit block, it should at least go through two conditional branches:
(1) The branch one at entry: 'br (icmp slt i32 0, freeze(%n))'
(2) The branch in the first iteration of loop: 'br (icmp slt i32 0, %n)' (since %i = 0 in the first iter.)

For (1), it is known that 'icmp slt i32 0, freeze(%n)' = 'freeze (icmp slt i32 0, %n)' (https://alive2.llvm.org/ce/z/ecPDzU).
I'll slightly simplify (1)'s branch as 'br (freeze (icmp slt i32 0, %n))', which is the exactly frozen form of (2).

IIUC, the backedge-taken count makes sense only if there was no UB before reaching the exit block.
Therefore, both (1) and (2) must not have raised UB at the exit block point.
And, (2) being well-defined implies that (1) did not need freeze..!

Again, it isn't clear whether this can be generalized; for this specific case, it seems fine.

aeubanks · 2021-08-02T17:32:19Z

Any update on this?

aqjune · 2021-08-03T04:24:06Z

Hi Aurthur,

Hyeongyu wrote https://reviews.llvm.org/D106041 and is running experiments to see performance regressions.

But would it be better if the patch is landed first? Perhaps leaving LLVM in an unsound state is not a good idea.

aeubanks · 2021-08-03T06:18:29Z

Non-trivial unswitching was only enabled for -O3 for the new PM in https://reviews.llvm.org/D94559, many years after it had been in use in production. I think that only microbenchmarks should be largely affected, but those may be important. For example, making sure that something like #48059 doesn't regress is important.

nunoplopes · 2021-08-30T18:21:25Z

*** Bug llvm/llvm-bugzilla-archive#51671 has been marked as a duplicate of this bug. ***

aeubanks · 2021-11-27T04:43:27Z

mentioned in issue llvm/llvm-bugzilla-archive#51671

nunoplopes · 2021-11-27T04:43:28Z

mentioned in issue llvm/llvm-bugzilla-archive#51671

fhahn · 2022-03-21T19:27:14Z

the original clang reproducer doesn't cause any issues now, but the underlying issue is still there. @aqjune do you know if there are any remaining blockers for enabling -freeze-loop-unswitch-cond=true?

aqjune · 2022-03-21T21:29:45Z

No, I am not aware of any blockers..!
Time to finally set the flag to true? (sorry for typo ;)

fhahn · 2022-03-22T20:37:23Z

No, I am not aware of any blockers..!

let me collect some numbers to sanity check

fhahn · 2022-04-22T12:15:16Z

No, I am not aware of any blockers..!

let me collect some numbers to sanity check

Finally got around to that. There were no performance regressions on the AArch64 system I tried with O3 in SPEC2006,SPEC2017 and an internal benchmark suite.

Patch to flip the default: https://reviews.llvm.org/D124252

nunoplopes · 2022-04-25T16:38:06Z

Thanks, this fixed 3 tests:

LLVM :: Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll
LLVM :: Transforms/SimpleLoopUnswitch/guards.ll
LLVM :: Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll

3 more to go, though!

fhahn · 2022-04-26T07:54:31Z

@nunoplopes what are the remaining 3?

nunoplopes · 2022-04-26T09:01:42Z

Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll

See here: https://web.ist.utl.pt/nuno.lopes/alive2/index.php?hash=1fb52562ffabc05d

fhahn · 2022-04-26T21:20:30Z

Sounds good, I'll look in to trivial unswitching case tomorrow. Is there an issue for those?

nunoplopes · 2022-04-26T21:27:22Z

Thank you!
There's no bug report on those AFAIK. I didn't reduce those test cases as I thought they were all the same, sorry.

fhahn · 2022-04-27T20:53:49Z

I put up https://reviews.llvm.org/D124554, https://reviews.llvm.org/D124549, https://reviews.llvm.org/D124526 to fix some of the remaining issues. After that, there's one failure left I think. Lots of tests seem to time out though.

fhahn · 2022-05-06T10:11:10Z

I put up https://reviews.llvm.org/D124554, https://reviews.llvm.org/D124549, https://reviews.llvm.org/D124526 to fix some of the remaining issues. After that, there's one failure left I think. Lots of tests seem to time out though.

All patches have now landed, I'm not seeing any verification failures for SimpleLoopUnswitch locally any longer.

nunoplopes · 2022-05-10T10:31:07Z

I put up https://reviews.llvm.org/D124554, https://reviews.llvm.org/D124549, https://reviews.llvm.org/D124526 to fix some of the remaining issues. After that, there's one failure left I think. Lots of tests seem to time out though.

All patches have now landed, I'm not seeing any verification failures for SimpleLoopUnswitch locally any longer.

Confirmed, all SLU tests pass now. Thank you @fhahn!

llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 11, 2021

llvmbot added the confirmed label Jan 26, 2022

EugeneZelenko added the llvm:optimizations label Mar 21, 2022

fhahn mentioned this issue Mar 22, 2022

wrong code at -O3 on x86_64-linux-gnu (generated code hangs) #38846

Closed

fhahn changed the title ~~wrong code at -O3 on x86_64-linux-gnu~~ SimpleLoopUnswitch hoists branches on poison/undef (Wrong code at -O3 on x86_64-linux-gnu) Mar 22, 2022

fhahn added the miscompilation label Mar 24, 2022

fhahn mentioned this issue Apr 11, 2022

wrong code at -O3 on x86_64-linux-gnu #50430

Closed

fhahn closed this as completed in 6a6cc55 Apr 25, 2022

tavianator mentioned this issue Oct 14, 2022

Valgrind triggered on correct rust code - Potential miscompilation rust-lang/rust#102712

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

SimpleLoopUnswitch hoists branches on poison/undef (Wrong code at -O3 on x86_64-linux-gnu) #50387

SimpleLoopUnswitch hoists branches on poison/undef (Wrong code at -O3 on x86_64-linux-gnu) #50387

zhendongsu commented Jul 9, 2021

zhendongsu commented Jul 9, 2021

rotateright commented Jul 12, 2021

rotateright commented Jul 12, 2021

rotateright commented Jul 12, 2021

aeubanks commented Jul 13, 2021

aeubanks commented Jul 13, 2021

aeubanks commented Jul 13, 2021

aeubanks commented Jul 13, 2021

nunoplopes commented Jul 14, 2021

llvmbot commented Jul 14, 2021

rotateright commented Jul 14, 2021

regehr commented Jul 14, 2021

llvmbot commented Jul 16, 2021

llvmbot commented Jul 16, 2021

aqjune commented Jul 17, 2021

aeubanks commented Aug 2, 2021

aqjune commented Aug 3, 2021

aeubanks commented Aug 3, 2021

nunoplopes commented Aug 30, 2021

aeubanks commented Nov 27, 2021

nunoplopes commented Nov 27, 2021

fhahn commented Mar 21, 2022

aqjune commented Mar 21, 2022 •

edited

Loading

fhahn commented Mar 22, 2022

fhahn commented Apr 22, 2022

nunoplopes commented Apr 25, 2022

fhahn commented Apr 26, 2022

nunoplopes commented Apr 26, 2022

fhahn commented Apr 26, 2022

nunoplopes commented Apr 26, 2022

fhahn commented Apr 27, 2022

fhahn commented May 6, 2022

nunoplopes commented May 10, 2022

SimpleLoopUnswitch hoists branches on poison/undef (Wrong code at -O3 on x86_64-linux-gnu) #50387

SimpleLoopUnswitch hoists branches on poison/undef (Wrong code at -O3 on x86_64-linux-gnu) #50387

Comments

zhendongsu commented Jul 9, 2021

Extended Description

zhendongsu commented Jul 9, 2021

rotateright commented Jul 12, 2021

rotateright commented Jul 12, 2021

rotateright commented Jul 12, 2021

aeubanks commented Jul 13, 2021

aeubanks commented Jul 13, 2021

aeubanks commented Jul 13, 2021

aeubanks commented Jul 13, 2021

nunoplopes commented Jul 14, 2021

llvmbot commented Jul 14, 2021

rotateright commented Jul 14, 2021

regehr commented Jul 14, 2021

llvmbot commented Jul 16, 2021

llvmbot commented Jul 16, 2021

aqjune commented Jul 17, 2021

aeubanks commented Aug 2, 2021

aqjune commented Aug 3, 2021

aeubanks commented Aug 3, 2021

nunoplopes commented Aug 30, 2021

aeubanks commented Nov 27, 2021

nunoplopes commented Nov 27, 2021

fhahn commented Mar 21, 2022

aqjune commented Mar 21, 2022 • edited Loading

fhahn commented Mar 22, 2022

fhahn commented Apr 22, 2022

nunoplopes commented Apr 25, 2022

fhahn commented Apr 26, 2022

nunoplopes commented Apr 26, 2022

fhahn commented Apr 26, 2022

nunoplopes commented Apr 26, 2022

fhahn commented Apr 27, 2022

fhahn commented May 6, 2022

nunoplopes commented May 10, 2022

aqjune commented Mar 21, 2022 •

edited

Loading