GEP inbound with negative offset generated by loop vectorizer #47470

zhengyang92 · 2020-11-10T02:55:37Z


Bugzilla Link	48126
Version	trunk
OS	All
CC	@DMG862,@efriedma-quic,@fhahn,@jdoerfert,@aqjune,@LebedevRI,@zhengyang92,@nunoplopes,@regehr,@rotateright

Extended Description

Test case from test/Transforms/LoopVectorize/gcc-examples.ll:

;CHECK-LABEL: @example21(
;CHECK: load <4 x i32>
;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
;CHECK: ret i32
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
br label %3

; <label>:3 ; preds = %.lr.ph, %3
%indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
%a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %a.02
%7 = trunc i64 %indvars.iv.next to i32
%8 = icmp sgt i32 %7, 0
br i1 %8, label %3, label %._crit_edge

._crit_edge: ; preds = %3, %0
%a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
ret i32 %a.0.lcssa
}

Below is the vectorized function after calling opt with the arguments listed in the test case (-basic-aa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine).

; Function Attrs: nounwind readonly ssp uwtable
define i32 @example21(i32* nocapture %b, i32 %n) #0 {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
%3 = add i32 %n, -1
%4 = zext i32 %3 to i64
%5 = add nuw nsw i64 %4, 1
%min.iters.check = icmp ult i32 %3, 3
br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph: ; preds = %.lr.ph
%n.vec = and i64 %5, 8589934588
%ind.end = sub nsw i64 %2, %n.vec
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ]
%6 = xor i64 %index, -1
%7 = add i64 %6, %2
%8 = getelementptr inbounds i32, i32* %b, i64 -3
%9 = getelementptr inbounds i32, i32* %8, i64 %7
%10 = bitcast i32* %9 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %10, align 4
%reverse = shufflevector <4 x i32> %wide.load, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%11 = add <4 x i32> %reverse, %vec.phi
%index.next = add i64 %index, 4
%12 = icmp eq i64 %index.next, %n.vec
br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0

middle.block: ; preds = %vector.body
%rdx.shuf = shufflevector <4 x i32> %11, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = add <4 x i32> %11, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%bin.rdx2 = add <4 x i32> %bin.rdx, %rdx.shuf1
%13 = extractelement <4 x i32> %bin.rdx2, i32 0
%cmp.n = icmp eq i64 %5, %n.vec
br i1 %cmp.n, label %._crit_edge.loopexit, label %scalar.ph

scalar.ph: ; preds = %middle.block, %.lr.ph
%bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ %2, %.lr.ph ]
%bc.merge.rdx = phi i32 [ %13, %middle.block ], [ 0, %.lr.ph ]
br label %14

14: ; preds = %14, %scalar.ph
%indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %14 ]
%a.02 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %17, %14 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%15 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%16 = load i32, i32* %15, align 4
%17 = add nsw i32 %16, %a.02
%18 = trunc i64 %indvars.iv.next to i32
%19 = icmp sgt i32 %18, 0
br i1 %19, label %14, label %._crit_edge.loopexit, !llvm.loop !2

._crit_edge.loopexit: ; preds = %middle.block, %14
%.lcssa = phi i32 [ %17, %14 ], [ %13, %middle.block ]
br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %0
%a.0.lcssa = phi i32 [ 0, %0 ], [ %.lcssa, %._crit_edge.loopexit ]
ret i32 %a.0.lcssa
}

attributes #0 = { nounwind readonly ssp uwtable }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = distinct !{!2, !3, !1}
!3 = !{!"llvm.loop.unroll.runtime.disable"}

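Here %8 is a `getelementptr inbounds` generated by the loop vectorizer with a negative constant offset (-3) applied directly to %b. If %b points to the start of its underlying object, %b - 3 elements is out of bounds, so %8 is poison; %9 is derived from it and the wide load through it is then undefined behavior. The original loop only ever computes in-bounds addresses (%b + %n - 1 down to %b + 0), so the source of this transformation is more defined than the target.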

Alive2: https://alive2.llvm.org/ce/z/g2VueK

llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 10, 2021

nunoplopes added the miscompilation label May 26, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

GEP inbound with negative offset generated by loop vectorizer #47470

GEP inbound with negative offset generated by loop vectorizer #47470

zhengyang92 commented Nov 10, 2020

GEP inbound with negative offset generated by loop vectorizer #47470

GEP inbound with negative offset generated by loop vectorizer #47470

Comments

zhengyang92 commented Nov 10, 2020

Extended Description