Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GEP inbound with negative offset generated by loop vectorizer #47470

Open
zhengyang92 opened this issue Nov 10, 2020 · 0 comments
Open

GEP inbound with negative offset generated by loop vectorizer #47470

zhengyang92 opened this issue Nov 10, 2020 · 0 comments
Labels
bugzilla Issues migrated from bugzilla loopoptim miscompilation

Comments

@zhengyang92
Copy link
Contributor

Bugzilla Link 48126
Version trunk
OS All
CC @DMG862,@efriedma-quic,@fhahn,@jdoerfert,@aqjune,@LebedevRI,@zhengyang92,@nunoplopes,@regehr,@rotateright

Extended Description

Test case from test/Transforms/LoopVectorize/gcc-examples.ll

;CHECK-LABEL: @example21(
;CHECK: load <4 x i32>
;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
;CHECK: ret i32
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
br label %3

; <label>:3 ; preds = %.lr.ph, %3
%indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
%a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %a.02
%7 = trunc i64 %indvars.iv.next to i32
%8 = icmp sgt i32 %7, 0
br i1 %8, label %3, label %._crit_edge

._crit_edge: ; preds = %3, %0
%a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
ret i32 %a.0.lcssa
}

Below is the vectorized function after calling opt with the arguments listed in the test case (-basic-aa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine).

; Function Attrs: nounwind readonly ssp uwtable
define i32 @example21(i32* nocapture %b, i32 %n) #0 {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
%3 = add i32 %n, -1
%4 = zext i32 %3 to i64
%5 = add nuw nsw i64 %4, 1
%min.iters.check = icmp ult i32 %3, 3
br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph: ; preds = %.lr.ph
%n.vec = and i64 %5, 8589934588
%ind.end = sub nsw i64 %2, %n.vec
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ]
%6 = xor i64 %index, -1
%7 = add i64 %6, %2
%8 = getelementptr inbounds i32, i32* %b, i64 -3
%9 = getelementptr inbounds i32, i32* %8, i64 %7
%10 = bitcast i32* %9 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %10, align 4
%reverse = shufflevector <4 x i32> %wide.load, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%11 = add <4 x i32> %reverse, %vec.phi
%index.next = add i64 %index, 4
%12 = icmp eq i64 %index.next, %n.vec
br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0

middle.block: ; preds = %vector.body
%rdx.shuf = shufflevector <4 x i32> %11, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = add <4 x i32> %11, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%bin.rdx2 = add <4 x i32> %bin.rdx, %rdx.shuf1
%13 = extractelement <4 x i32> %bin.rdx2, i32 0
%cmp.n = icmp eq i64 %5, %n.vec
br i1 %cmp.n, label %._crit_edge.loopexit, label %scalar.ph

scalar.ph: ; preds = %middle.block, %.lr.ph
%bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ %2, %.lr.ph ]
%bc.merge.rdx = phi i32 [ %13, %middle.block ], [ 0, %.lr.ph ]
br label %14

14: ; preds = %14, %scalar.ph
%indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %14 ]
%a.02 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %17, %14 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%15 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%16 = load i32, i32* %15, align 4
%17 = add nsw i32 %16, %a.02
%18 = trunc i64 %indvars.iv.next to i32
%19 = icmp sgt i32 %18, 0
br i1 %19, label %14, label %._crit_edge.loopexit, !llvm.loop !2

._crit_edge.loopexit: ; preds = %middle.block, %14
%.lcssa = phi i32 [ %17, %14 ], [ %13, %middle.block ]
br label %._crit_edge

._crit_edge: ; preds = %._crit_edge.loopexit, %0
%a.0.lcssa = phi i32 [ 0, %0 ], [ %.lcssa, %._crit_edge.loopexit ]
ret i32 %a.0.lcssa
}

attributes #0 = { nounwind readonly ssp uwtable }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = distinct !{!2, !3, !1}
!3 = !{!"llvm.loop.unroll.runtime.disable"}

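Here %8 (`getelementptr inbounds i32, i32* %b, i64 -3`) is a newly generated inbounds GEP with a negative offset from %b. The original loop only computes addresses from %b + (n - 1) down to %b + 0, so it never steps below %b. If %b points to the start of its underlying object, %8 is out of bounds and therefore poison, which makes the address %9 derived from it, and the wide load through it, undefined. The source of this transformation is thus more defined than the target.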

Alive2: https://alive2.llvm.org/ce/z/g2VueK

@llvmbot llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 10, 2021
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bugzilla Issues migrated from bugzilla loopoptim miscompilation
Projects
None yet
Development

No branches or pull requests

2 participants