You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Below is the vectorized function after calling opt with the arguments listed in the test case (-basic-aa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine).
Extended Description
Test case from test/Transforms/LoopVectorize/gcc-examples.ll
; Expectations for the vectorized output: one wide load followed by a
; lane-reversing shuffle, because the loop below walks %b from the highest
; index down to 0.
;CHECK-LABEL: @example21(
;CHECK: load <4 x i32>
;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
;CHECK: ret i32

; Returns %b[%n-1] + %b[%n-2] + ... + %b[0], or 0 when %n <= 0.
;   %b - base pointer of the i32 array being summed (only read, never captured)
;   %n - number of elements to sum, counted down from the top
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
  ; Guard: the loop body only runs for a positive element count.
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Widen the count once so the induction variable can index as i64.
  %2 = sext i32 %n to i64
  br label %3

; <label>:3                                       ; preds = %.lr.ph, %3
  ; %indvars.iv counts down from %n; %a.02 is the running sum.
  %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
  %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
  ; Decrement first, so the element read this iteration is %b[iv - 1].
  %indvars.iv.next = add i64 %indvars.iv, -1
  %4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
  %5 = load i32, i32* %4, align 4
  %6 = add nsw i32 %5, %a.02
  ; Keep looping while the (truncated) decremented index is still positive.
  %7 = trunc i64 %indvars.iv.next to i32
  %8 = icmp sgt i32 %7, 0
  br i1 %8, label %3, label %._crit_edge

._crit_edge:                                      ; preds = %3, %0
  ; 0 when the loop was skipped, otherwise the final accumulated sum.
  %a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
  ret i32 %a.0.lcssa
}
Below is the vectorized function after calling opt with the arguments listed in the test case (-basic-aa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine).
; 4-wide vectorized form of @example21 as printed by opt.
; Shape: guard -> .lr.ph (trip-count setup) -> vector.body -> middle.block
; (horizontal reduction) -> scalar remainder loop -> exit.
; Function Attrs: nounwind readonly ssp uwtable
define i32 @example21(i32* nocapture %b, i32 %n) #0 {
; Skip all loops when %n <= 0; the result is then 0.
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
; %5 = zext(%n - 1) + 1, i.e. the trip count as an i64.
%3 = add i32 %n, -1
%4 = zext i32 %3 to i64
%5 = add nuw nsw i64 %4, 1
; %n - 1 < 3 (unsigned) means fewer than 4 iterations: run only the scalar loop.
%min.iters.check = icmp ult i32 %3, 3
br i1 %min.iters.check, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %.lr.ph
; Mask 8589934588 == 0x1FFFFFFFC clears the low two bits: trip count rounded
; down to a multiple of 4.
%n.vec = and i64 %5, 8589934588
; Induction value at which the scalar remainder loop resumes.
%ind.end = sub nsw i64 %2, %n.vec
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
; %index counts elements already processed; %vec.phi holds 4 partial sums.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ]
; %6 = ~%index = -%index - 1, so %7 = %n - 1 - %index: the highest element
; index touched in this vector iteration.
%6 = xor i64 %index, -1
%7 = add i64 %6, %2
; This is the GEP the report is about: a constant -3 offset is applied to %b
; with `inbounds` *before* the variable index is added.
; NOTE(review): if %b - 3 lies outside the underlying object, `inbounds` makes
; %8 poison even though the final address %9 = &%b[%n - 4 - %index] is in range.
%8 = getelementptr inbounds i32, i32* %b, i64 -3
%9 = getelementptr inbounds i32, i32* %8, i64 %7
; Load %b[%n-4-%index .. %n-1-%index] as one vector, then reverse the lanes so
; lane 0 holds the highest-indexed element, matching the scalar iteration order.
%10 = bitcast i32* %9 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %10, align 4
%reverse = shufflevector <4 x i32> %wide.load, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%11 = add <4 x i32> %reverse, %vec.phi
%index.next = add i64 %index, 4
%12 = icmp eq i64 %index.next, %n.vec
br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0
middle.block: ; preds = %vector.body
; Horizontal add of the 4 partial sums: fold lanes 2,3 onto lanes 0,1, then
; lane 1 onto lane 0, and extract lane 0 as the scalar total.
%rdx.shuf = shufflevector <4 x i32> %11, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = add <4 x i32> %11, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%bin.rdx2 = add <4 x i32> %bin.rdx, %rdx.shuf1
%13 = extractelement <4 x i32> %bin.rdx2, i32 0
; If the trip count was already a multiple of 4 there is no remainder to run.
%cmp.n = icmp eq i64 %5, %n.vec
br i1 %cmp.n, label %._crit_edge.loopexit, label %scalar.ph
scalar.ph: ; preds = %middle.block, %.lr.ph
; Resume values: continue after the vector loop, or start from scratch when the
; vector loop was bypassed by %min.iters.check.
%bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ %2, %.lr.ph ]
%bc.merge.rdx = phi i32 [ %13, %middle.block ], [ 0, %.lr.ph ]
br label %14
14: ; preds = %14, %scalar.ph
; Scalar remainder loop: same body as the original, one element per iteration.
%indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %14 ]
%a.02 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %17, %14 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%15 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%16 = load i32, i32* %15, align 4
%17 = add nsw i32 %16, %a.02
%18 = trunc i64 %indvars.iv.next to i32
%19 = icmp sgt i32 %18, 0
br i1 %19, label %14, label %._crit_edge.loopexit, !llvm.loop !2
._crit_edge.loopexit: ; preds = %middle.block, %14
; Final sum from whichever loop ran last.
%.lcssa = phi i32 [ %17, %14 ], [ %13, %middle.block ]
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %0
; 0 when %n <= 0, otherwise the computed sum.
%a.0.lcssa = phi i32 [ 0, %0 ], [ %.lcssa, %._crit_edge.loopexit ]
ret i32 %a.0.lcssa
}
; Attribute group referenced as #0 by the vectorized @example21.
attributes #0 = { nounwind readonly ssp uwtable }

; Loop ID attached to the vector loop. A loop ID is a distinct node whose first
; operand refers back to itself; the remaining operands are loop properties.
!0 = distinct !{!0, !1}
; Marks a loop as already vectorized.
!1 = !{!"llvm.loop.isvectorized", i32 1}
; Loop ID attached to the scalar remainder loop: runtime unrolling disabled,
; and also marked as vectorized.
!2 = distinct !{!2, !3, !1}
!3 = !{!"llvm.loop.unroll.runtime.disable"}
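Here %8 is a generated `getelementptr inbounds` with a negative constant offset (-3) applied directly to %b. If %b points to the start of its underlying object, %b - 3 is out of bounds, so %8 (and every value derived from it, including the address of the wide load) is poison in the vectorized code, even when every address formed by the original loop is in bounds. The source of this transformation is therefore more defined than the target.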
Alive2: https://alive2.llvm.org/ce/z/g2VueK
The text was updated successfully, but these errors were encountered: