LLVM Bugzilla is read-only and represents the historical archive of all LLVM issues filed before November 26, 2021. Use GitHub to submit LLVM bugs.

Bug 48123 - Redundant branches with ctlz and cttz
Summary: Redundant branches with ctlz and cttz
Status: NEW
Alias: None
Product: libraries
Classification: Unclassified
Component: Backend: X86 (show other bugs)
Version: trunk
Hardware: PC Windows NT
Importance: P enhancement
Assignee: Unassigned LLVM Bugs
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2020-11-09 15:03 PST by Diggory Blake
Modified: 2020-11-11 11:35 PST (History)
5 users (show)

See Also:
Fixed By Commit(s):


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Diggory Blake 2020-11-09 15:03:34 PST
Rust code:

```rust
pub fn can_represent_as_f64(x: u64) -> bool {
    x.leading_zeros() + x.trailing_zeros() >= 11
}
```

LLVM IR:
```llvm
define zeroext i1 @_ZN10playground20can_represent_as_f6417h8c9d47bab619cb5fE(i64 %x) unnamed_addr #0 {
start:
  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) #2, !range !2
  %1 = trunc i64 %0 to i32
  %2 = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) #2, !range !2
  %3 = trunc i64 %2 to i32
  %_2 = add nuw nsw i32 %1, %3
  %4 = icmp ugt i32 %_2, 10
  ret i1 %4
}
```

Assembly:
```asm
playground::can_represent_as_f64:
	mov	eax, 64
	mov	ecx, 64
	test	rdi, rdi    ; Initial test for zero and branch
	je	.LBB0_2
	bsr	rcx, rdi
	xor	rcx, 63

.LBB0_2:
	test	rdi, rdi    ; Second test for zero and branch
	je	.LBB0_4
	bsf	rax, rdi

.LBB0_4:
	add	ecx, eax
	cmp	ecx, 10
	seta	al
	ret
```

Instead of performing the zero test twice, the code should branch directly to .LBB0_4 after the first test.
Comment 1 Sanjay Patel 2020-11-11 11:33:43 PST
We expand the intrinsics in -codegenprepare, and I'm not sure where we would solve this. machine-cse seems like the most likely candidate, but it would require tracking eflags state across basic blocks. Not sure if we do that:
    TEST64rr %5, %5, implicit-def $eflags
    JCC_1 %bb.2, 4, implicit $eflags

IR going into SDAG:

  define zeroext i1 @_ZN10playground20can_represent_as_f6417h8c9d47bab619cb5fE(i64 %x) unnamed_addr {
  start:
    %cmpz = icmp eq i64 %x, 0
    br i1 %cmpz, label %cond.end, label %cond.false
  
  cond.false:                                       ; preds = %start
    %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
    br label %cond.end
  
  cond.end:                                         ; preds = %start, %cond.false
    %ctz = phi i64 [ 64, %start ], [ %0, %cond.false ]
    %1 = trunc i64 %ctz to i32
    %cmpz3 = icmp eq i64 %x, 0
    br i1 %cmpz3, label %cond.end2, label %cond.false1
  
  cond.false1:                                      ; preds = %cond.end
    %2 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
    br label %cond.end2
  
  cond.end2:                                        ; preds = %cond.end, %cond.false1
    %ctz4 = phi i64 [ 64, %cond.end ], [ %2, %cond.false1 ]
    %3 = trunc i64 %ctz4 to i32
    %_2 = add nuw nsw i32 %1, %3
    %4 = icmp ugt i32 %_2, 10
    ret i1 %4
  }
Comment 2 Sanjay Patel 2020-11-11 11:35:57 PST
Note that this should not be an issue when compiling for more recent x86:

	lzcntq	%rdi, %rax
	tzcntq	%rdi, %rcx
	addl	%eax, %ecx
	cmpl	$10, %ecx
	seta	%al
	retq