The problem I am observing is in the following snippet, where with optimisation level 2 switch on, the code inside the if block executes even if maxLength is 0: GuiTextField::GuiTextField(int maxLength) : base(), valid(true) { asm volatile(".marker1":::"memory"); if (maxLength > 0x1234) // testing; was > 0) { asm volatile(".marker2":::"memory"); printf("hmm %i %i\n", maxLength, maxLength > 0); this->SetMaxLength(maxLength); } asm volatile(".marker3":::"memory"); } Here is example output from code compiled at -O2: hmm 0 1 I am afraid that I have been unable to reduce a test-case out of this code (having spent maybe 6 hours on it today!), but hopefully the outputs below can help highlight the problem. Here is the assembly between marker1 and marker3 when compiling with -O0 for PowerPC (targetting 603e): #APP .marker1 #NO_APP lwz 3, 68(31) cmpwi 0, 3, 4661 blt 0, .LBB0_9 b .LBB0_2 .LBB0_2: #APP .marker2 #NO_APP lwz 3, 68(31) lis 4, .L.str@ha .Ltmp3: li 5, 0 li 6, 1 cmpwi 0, 3, 0 la 4, .L.str@l(4) stw 3, 32(31) stw 6, 28(31) stw 5, 24(31) stw 4, 20(31) bgt 0, .LBB0_4 lwz 3, 24(31) stw 3, 28(31) .LBB0_4: lwz 3, 28(31) lwz 4, 20(31) stw 3, 16(31) mr 3, 4 lwz 4, 32(31) lwz 5, 16(31) crxor 6, 6, 6 bl printf .Ltmp4: stw 3, 12(31) b .LBB0_5 .LBB0_5: lwz 4, 68(31) .Ltmp5: lwz 3, 48(31) bl _ZN17GuiEntryFieldBase12SetMaxLengthEi .Ltmp6: b .LBB0_6 .LBB0_6: b .LBB0_9 .LBB0_7: .Ltmp2: stw 3, 64(31) stw 4, 60(31) b .LBB0_10 .LBB0_8: .Ltmp7: stw 3, 64(31) stw 4, 60(31) lwz 3, 44(31) bl _ZN7QStringD2Ev b .LBB0_10 .LBB0_9: #APP .marker3 #NO_APP And here is the same but at -O1 (note that the cmpwi has moved relative to marker1 even though the marker1 declaration should prevent this, but the code still runs correctly in this case): cmpwi 0, 29, 4661 stb 3, 56(30) #APP .marker1 #NO_APP blt 0, .LBB0_2 lis 3, .L.str@ha mr 4, 29 li 5, 1 crxor 6, 6, 6 #APP .marker2 #NO_APP la 3, .L.str@l(3) bl printf .Ltmp0: mr 3, 30 mr 4, 29 bl _ZN17GuiEntryFieldBase12SetMaxLengthEi .Ltmp1: .LBB0_2: lwz 30, 24(31) lwz 29, 20(31) lwz 28, 16(31) #APP .marker3 #NO_APP And then at -O2 (by which point the cmpwi is miles away from its corresponding blt and the code inside the if block executes even though the condition is not met -- I expect corrupted by the lwarx/stwcx that is part of Qt's atomics operations): cmpwi 0, 29, 4661 la 3, _ZTV17GuiTextInputField@l(3) la 12, _ZN7QString11shared_nullE@l(5) addi 4, 3, 8 addi 3, 3, 368 stw 4, 0(30) stw 3, 8(30) stw 12, 52(30) li 3, 1 #APP lwarx 5,0, 12 addi 6, 5, 1 stwcx. 6,0, 12 bne- $-12 #NO_APP stb 3, 56(30) #APP .marker1 #NO_APP blt 0, .LBB0_4 lis 3, .L.str@ha mr 4, 29 li 5, 1 crxor 6, 6, 6 #APP .marker2 #NO_APP la 3, .L.str@l(3) bl printf lwz 3, 40(30) lwz 4, 0(3) lwz 4, 12(4) .Ltmp0: mtctr 4 bctrl mr 28, 3 .Ltmp1: lwz 5, 28(28) lis 3, .L.str1@ha mr 4, 28 mr 6, 29 crxor 6, 6, 6 la 3, .L.str1@l(3) bl printf stw 29, 28(28) lwz 3, 40(30) lwz 4, 0(3) lwz 4, 12(4) .Ltmp2: mtctr 4 bctrl mr 4, 3 .Ltmp3: lwz 5, 28(4) lis 3, .L.str2@ha crxor 6, 6, 6 la 3, .L.str2@l(3) bl printf .LBB0_4: lwz 30, 24(31) lwz 29, 20(31) lwz 28, 16(31) #APP .marker3 #NO_APP Compare the code from gcc 4.6 for ppc603e at -O2: #APP .marker1 #NO_APP cmpwi 7,30,4660 bgt- 7,.L7 #APP .marker3 #NO_APP lwz 0,36(1) lwz 29,20(1) mtlr 0 lwz 30,24(1) lwz 31,28(1) addi 1,1,32 .cfi_remember_state .cfi_def_cfa_offset 0 .cfi_restore 31 .cfi_restore 30 .cfi_restore 29 blr .L7: .cfi_restore_state #APP .marker2 #NO_APP lis 4,.LC0@ha li 3,1 la 4,.LC0@l(4) mr 5,30 li 6,1 .LEHB1: crxor 6,6,6 bl __printf_chk ... And here is the same code for x86_64 compiled by clang (note that the code works correctly here): #APP .marker1 #NO_APP cmpl $4661, %ebp # imm = 0x1235 jl .LBB0_4 # BB#1: # %invoke.cont5 #APP .marker2 #NO_APP movl $.L.str, %edi movl $1, %edx xorl %eax, %eax movl %ebp, %esi callq printf movq 80(%r14), %rdi movq (%rdi), %rax movq 24(%rax), %rax .Ltmp0: callq *%rax movq %rax, %rbx .Ltmp1: # BB#2: # %call.i.i.noexc movl 52(%rbx), %edx movl $.L.str1, %edi xorl %eax, %eax movq %rbx, %rsi movl %ebp, %ecx callq printf movl %ebp, 52(%rbx) movq 80(%r14), %rdi movq (%rdi), %rax movq 24(%rax), %rax .Ltmp2: callq *%rax movq %rax, %rcx .Ltmp3: # BB#3: # %_ZN17GuiEntryFieldBase12SetMaxLengthEi.exit movl 52(%rcx), %edx movl $.L.str2, %edi xorl %eax, %eax movq %rcx, %rsi callq printf .LBB0_4: # %if.end #APP .marker3 #NO_APP In case it is useful, here is the code for the Qt atomic function called in this example: inline bool QBasicAtomicInt::ref() { register int originalValue; register int newValue; asm volatile("lwarx %[originalValue]," _Q_VALUE "\n" "addi %[newValue], %[originalValue], %[one]\n" "stwcx. %[newValue]," _Q_VALUE "\n" "bne- $-12\n" : [originalValue] "=&b" (originalValue), [newValue] "=&r" (newValue), _Q_VALUE_MEMORY_OPERAND : _Q_VALUE_REGISTER_OPERAND [one] "i" (1) : "cc", "memory"); return newValue != 0; } If I were to hazard a guess, I would say that the PowerPC back-end is not taking due care of the asm constraints.
Sorry, I noticed after I submitted the bug that I'd somehow mistakenly marked it "release blocker", but then I thought that maybe this was reasonable after all! Please drop its severity if you don't agree! :o)
Thanks for the report. It looks like LLVM does not currently understand the "cc" register clobber (which I suppose is intended to mean that it clobbers all condition registers). I'll fix this. At the same time, you should submit a bug report against the upstream sources. "cc" is *very* pessimistic. "cr0" would be fine, and would still let the compiler use the other 7 cr registers.
I tried changing the "cc" to "cr0", and it seems to fix the issue. The output is now... #APP lwarx 5,0, 12 addi 6, 5, 1 stwcx. 6,0, 12 bne- $-12 #NO_APP stb 3, 56(30) cmpwi 0, 29, 4661 #APP .marker1 #NO_APP blt 0, .LBB0_4 I will search and replace all instances of this and try the complete application to see if that solves all the issues. Many, many thanks!
Fixed in r205630. Thanks for the report and working on the reduction!