Index: cfe/trunk/lib/CodeGen/CGDecl.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGDecl.cpp
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp
@@ -948,6 +948,113 @@
          canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
 }
 
+/// A byte pattern.
+///
+/// Can be "any" pattern if the value was padding or known to be undef.
+/// Can be "none" pattern if a sequence doesn't exist.
+class BytePattern {
+  uint8_t Val;
+  enum class ValueType : uint8_t { Specific, Any, None } Type;
+  BytePattern(ValueType Type) : Type(Type) {}
+
+public:
+  BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {}
+  static BytePattern Any() { return BytePattern(ValueType::Any); }
+  static BytePattern None() { return BytePattern(ValueType::None); }
+  bool isAny() const { return Type == ValueType::Any; }
+  bool isNone() const { return Type == ValueType::None; }
+  bool isValued() const { return Type == ValueType::Specific; }
+  uint8_t getValue() const {
+    assert(isValued());
+    return Val;
+  }
+  BytePattern merge(const BytePattern Other) const {
+    if (isNone() || Other.isNone())
+      return None();
+    if (isAny())
+      return Other;
+    if (Other.isAny())
+      return *this;
+    if (getValue() == Other.getValue())
+      return *this;
+    return None();
+  }
+};
+
+/// Figures out whether the constant can be initialized with memset.
+static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) {
+  if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C))
+    return BytePattern(0x00);
+  if (isa<llvm::UndefValue>(C))
+    return BytePattern::Any();
+
+  if (isa<llvm::ConstantInt>(C)) {
+    auto *Int = cast<llvm::ConstantInt>(C);
+    if (Int->getBitWidth() % 8 != 0)
+      return BytePattern::None();
+    const llvm::APInt &Value = Int->getValue();
+    if (Value.isSplat(8))
+      return BytePattern(Value.getLoBits(8).getLimitedValue());
+    return BytePattern::None();
+  }
+
+  if (isa<llvm::ConstantFP>(C)) {
+    auto *FP = cast<llvm::ConstantFP>(C);
+    llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt();
+    if (Bits.getBitWidth() % 8 != 0)
+      return BytePattern::None();
+    if (!Bits.isSplat(8))
+      return BytePattern::None();
+    return BytePattern(Bits.getLimitedValue() & 0xFF);
+  }
+
+  if (isa<llvm::ConstantVector>(C)) {
+    llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue();
+    if (Splat)
+      return constantIsRepeatedBytePattern(Splat);
+    return BytePattern::None();
+  }
+
+  if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) {
+    BytePattern Pattern(BytePattern::Any());
+    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+      llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I));
+      Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+      if (Pattern.isNone())
+        return Pattern;
+    }
+    return Pattern;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+          dyn_cast<llvm::ConstantDataSequential>(C)) {
+    BytePattern Pattern(BytePattern::Any());
+    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+      llvm::Constant *Elt = CDS->getElementAsConstant(I);
+      Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+      if (Pattern.isNone())
+        return Pattern;
+    }
+    return Pattern;
+  }
+
+  // BlockAddress, ConstantExpr, and everything else is scary.
+  return BytePattern::None();
+}
+
+/// Decide whether we should use memset to initialize a local variable instead
+/// of using a memcpy from a constant global. Assumes we've already decided to
+/// not user bzero.
+/// FIXME We could be more clever, as we are for bzero above, and generate
+/// memset followed by stores. It's unclear that's worth the effort.
+static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init,
+                                               uint64_t GlobalSize) {
+  uint64_t SizeLimit = 32;
+  if (GlobalSize <= SizeLimit)
+    return BytePattern::None();
+  return constantIsRepeatedBytePattern(Init);
+}
+
 /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
 /// variable declaration with auto, register, or no storage class specifier.
 /// These turn into simple stack objects, or GlobalValues depending on target.
@@ -1401,11 +1508,11 @@
   if (Loc.getType() != BP)
     Loc = Builder.CreateBitCast(Loc, BP);
 
-  // If the initializer is all or mostly zeros, codegen with bzero then do a
-  // few stores afterward.
-  if (shouldUseBZeroPlusStoresToInitialize(
-          constant,
-          CGM.getDataLayout().getTypeAllocSize(constant->getType()))) {
+  // If the initializer is all or mostly the same, codegen with bzero / memset
+  // then do a few stores afterward.
+  uint64_t ConstantSize =
+      CGM.getDataLayout().getTypeAllocSize(constant->getType());
+  if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
     Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
                          isVolatile);
     // Zero and undef don't require a stores.
@@ -1414,28 +1521,36 @@
         constant->getType()->getPointerTo(Loc.getAddressSpace()));
       emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
     }
-  } else {
-    // Otherwise, create a temporary global with the initializer then
-    // memcpy from the global to the alloca.
-    std::string Name = getStaticDeclName(CGM, D);
-    unsigned AS = CGM.getContext().getTargetAddressSpace(
-        CGM.getStringLiteralAddressSpace());
-    BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
-
-    llvm::GlobalVariable *GV =
-      new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true,
-                               llvm::GlobalValue::PrivateLinkage,
-                               constant, Name, nullptr,
-                               llvm::GlobalValue::NotThreadLocal, AS);
-    GV->setAlignment(Loc.getAlignment().getQuantity());
-    GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
-    Address SrcPtr = Address(GV, Loc.getAlignment());
-    if (SrcPtr.getType() != BP)
-      SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+    return;
+  }
 
-    Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+  BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
+  if (!Pattern.isNone()) {
+    uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue();
+    Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
+                         isVolatile);
+    return;
   }
+
+  // Otherwise, create a temporary global with the initializer then
+  // memcpy from the global to the alloca.
+  std::string Name = getStaticDeclName(CGM, D);
+  unsigned AS = CGM.getContext().getTargetAddressSpace(
+      CGM.getStringLiteralAddressSpace());
+  BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+
+  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+      CGM.getModule(), constant->getType(), true,
+      llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
+      llvm::GlobalValue::NotThreadLocal, AS);
+  GV->setAlignment(Loc.getAlignment().getQuantity());
+  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+  Address SrcPtr = Address(GV, Loc.getAlignment());
+  if (SrcPtr.getType() != BP)
+    SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+
+  Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
 }
 
 /// Emit an expression as an initializer for an object (variable, field, etc.)
Index: cfe/trunk/test/CodeGen/init.c
===================================================================
--- cfe/trunk/test/CodeGen/init.c
+++ cfe/trunk/test/CodeGen/init.c
@@ -140,6 +140,72 @@
   // CHECK: call void @bar
 }
 
+void nonzeroMemseti8() {
+  char arr[33] = { 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, };
+  // CHECK-LABEL: @nonzeroMemseti8(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 42, i32 33, i1 false)
+}
+
+void nonzeroMemseti16() {
+  unsigned short arr[17] = { 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, };
+  // CHECK-LABEL: @nonzeroMemseti16(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 66, i32 34, i1 false)
+}
+
+void nonzeroMemseti32() {
+  unsigned arr[9] = { 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, };
+  // CHECK-LABEL: @nonzeroMemseti32(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -16, i32 36, i1 false)
+}
+
+void nonzeroMemseti64() {
+  unsigned long long arr[7] = { 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, };
+  // CHECK-LABEL: @nonzeroMemseti64(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -86, i32 56, i1 false)
+}
+
+void nonzeroMemsetf32() {
+  float arr[9] = { 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, };
+  // CHECK-LABEL: @nonzeroMemsetf32(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 101, i32 36, i1 false)
+}
+
+void nonzeroMemsetf64() {
+  double arr[7] = { 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, };
+  // CHECK-LABEL: @nonzeroMemsetf64(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 68, i32 56, i1 false)
+}
+
+void nonzeroPaddedUnionMemset() {
+  union U { char c; int i; };
+  union U arr[9] = { 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, };
+  // CHECK-LABEL: @nonzeroPaddedUnionMemset(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -16, i32 36, i1 false)
+}
+
+void nonzeroNestedMemset() {
+  union U { char c; int i; };
+  struct S { union U u; short i; };
+  struct S arr[5] = { { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, };
+  // CHECK-LABEL: @nonzeroNestedMemset(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -16, i32 40, i1 false)
+}
 
 // PR9257
 struct test11S {