/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Bug Summary

File:	lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Warning:	line 1672, column 7 Value stored to 'MadeChange' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name InstCombineSimplifyDemanded.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn329677/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Transforms/InstCombine -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-04-11-031539-24776-1 -x c++ /build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

1	//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2	//
3	// The LLVM Compiler Infrastructure
4	//
5	// This file is distributed under the University of Illinois Open Source
6	// License. See LICENSE.TXT for details.
7	//
8	//===----------------------------------------------------------------------===//
9	//
10	// This file contains logic for simplifying instructions based on information
11	// about how they are used.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "InstCombineInternal.h"
16	#include "llvm/Analysis/ValueTracking.h"
17	#include "llvm/IR/IntrinsicInst.h"
18	#include "llvm/IR/PatternMatch.h"
19	#include "llvm/Support/KnownBits.h"
20
21	using namespace llvm;
22	using namespace llvm::PatternMatch;
23
24	#define DEBUG_TYPE"instcombine" "instcombine"
25
26	/// Check to see if the specified operand of the specified instruction is a
27	/// constant integer. If so, check to see if there are any bits set in the
28	/// constant that are not demanded. If so, shrink the constant and return true.
	///
	/// I is the instruction whose operand is inspected (asserted non-null), OpNo
	/// is the index of that operand (asserted to be a valid operand index of I),
	/// and Demanded is the mask of operand bits that are demanded.
	///
	/// Returns false, leaving I untouched, when the operand does not match
	/// m_APInt or when every set bit of the constant is already inside Demanded.
	/// Otherwise operand OpNo is replaced by the constant ANDed with Demanded and
	/// the function returns true.
29	static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
30	const APInt &Demanded) {
31	assert(I && "No instruction?")(static_cast <bool> (I && "No instruction?") ? void (0) : __assert_fail ("I && \"No instruction?\"", "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 31, __extension__ __PRETTY_FUNCTION__));
32	assert(OpNo < I->getNumOperands() && "Operand index too large")(static_cast <bool> (OpNo < I->getNumOperands() && "Operand index too large") ? void (0) : __assert_fail ("OpNo < I->getNumOperands() && \"Operand index too large\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 32, __extension__ __PRETTY_FUNCTION__));
33
34	// The operand must be a constant integer or splat integer.
35	Value *Op = I->getOperand(OpNo);
36	const APInt *C;
37	if (!match(Op, m_APInt(C)))
38	return false;
39
40	// If there are no bits set that aren't demanded, nothing to do.
41	if (C->isSubsetOf(Demanded))
42	return false;
43
44	// This instruction is producing bits that are not demanded. Shrink the
	// constant operand by masking it with Demanded; the new constant is built
	// with the operand's own type.
45	I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));
46
47	return true;
48	}
49
50
51
52	/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
53	/// the instruction has any properties that allow us to simplify its operands.
	///
	/// Runs SimplifyDemandedUseBits on Inst itself at depth 0, with every bit of
	/// its scalar width demanded and Inst as the context instruction. Returns
	/// false if that call reports no change (null). Returns true otherwise; when
	/// the call yields a value other than Inst, all uses of Inst are first
	/// replaced with that value.
54	bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
55	unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
56	KnownBits Known(BitWidth);
	// Demand all bits of the result.
57	APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
58
59	Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
60	0, &Inst);
	// Null: nothing changed.
61	if (!V) return false;
	// Inst itself: the change was made in place, so there are no uses to
	// rewrite.
62	if (V == &Inst) return true;
	// Any other value: it stands in for Inst, so redirect Inst's uses to it.
63	replaceInstUsesWith(Inst, V);
64	return true;
65	}
66
67	/// This form of SimplifyDemandedBits simplifies the specified instruction
68	/// operand if possible, updating it in place. It returns true if it made any
69	/// change and false otherwise.
	///
	/// I is the user instruction and OpNo selects which of its operands to
	/// simplify. DemandedMask, Known and Depth are forwarded unchanged to
	/// SimplifyDemandedUseBits, with I passed as the context instruction.
70	bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
71	const APInt &DemandedMask,
72	KnownBits &Known,
73	unsigned Depth) {
74	Use &U = I->getOperandUse(OpNo);
75	Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
76	Depth, I);
	// A null result means no simplification was found for this operand.
77	if (!NewVal) return false;
	// Point this use of the operand at the returned value.
78	U = NewVal;
79	return true;
80	}
81
82
83	/// This function attempts to replace V with a simpler value based on the
84	/// demanded bits. When this function is called, it is known that only the bits
85	/// set in DemandedMask of the result of V are ever used downstream.
86	/// Consequently, depending on the mask and V, it may be possible to replace V
87	/// with a constant or one of its operands. In such cases, this function does
88	/// the replacement and returns true. In all other cases, it returns false after
89	/// analyzing the expression and setting Known.One to the bits known to be one in the
90	/// expression. Known.Zero contains all the bits that are known to be zero in
91	/// the expression. These are provided to potentially allow the caller (which
92	/// might recursively be SimplifyDemandedBits itself) to simplify the
93	/// expression.
94	/// Known.One and Known.Zero always follow the invariant that:
95	/// Known.One & Known.Zero == 0.
96	/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
97	/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
98	/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
99	/// be the same.
100	///
101	/// This returns null if it did not change anything and it permits no
102	/// simplification. This returns V itself if it did some simplification of V's
103	/// operands based on the information about what bits are demanded. This returns
104	/// some other non-null value if it found out that V is equal to another value
105	/// in the context where the specified bits are demanded, but not for all users.
106	Value InstCombiner::SimplifyDemandedUseBits(Value V, APInt DemandedMask,
107	KnownBits &Known, unsigned Depth,
108	Instruction *CxtI) {
109	assert(V != nullptr && "Null pointer of Value???")(static_cast <bool> (V != nullptr && "Null pointer of Value???" ) ? void (0) : __assert_fail ("V != nullptr && \"Null pointer of Value???\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 109, __extension__ __PRETTY_FUNCTION__));
110	assert(Depth <= 6 && "Limit Search Depth")(static_cast <bool> (Depth <= 6 && "Limit Search Depth" ) ? void (0) : __assert_fail ("Depth <= 6 && \"Limit Search Depth\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 110, __extension__ __PRETTY_FUNCTION__));
111	uint32_t BitWidth = DemandedMask.getBitWidth();
112	Type *VTy = V->getType();
113	assert((static_cast <bool> ((!VTy->isIntOrIntVectorTy() \|\| VTy ->getScalarSizeInBits() == BitWidth) && Known.getBitWidth () == BitWidth && "Value V, DemandedMask and Known must have same BitWidth" ) ? void (0) : __assert_fail ("(!VTy->isIntOrIntVectorTy() \|\| VTy->getScalarSizeInBits() == BitWidth) && Known.getBitWidth() == BitWidth && \"Value V, DemandedMask and Known must have same BitWidth\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 116, __extension__ __PRETTY_FUNCTION__))
114	(!VTy->isIntOrIntVectorTy() \|\| VTy->getScalarSizeInBits() == BitWidth) &&(static_cast <bool> ((!VTy->isIntOrIntVectorTy() \|\| VTy ->getScalarSizeInBits() == BitWidth) && Known.getBitWidth () == BitWidth && "Value V, DemandedMask and Known must have same BitWidth" ) ? void (0) : __assert_fail ("(!VTy->isIntOrIntVectorTy() \|\| VTy->getScalarSizeInBits() == BitWidth) && Known.getBitWidth() == BitWidth && \"Value V, DemandedMask and Known must have same BitWidth\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 116, __extension__ __PRETTY_FUNCTION__))
115	Known.getBitWidth() == BitWidth &&(static_cast <bool> ((!VTy->isIntOrIntVectorTy() \|\| VTy ->getScalarSizeInBits() == BitWidth) && Known.getBitWidth () == BitWidth && "Value V, DemandedMask and Known must have same BitWidth" ) ? void (0) : __assert_fail ("(!VTy->isIntOrIntVectorTy() \|\| VTy->getScalarSizeInBits() == BitWidth) && Known.getBitWidth() == BitWidth && \"Value V, DemandedMask and Known must have same BitWidth\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 116, __extension__ __PRETTY_FUNCTION__))
116	"Value V, DemandedMask and Known must have same BitWidth")(static_cast <bool> ((!VTy->isIntOrIntVectorTy() \|\| VTy ->getScalarSizeInBits() == BitWidth) && Known.getBitWidth () == BitWidth && "Value V, DemandedMask and Known must have same BitWidth" ) ? void (0) : __assert_fail ("(!VTy->isIntOrIntVectorTy() \|\| VTy->getScalarSizeInBits() == BitWidth) && Known.getBitWidth() == BitWidth && \"Value *V, DemandedMask and Known must have same BitWidth\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 116, __extension__ __PRETTY_FUNCTION__));
117
118	if (isa<Constant>(V)) {
119	computeKnownBits(V, Known, Depth, CxtI);
120	return nullptr;
121	}
122
123	Known.resetAll();
124	if (DemandedMask.isNullValue()) // Not demanding any bits from V.
125	return UndefValue::get(VTy);
126
127	if (Depth == 6) // Limit search depth.
128	return nullptr;
129
130	Instruction *I = dyn_cast<Instruction>(V);
131	if (!I) {
132	computeKnownBits(V, Known, Depth, CxtI);
133	return nullptr; // Only analyze instructions.
134	}
135
136	// If there are multiple uses of this value and we aren't at the root, then
137	// we can't do any simplifications of the operands, because DemandedMask
138	// only reflects the bits demanded by one of the users.
139	if (Depth != 0 && !I->hasOneUse())
140	return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
141
142	KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
143
144	// If this is the root being simplified, allow it to have multiple uses,
145	// just set the DemandedMask to all bits so that we can try to simplify the
146	// operands. This allows visitTruncInst (for example) to simplify the
147	// operand of a trunc without duplicating all the logic below.
148	if (Depth == 0 && !V->hasOneUse())
149	DemandedMask.setAllBits();
150
151	switch (I->getOpcode()) {
152	default:
153	computeKnownBits(I, Known, Depth, CxtI);
154	break;
155	case Instruction::And: {
156	// If either the LHS or the RHS are Zero, the result is zero.
157	if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) \|\|
158	SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
159	Depth + 1))
160	return I;
161	assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!RHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!RHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 161, __extension__ __PRETTY_FUNCTION__));
162	assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!LHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!LHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 162, __extension__ __PRETTY_FUNCTION__));
163
164	// Output known-0 are known to be clear if zero in either the LHS \| RHS.
165	APInt IKnownZero = RHSKnown.Zero \| LHSKnown.Zero;
166	// Output known-1 bits are only known if set in both the LHS & RHS.
167	APInt IKnownOne = RHSKnown.One & LHSKnown.One;
168
169	// If the client is only demanding bits that we know, return the known
170	// constant.
171	if (DemandedMask.isSubsetOf(IKnownZero\|IKnownOne))
172	return Constant::getIntegerValue(VTy, IKnownOne);
173
174	// If all of the demanded bits are known 1 on one side, return the other.
175	// These bits cannot contribute to the result of the 'and'.
176	if (DemandedMask.isSubsetOf(LHSKnown.Zero \| RHSKnown.One))
177	return I->getOperand(0);
178	if (DemandedMask.isSubsetOf(RHSKnown.Zero \| LHSKnown.One))
179	return I->getOperand(1);
180
181	// If the RHS is a constant, see if we can simplify it.
182	if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
183	return I;
184
185	Known.Zero = std::move(IKnownZero);
186	Known.One = std::move(IKnownOne);
187	break;
188	}
189	case Instruction::Or: {
190	// If either the LHS or the RHS are One, the result is One.
191	if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) \|\|
192	SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
193	Depth + 1))
194	return I;
195	assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!RHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!RHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 195, __extension__ __PRETTY_FUNCTION__));
196	assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!LHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!LHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 196, __extension__ __PRETTY_FUNCTION__));
197
198	// Output known-0 bits are only known if clear in both the LHS & RHS.
199	APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
200	// Output known-1 are known to be set if set in either the LHS \| RHS.
201	APInt IKnownOne = RHSKnown.One \| LHSKnown.One;
202
203	// If the client is only demanding bits that we know, return the known
204	// constant.
205	if (DemandedMask.isSubsetOf(IKnownZero\|IKnownOne))
206	return Constant::getIntegerValue(VTy, IKnownOne);
207
208	// If all of the demanded bits are known zero on one side, return the other.
209	// These bits cannot contribute to the result of the 'or'.
210	if (DemandedMask.isSubsetOf(LHSKnown.One \| RHSKnown.Zero))
211	return I->getOperand(0);
212	if (DemandedMask.isSubsetOf(RHSKnown.One \| LHSKnown.Zero))
213	return I->getOperand(1);
214
215	// If the RHS is a constant, see if we can simplify it.
216	if (ShrinkDemandedConstant(I, 1, DemandedMask))
217	return I;
218
219	Known.Zero = std::move(IKnownZero);
220	Known.One = std::move(IKnownOne);
221	break;
222	}
223	case Instruction::Xor: {
224	if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) \|\|
225	SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
226	return I;
227	assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!RHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!RHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 227, __extension__ __PRETTY_FUNCTION__));
228	assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!LHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!LHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 228, __extension__ __PRETTY_FUNCTION__));
229
230	// Output known-0 bits are known if clear or set in both the LHS & RHS.
231	APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) \|
232	(RHSKnown.One & LHSKnown.One);
233	// Output known-1 are known to be set if set in only one of the LHS, RHS.
234	APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) \|
235	(RHSKnown.One & LHSKnown.Zero);
236
237	// If the client is only demanding bits that we know, return the known
238	// constant.
239	if (DemandedMask.isSubsetOf(IKnownZero\|IKnownOne))
240	return Constant::getIntegerValue(VTy, IKnownOne);
241
242	// If all of the demanded bits are known zero on one side, return the other.
243	// These bits cannot contribute to the result of the 'xor'.
244	if (DemandedMask.isSubsetOf(RHSKnown.Zero))
245	return I->getOperand(0);
246	if (DemandedMask.isSubsetOf(LHSKnown.Zero))
247	return I->getOperand(1);
248
249	// If all of the demanded bits are known to be zero on one side or the
250	// other, turn this into an inclusive or.
251	// e.g. (A & C1)^(B & C2) -> (A & C1)\|(B & C2) iff C1&C2 == 0
252	if (DemandedMask.isSubsetOf(RHSKnown.Zero \| LHSKnown.Zero)) {
253	Instruction *Or =
254	BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
255	I->getName());
256	return InsertNewInstWith(Or, *I);
257	}
258
259	// If all of the demanded bits on one side are known, and all of the set
260	// bits on that side are also known to be set on the other side, turn this
261	// into an AND, as we know the bits will be cleared.
262	// e.g. (X \| C1) ^ C2 --> (X \| C1) & ~C2 iff (C1&C2) == C2
263	if (DemandedMask.isSubsetOf(RHSKnown.Zero\|RHSKnown.One) &&
264	RHSKnown.One.isSubsetOf(LHSKnown.One)) {
265	Constant *AndC = Constant::getIntegerValue(VTy,
266	~RHSKnown.One & DemandedMask);
267	Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
268	return InsertNewInstWith(And, *I);
269	}
270
271	// If the RHS is a constant, see if we can simplify it.
272	// FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
273	if (ShrinkDemandedConstant(I, 1, DemandedMask))
274	return I;
275
276	// If our LHS is an 'and' and if it has one use, and if any of the bits we
277	// are flipping are known to be set, then the xor is just resetting those
278	// bits to zero. We can just knock out bits from the 'and' and the 'xor',
279	// simplifying both of them.
280	if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
281	if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
282	isa<ConstantInt>(I->getOperand(1)) &&
283	isa<ConstantInt>(LHSInst->getOperand(1)) &&
284	(LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
285	ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
286	ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
287	APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
288
289	Constant *AndC =
290	ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
291	Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
292	InsertNewInstWith(NewAnd, *I);
293
294	Constant *XorC =
295	ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
296	Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
297	return InsertNewInstWith(NewXor, *I);
298	}
299
300	// Output known-0 bits are known if clear or set in both the LHS & RHS.
301	Known.Zero = std::move(IKnownZero);
302	// Output known-1 are known to be set if set in only one of the LHS, RHS.
303	Known.One = std::move(IKnownOne);
304	break;
305	}
306	case Instruction::Select:
307	// If this is a select as part of a min/max pattern, don't simplify any
308	// further in case we break the structure.
309	Value LHS, RHS;
310	if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
311	return nullptr;
312
313	if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) \|\|
314	SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
315	return I;
316	assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!RHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!RHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 316, __extension__ __PRETTY_FUNCTION__));
317	assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!LHSKnown.hasConflict() && "Bits known to be one AND zero?") ? void (0) : __assert_fail ("!LHSKnown.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 317, __extension__ __PRETTY_FUNCTION__));
318
319	// If the operands are constants, see if we can simplify them.
320	if (ShrinkDemandedConstant(I, 1, DemandedMask) \|\|
321	ShrinkDemandedConstant(I, 2, DemandedMask))
322	return I;
323
324	// Only known if known in both the LHS and RHS.
325	Known.One = RHSKnown.One & LHSKnown.One;
326	Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
327	break;
328	case Instruction::ZExt:
329	case Instruction::Trunc: {
330	unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
331
332	APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
333	KnownBits InputKnown(SrcBitWidth);
334	if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
335	return I;
336	Known = InputKnown.zextOrTrunc(BitWidth);
337	// Any top bits are known to be zero.
338	if (BitWidth > SrcBitWidth)
339	Known.Zero.setBitsFrom(SrcBitWidth);
340	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 340, __extension__ __PRETTY_FUNCTION__));
341	break;
342	}
343	case Instruction::BitCast:
344	if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
345	return nullptr; // vector->int or fp->int?
346
347	if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
348	if (VectorType *SrcVTy =
349	dyn_cast<VectorType>(I->getOperand(0)->getType())) {
350	if (DstVTy->getNumElements() != SrcVTy->getNumElements())
351	// Don't touch a bitcast between vectors of different element counts.
352	return nullptr;
353	} else
354	// Don't touch a scalar-to-vector bitcast.
355	return nullptr;
356	} else if (I->getOperand(0)->getType()->isVectorTy())
357	// Don't touch a vector-to-scalar bitcast.
358	return nullptr;
359
360	if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
361	return I;
362	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 362, __extension__ __PRETTY_FUNCTION__));
363	break;
364	case Instruction::SExt: {
365	// Compute the bits in the result that are not present in the input.
366	unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
367
368	APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);
369
370	// If any of the sign extended bits are demanded, we know that the sign
371	// bit is demanded.
372	if (DemandedMask.getActiveBits() > SrcBitWidth)
373	InputDemandedBits.setBit(SrcBitWidth-1);
374
375	KnownBits InputKnown(SrcBitWidth);
376	if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
377	return I;
378
379	// If the input sign bit is known zero, or if the NewBits are not demanded
380	// convert this into a zero extension.
381	if (InputKnown.isNonNegative() \|\|
382	DemandedMask.getActiveBits() <= SrcBitWidth) {
383	// Convert to ZExt cast.
384	CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
385	return InsertNewInstWith(NewCast, *I);
386	}
387
388	// If the sign bit of the input is known set or clear, then we know the
389	// top bits of the result.
390	Known = InputKnown.sext(BitWidth);
391	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 391, __extension__ __PRETTY_FUNCTION__));
392	break;
393	}
394	case Instruction::Add:
395	case Instruction::Sub: {
396	/// If the high-bits of an ADD/SUB are not demanded, then we do not care
397	/// about the high bits of the operands.
398	unsigned NLZ = DemandedMask.countLeadingZeros();
399	// Right fill the mask of bits for this ADD/SUB to demand the most
400	// significant bit and all those below it.
401	APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
402	if (ShrinkDemandedConstant(I, 0, DemandedFromOps) \|\|
403	SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) \|\|
404	ShrinkDemandedConstant(I, 1, DemandedFromOps) \|\|
405	SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
406	if (NLZ > 0) {
407	// Disable the nsw and nuw flags here: We can no longer guarantee that
408	// we won't wrap after simplification. Removing the nsw/nuw flags is
409	// legal here because the top bit is not demanded.
410	BinaryOperator &BinOP = *cast<BinaryOperator>(I);
411	BinOP.setHasNoSignedWrap(false);
412	BinOP.setHasNoUnsignedWrap(false);
413	}
414	return I;
415	}
416
417	// If we are known to be adding/subtracting zeros to every bit below
418	// the highest demanded bit, we just return the other side.
419	if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
420	return I->getOperand(0);
421	// We can't do this with the LHS for subtraction, unless we are only
422	// demanding the LSB.
423	if ((I->getOpcode() == Instruction::Add \|\|
424	DemandedFromOps.isOneValue()) &&
425	DemandedFromOps.isSubsetOf(LHSKnown.Zero))
426	return I->getOperand(1);
427
428	// Otherwise just compute the known bits of the result.
429	bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
430	Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
431	NSW, LHSKnown, RHSKnown);
432	break;
433	}
434	case Instruction::Shl: {
435	const APInt *SA;
436	if (match(I->getOperand(1), m_APInt(SA))) {
437	const APInt *ShrAmt;
438	if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
439	if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
440	if (Value R = simplifyShrShlDemandedBits(Shr, ShrAmt, I, *SA,
441	DemandedMask, Known))
442	return R;
443
444	uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
445	APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
446
447	// If the shift is NUW/NSW, then it does demand the high bits.
448	ShlOperator *IOp = cast<ShlOperator>(I);
449	if (IOp->hasNoSignedWrap())
450	DemandedMaskIn.setHighBits(ShiftAmt+1);
451	else if (IOp->hasNoUnsignedWrap())
452	DemandedMaskIn.setHighBits(ShiftAmt);
453
454	if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
455	return I;
456	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 456, __extension__ __PRETTY_FUNCTION__));
457	Known.Zero <<= ShiftAmt;
458	Known.One <<= ShiftAmt;
459	// low bits known zero.
460	if (ShiftAmt)
461	Known.Zero.setLowBits(ShiftAmt);
462	}
463	break;
464	}
465	case Instruction::LShr: {
466	const APInt *SA;
467	if (match(I->getOperand(1), m_APInt(SA))) {
468	uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
469
470	// Unsigned shift right.
471	APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
472
473	// If the shift is exact, then it does demand the low bits (and knows that
474	// they are zero).
475	if (cast<LShrOperator>(I)->isExact())
476	DemandedMaskIn.setLowBits(ShiftAmt);
477
478	if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
479	return I;
480	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 480, __extension__ __PRETTY_FUNCTION__));
481	Known.Zero.lshrInPlace(ShiftAmt);
482	Known.One.lshrInPlace(ShiftAmt);
483	if (ShiftAmt)
484	Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
485	}
486	break;
487	}
488	case Instruction::AShr: {
489	// If this is an arithmetic shift right and only the low-bit is set, we can
490	// always convert this into a logical shr, even if the shift amount is
491	// variable. The low bit of the shift cannot be an input sign bit unless
492	// the shift amount is >= the size of the datatype, which is undefined.
493	if (DemandedMask.isOneValue()) {
494	// Perform the logical shift right.
495	Instruction *NewVal = BinaryOperator::CreateLShr(
496	I->getOperand(0), I->getOperand(1), I->getName());
497	return InsertNewInstWith(NewVal, *I);
498	}
499
500	// If the sign bit is the only bit demanded by this ashr, then there is no
501	// need to do it, the shift doesn't change the high bit.
502	if (DemandedMask.isSignMask())
503	return I->getOperand(0);
504
505	const APInt *SA;
506	if (match(I->getOperand(1), m_APInt(SA))) {
507	uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
508
509	// Signed shift right.
510	APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
511	// If any of the high bits are demanded, we should set the sign bit as
512	// demanded.
513	if (DemandedMask.countLeadingZeros() <= ShiftAmt)
514	DemandedMaskIn.setSignBit();
515
516	// If the shift is exact, then it does demand the low bits (and knows that
517	// they are zero).
518	if (cast<AShrOperator>(I)->isExact())
519	DemandedMaskIn.setLowBits(ShiftAmt);
520
521	if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
522	return I;
523
524	unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
525
526	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 526, __extension__ __PRETTY_FUNCTION__));
527	// Compute the new bits that are at the top now plus sign bits.
528	APInt HighBits(APInt::getHighBitsSet(
529	BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
530	Known.Zero.lshrInPlace(ShiftAmt);
531	Known.One.lshrInPlace(ShiftAmt);
532
533	// If the input sign bit is known to be zero, or if none of the top bits
534	// are demanded, turn this into an unsigned shift right.
535	assert(BitWidth > ShiftAmt && "Shift amount not saturated?")(static_cast <bool> (BitWidth > ShiftAmt && "Shift amount not saturated?" ) ? void (0) : __assert_fail ("BitWidth > ShiftAmt && \"Shift amount not saturated?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 535, __extension__ __PRETTY_FUNCTION__));
536	if (Known.Zero[BitWidth-ShiftAmt-1] \|\|
537	!DemandedMask.intersects(HighBits)) {
538	BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
539	I->getOperand(1));
540	LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
541	return InsertNewInstWith(LShr, *I);
542	} else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
543	Known.One \|= HighBits;
544	}
545	}
546	break;
547	}
548	case Instruction::SRem:
549	if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
550	// X % -1 demands all the bits because we don't want to introduce
551	// INT_MIN % -1 (== undef) by accident.
552	if (Rem->isMinusOne())
553	break;
554	APInt RA = Rem->getValue().abs();
555	if (RA.isPowerOf2()) {
556	if (DemandedMask.ult(RA)) // srem won't affect demanded bits
557	return I->getOperand(0);
558
559	APInt LowBits = RA - 1;
560	APInt Mask2 = LowBits \| APInt::getSignMask(BitWidth);
561	if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
562	return I;
563
564	// The low bits of LHS are unchanged by the srem.
565	Known.Zero = LHSKnown.Zero & LowBits;
566	Known.One = LHSKnown.One & LowBits;
567
568	// If LHS is non-negative or has all low bits zero, then the upper bits
569	// are all zero.
570	if (LHSKnown.isNonNegative() \|\| LowBits.isSubsetOf(LHSKnown.Zero))
571	Known.Zero \|= ~LowBits;
572
573	// If LHS is negative and not all low bits are zero, then the upper bits
574	// are all one.
575	if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
576	Known.One \|= ~LowBits;
577
578	assert(!Known.hasConflict() && "Bits known to be one AND zero?")(static_cast <bool> (!Known.hasConflict() && "Bits known to be one AND zero?" ) ? void (0) : __assert_fail ("!Known.hasConflict() && \"Bits known to be one AND zero?\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 578, __extension__ __PRETTY_FUNCTION__));
579	break;
580	}
581	}
582
583	// The sign bit is the LHS's sign bit, except when the result of the
584	// remainder is zero.
585	if (DemandedMask.isSignBitSet()) {
586	computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
587	// If it's known zero, our sign bit is also zero.
588	if (LHSKnown.isNonNegative())
589	Known.makeNonNegative();
590	}
591	break;
592	case Instruction::URem: {
593	KnownBits Known2(BitWidth);
594	APInt AllOnes = APInt::getAllOnesValue(BitWidth);
595	if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) \|\|
596	SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
597	return I;
598
599	unsigned Leaders = Known2.countMinLeadingZeros();
600	Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
601	break;
602	}
603	case Instruction::Call:
604	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
605	switch (II->getIntrinsicID()) {
606	default: break;
607	case Intrinsic::bswap: {
608	// If the only bits demanded come from one byte of the bswap result,
609	// just shift the input byte into position to eliminate the bswap.
610	unsigned NLZ = DemandedMask.countLeadingZeros();
611	unsigned NTZ = DemandedMask.countTrailingZeros();
612
613	// Round NTZ down to the next byte. If we have 11 trailing zeros, then
614	// we need all the bits down to bit 8. Likewise, round NLZ. If we
615	// have 14 leading zeros, round to 8.
616	NLZ &= ~7;
617	NTZ &= ~7;
618	// If we need exactly one byte, we can do this transformation.
619	if (BitWidth-NLZ-NTZ == 8) {
620	unsigned ResultBit = NTZ;
621	unsigned InputBit = BitWidth-NTZ-8;
622
623	// Replace this with either a left or right shift to get the byte into
624	// the right place.
625	Instruction *NewVal;
626	if (InputBit > ResultBit)
627	NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
628	ConstantInt::get(I->getType(), InputBit-ResultBit));
629	else
630	NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
631	ConstantInt::get(I->getType(), ResultBit-InputBit));
632	NewVal->takeName(I);
633	return InsertNewInstWith(NewVal, *I);
634	}
635
636	// TODO: Could compute known zero/one bits based on the input.
637	break;
638	}
639	case Intrinsic::x86_mmx_pmovmskb:
640	case Intrinsic::x86_sse_movmsk_ps:
641	case Intrinsic::x86_sse2_movmsk_pd:
642	case Intrinsic::x86_sse2_pmovmskb_128:
643	case Intrinsic::x86_avx_movmsk_ps_256:
644	case Intrinsic::x86_avx_movmsk_pd_256:
645	case Intrinsic::x86_avx2_pmovmskb: {
646	// MOVMSK copies the vector elements' sign bits to the low bits
647	// and zeros the high bits.
648	unsigned ArgWidth;
649	if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
650	ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
651	} else {
652	auto Arg = II->getArgOperand(0);
653	auto ArgType = cast<VectorType>(Arg->getType());
654	ArgWidth = ArgType->getNumElements();
655	}
656
657	// If we don't need any of low bits then return zero,
658	// we know that DemandedMask is non-zero already.
659	APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
660	if (DemandedElts.isNullValue())
661	return ConstantInt::getNullValue(VTy);
662
663	// We know that the upper bits are set to zero.
664	Known.Zero.setBitsFrom(ArgWidth);
665	return nullptr;
666	}
667	case Intrinsic::x86_sse42_crc32_64_64:
668	Known.Zero.setBitsFrom(32);
669	return nullptr;
670	}
671	}
672	computeKnownBits(V, Known, Depth, CxtI);
673	break;
674	}
675
676	// If the client is only demanding bits that we know, return the known
677	// constant.
678	if (DemandedMask.isSubsetOf(Known.Zero\|Known.One))
679	return Constant::getIntegerValue(VTy, Known.One);
680	return nullptr;
681	}
682
683	/// Helper routine of SimplifyDemandedUseBits. It computes Known
684	/// bits. It also tries to handle simplifications that can be done based on
685	/// DemandedMask, but without modifying the Instruction.
686	Value InstCombiner::SimplifyMultipleUseDemandedBits(Instruction I,
687	const APInt &DemandedMask,
688	KnownBits &Known,
689	unsigned Depth,
690	Instruction *CxtI) {
691	unsigned BitWidth = DemandedMask.getBitWidth();
692	Type *ITy = I->getType();
693
694	KnownBits LHSKnown(BitWidth);
695	KnownBits RHSKnown(BitWidth);
696
697	// Despite the fact that we can't simplify this instruction in all User's
698	// context, we can at least compute the known bits, and we can
699	// do simplifications that apply to just the one user if we know that
700	// this instruction has a simpler value in that context.
701	switch (I->getOpcode()) {
702	case Instruction::And: {
703	// If either the LHS or the RHS are Zero, the result is zero.
704	computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
705	computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
706	CxtI);
707
708	// Output known-0 are known to be clear if zero in either the LHS \| RHS.
709	APInt IKnownZero = RHSKnown.Zero \| LHSKnown.Zero;
710	// Output known-1 bits are only known if set in both the LHS & RHS.
711	APInt IKnownOne = RHSKnown.One & LHSKnown.One;
712
713	// If the client is only demanding bits that we know, return the known
714	// constant.
715	if (DemandedMask.isSubsetOf(IKnownZero\|IKnownOne))
716	return Constant::getIntegerValue(ITy, IKnownOne);
717
718	// If all of the demanded bits are known 1 on one side, return the other.
719	// These bits cannot contribute to the result of the 'and' in this
720	// context.
721	if (DemandedMask.isSubsetOf(LHSKnown.Zero \| RHSKnown.One))
722	return I->getOperand(0);
723	if (DemandedMask.isSubsetOf(RHSKnown.Zero \| LHSKnown.One))
724	return I->getOperand(1);
725
726	Known.Zero = std::move(IKnownZero);
727	Known.One = std::move(IKnownOne);
728	break;
729	}
730	case Instruction::Or: {
731	// We can simplify (X\|Y) -> X or Y in the user's context if we know that
732	// only bits from X or Y are demanded.
733
734	// If either the LHS or the RHS are One, the result is One.
735	computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
736	computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
737	CxtI);
738
739	// Output known-0 bits are only known if clear in both the LHS & RHS.
740	APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
741	// Output known-1 are known to be set if set in either the LHS \| RHS.
742	APInt IKnownOne = RHSKnown.One \| LHSKnown.One;
743
744	// If the client is only demanding bits that we know, return the known
745	// constant.
746	if (DemandedMask.isSubsetOf(IKnownZero\|IKnownOne))
747	return Constant::getIntegerValue(ITy, IKnownOne);
748
749	// If all of the demanded bits are known zero on one side, return the
750	// other. These bits cannot contribute to the result of the 'or' in this
751	// context.
752	if (DemandedMask.isSubsetOf(LHSKnown.One \| RHSKnown.Zero))
753	return I->getOperand(0);
754	if (DemandedMask.isSubsetOf(RHSKnown.One \| LHSKnown.Zero))
755	return I->getOperand(1);
756
757	Known.Zero = std::move(IKnownZero);
758	Known.One = std::move(IKnownOne);
759	break;
760	}
761	case Instruction::Xor: {
762	// We can simplify (X^Y) -> X or Y in the user's context if we know that
763	// only bits from X or Y are demanded.
764
765	computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
766	computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
767	CxtI);
768
769	// Output known-0 bits are known if clear or set in both the LHS & RHS.
770	APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) \|
771	(RHSKnown.One & LHSKnown.One);
772	// Output known-1 are known to be set if set in only one of the LHS, RHS.
773	APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) \|
774	(RHSKnown.One & LHSKnown.Zero);
775
776	// If the client is only demanding bits that we know, return the known
777	// constant.
778	if (DemandedMask.isSubsetOf(IKnownZero\|IKnownOne))
779	return Constant::getIntegerValue(ITy, IKnownOne);
780
781	// If all of the demanded bits are known zero on one side, return the
782	// other.
783	if (DemandedMask.isSubsetOf(RHSKnown.Zero))
784	return I->getOperand(0);
785	if (DemandedMask.isSubsetOf(LHSKnown.Zero))
786	return I->getOperand(1);
787
788	// Output known-0 bits are known if clear or set in both the LHS & RHS.
789	Known.Zero = std::move(IKnownZero);
790	// Output known-1 are known to be set if set in only one of the LHS, RHS.
791	Known.One = std::move(IKnownOne);
792	break;
793	}
794	default:
795	// Compute the Known bits to simplify things downstream.
796	computeKnownBits(I, Known, Depth, CxtI);
797
798	// If this user is only demanding bits that we know, return the known
799	// constant.
800	if (DemandedMask.isSubsetOf(Known.Zero\|Known.One))
801	return Constant::getIntegerValue(ITy, Known.One);
802
803	break;
804	}
805
806	return nullptr;
807	}
808
809
810	/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
811	/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
812	/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
813	/// of "C2-C1".
814	///
815	/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
816	/// ..., bn}, without considering the specific value X is holding.
817	/// This transformation is legal iff one of following conditions is hold:
818	/// 1) All the bit in S are 0, in this case E1 == E2.
819	/// 2) We don't care those bits in S, per the input DemandedMask.
820	/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
821	/// rest bits.
822	///
823	/// Currently we only test condition 2).
824	///
825	/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
826	/// not successful.
827	Value *
828	InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
829	Instruction *Shl, const APInt &ShlOp1,
830	const APInt &DemandedMask,
831	KnownBits &Known) {
832	if (!ShlOp1 \|\| !ShrOp1)
833	return nullptr; // No-op.
834
835	Value *VarX = Shr->getOperand(0);
836	Type *Ty = VarX->getType();
837	unsigned BitWidth = Ty->getScalarSizeInBits();
838	if (ShlOp1.uge(BitWidth) \|\| ShrOp1.uge(BitWidth))
839	return nullptr; // Undef.
840
841	unsigned ShlAmt = ShlOp1.getZExtValue();
842	unsigned ShrAmt = ShrOp1.getZExtValue();
843
844	Known.One.clearAllBits();
845	Known.Zero.setLowBits(ShlAmt - 1);
846	Known.Zero &= DemandedMask;
847
848	APInt BitMask1(APInt::getAllOnesValue(BitWidth));
849	APInt BitMask2(APInt::getAllOnesValue(BitWidth));
850
851	bool isLshr = (Shr->getOpcode() == Instruction::LShr);
852	BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
853	(BitMask1.ashr(ShrAmt) << ShlAmt);
854
855	if (ShrAmt <= ShlAmt) {
856	BitMask2 <<= (ShlAmt - ShrAmt);
857	} else {
858	BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
859	BitMask2.ashr(ShrAmt - ShlAmt);
860	}
861
862	// Check if condition-2 (see the comment to this function) is satified.
863	if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
864	if (ShrAmt == ShlAmt)
865	return VarX;
866
867	if (!Shr->hasOneUse())
868	return nullptr;
869
870	BinaryOperator *New;
871	if (ShrAmt < ShlAmt) {
872	Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
873	New = BinaryOperator::CreateShl(VarX, Amt);
874	BinaryOperator *Orig = cast<BinaryOperator>(Shl);
875	New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
876	New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
877	} else {
878	Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
879	New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
880	BinaryOperator::CreateAShr(VarX, Amt);
881	if (cast<BinaryOperator>(Shr)->isExact())
882	New->setIsExact(true);
883	}
884
885	return InsertNewInstWith(New, *Shl);
886	}
887
888	return nullptr;
889	}
890
891	/// The specified value produces a vector with any number of elements.
892	/// DemandedElts contains the set of elements that are actually used by the
893	/// caller. This method analyzes which elements of the operand are undef and
894	/// returns that information in UndefElts.
895	///
896	/// If the information about demanded elements can be used to simplify the
897	/// operation, the operation is simplified, then the resultant value is
898	/// returned. This returns null if no change was made.
899	Value InstCombiner::SimplifyDemandedVectorElts(Value V, APInt DemandedElts,
900	APInt &UndefElts,
901	unsigned Depth) {
902	unsigned VWidth = V->getType()->getVectorNumElements();
903	APInt EltMask(APInt::getAllOnesValue(VWidth));
904	assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!")(static_cast <bool> ((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!") ? void (0) : __assert_fail ("(DemandedElts & ~EltMask) == 0 && \"Invalid DemandedElts!\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 904, __extension__ __PRETTY_FUNCTION__));
905
906	if (isa<UndefValue>(V)) {
907	// If the entire vector is undefined, just return this info.
908	UndefElts = EltMask;
909	return nullptr;
910	}
911
912	if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef.
913	UndefElts = EltMask;
914	return UndefValue::get(V->getType());
915	}
916
917	UndefElts = 0;
918
919	// Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
920	if (Constant *C = dyn_cast<Constant>(V)) {
921	// Check if this is identity. If so, return 0 since we are not simplifying
922	// anything.
923	if (DemandedElts.isAllOnesValue())
924	return nullptr;
925
926	Type *EltTy = cast<VectorType>(V->getType())->getElementType();
927	Constant *Undef = UndefValue::get(EltTy);
928
929	SmallVector<Constant*, 16> Elts;
930	for (unsigned i = 0; i != VWidth; ++i) {
931	if (!DemandedElts[i]) { // If not demanded, set to undef.
932	Elts.push_back(Undef);
933	UndefElts.setBit(i);
934	continue;
935	}
936
937	Constant *Elt = C->getAggregateElement(i);
938	if (!Elt) return nullptr;
939
940	if (isa<UndefValue>(Elt)) { // Already undef.
941	Elts.push_back(Undef);
942	UndefElts.setBit(i);
943	} else { // Otherwise, defined.
944	Elts.push_back(Elt);
945	}
946	}
947
948	// If we changed the constant, return it.
949	Constant *NewCV = ConstantVector::get(Elts);
950	return NewCV != C ? NewCV : nullptr;
951	}
952
953	// Limit search depth.
954	if (Depth == 10)
955	return nullptr;
956
957	// If multiple users are using the root value, proceed with
958	// simplification conservatively assuming that all elements
959	// are needed.
960	if (!V->hasOneUse()) {
961	// Quit if we find multiple users of a non-root value though.
962	// They'll be handled when it's their turn to be visited by
963	// the main instcombine process.
964	if (Depth != 0)
965	// TODO: Just compute the UndefElts information recursively.
966	return nullptr;
967
968	// Conservatively assume that all elements are needed.
969	DemandedElts = EltMask;
970	}
971
972	Instruction *I = dyn_cast<Instruction>(V);
973	if (!I) return nullptr; // Only analyze instructions.
974
975	bool MadeChange = false;
976	APInt UndefElts2(VWidth, 0);
977	APInt UndefElts3(VWidth, 0);
978	Value *TmpV;
979	switch (I->getOpcode()) {
980	default: break;
981
982	case Instruction::InsertElement: {
983	// If this is a variable index, we don't know which element it overwrites.
984	// demand exactly the same input as we produce.
985	ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
986	if (!Idx) {
987	// Note that we can't propagate undef elt info, because we don't know
988	// which elt is getting updated.
989	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
990	UndefElts2, Depth + 1);
991	if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
992	break;
993	}
994
995	// The element inserted overwrites whatever was there, so the input demanded
996	// set is simpler than the output set.
997	unsigned IdxNo = Idx->getZExtValue();
998	APInt PreInsertDemandedElts = DemandedElts;
999	if (IdxNo < VWidth)
1000	PreInsertDemandedElts.clearBit(IdxNo);
1001	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
1002	UndefElts, Depth + 1);
1003	if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1004
1005	// If this is inserting an element that isn't demanded, remove this
1006	// insertelement.
1007	if (IdxNo >= VWidth \|\| !DemandedElts[IdxNo]) {
1008	Worklist.Add(I);
1009	return I->getOperand(0);
1010	}
1011
1012	// The inserted element is defined.
1013	UndefElts.clearBit(IdxNo);
1014	break;
1015	}
1016	case Instruction::ShuffleVector: {
1017	ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
1018	unsigned LHSVWidth =
1019	Shuffle->getOperand(0)->getType()->getVectorNumElements();
1020	APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
1021	for (unsigned i = 0; i < VWidth; i++) {
1022	if (DemandedElts[i]) {
1023	unsigned MaskVal = Shuffle->getMaskValue(i);
1024	if (MaskVal != -1u) {
1025	assert(MaskVal < LHSVWidth * 2 &&(static_cast <bool> (MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!") ? void (0) : __assert_fail ("MaskVal < LHSVWidth * 2 && \"shufflevector mask index out of range!\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 1026, __extension__ __PRETTY_FUNCTION__))
1026	"shufflevector mask index out of range!")(static_cast <bool> (MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!") ? void (0) : __assert_fail ("MaskVal < LHSVWidth * 2 && \"shufflevector mask index out of range!\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 1026, __extension__ __PRETTY_FUNCTION__));
1027	if (MaskVal < LHSVWidth)
1028	LeftDemanded.setBit(MaskVal);
1029	else
1030	RightDemanded.setBit(MaskVal - LHSVWidth);
1031	}
1032	}
1033	}
1034
1035	APInt LHSUndefElts(LHSVWidth, 0);
1036	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
1037	LHSUndefElts, Depth + 1);
1038	if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1039
1040	APInt RHSUndefElts(LHSVWidth, 0);
1041	TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
1042	RHSUndefElts, Depth + 1);
1043	if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1044
1045	bool NewUndefElts = false;
1046	unsigned LHSIdx = -1u, LHSValIdx = -1u;
1047	unsigned RHSIdx = -1u, RHSValIdx = -1u;
1048	bool LHSUniform = true;
1049	bool RHSUniform = true;
1050	for (unsigned i = 0; i < VWidth; i++) {
1051	unsigned MaskVal = Shuffle->getMaskValue(i);
1052	if (MaskVal == -1u) {
1053	UndefElts.setBit(i);
1054	} else if (!DemandedElts[i]) {
1055	NewUndefElts = true;
1056	UndefElts.setBit(i);
1057	} else if (MaskVal < LHSVWidth) {
1058	if (LHSUndefElts[MaskVal]) {
1059	NewUndefElts = true;
1060	UndefElts.setBit(i);
1061	} else {
1062	LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
1063	LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
1064	LHSUniform = LHSUniform && (MaskVal == i);
1065	}
1066	} else {
1067	if (RHSUndefElts[MaskVal - LHSVWidth]) {
1068	NewUndefElts = true;
1069	UndefElts.setBit(i);
1070	} else {
1071	RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
1072	RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
1073	RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
1074	}
1075	}
1076	}
1077
1078	// Try to transform shuffle with constant vector and single element from
1079	// this constant vector to single insertelement instruction.
1080	// shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1081	// insertelement V, C[ci], ci-n
1082	if (LHSVWidth == Shuffle->getType()->getNumElements()) {
1083	Value *Op = nullptr;
1084	Constant *Value = nullptr;
1085	unsigned Idx = -1u;
1086
1087	// Find constant vector with the single element in shuffle (LHS or RHS).
1088	if (LHSIdx < LHSVWidth && RHSUniform) {
1089	if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
1090	Op = Shuffle->getOperand(1);
1091	Value = CV->getOperand(LHSValIdx);
1092	Idx = LHSIdx;
1093	}
1094	}
1095	if (RHSIdx < LHSVWidth && LHSUniform) {
1096	if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
1097	Op = Shuffle->getOperand(0);
1098	Value = CV->getOperand(RHSValIdx);
1099	Idx = RHSIdx;
1100	}
1101	}
1102	// Found constant vector with single element - convert to insertelement.
1103	if (Op && Value) {
1104	Instruction *New = InsertElementInst::Create(
1105	Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
1106	Shuffle->getName());
1107	InsertNewInstWith(New, *Shuffle);
1108	return New;
1109	}
1110	}
1111	if (NewUndefElts) {
1112	// Add additional discovered undefs.
1113	SmallVector<Constant*, 16> Elts;
1114	for (unsigned i = 0; i < VWidth; ++i) {
1115	if (UndefElts[i])
1116	Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
1117	else
1118	Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
1119	Shuffle->getMaskValue(i)));
1120	}
1121	I->setOperand(2, ConstantVector::get(Elts));
1122	MadeChange = true;
1123	}
1124	break;
1125	}
1126	case Instruction::Select: {
1127	APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
1128	if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
1129	for (unsigned i = 0; i < VWidth; i++) {
1130	Constant *CElt = CV->getAggregateElement(i);
1131	// Method isNullValue always returns false when called on a
1132	// ConstantExpr. If CElt is a ConstantExpr then skip it in order to
1133	// to avoid propagating incorrect information.
1134	if (isa<ConstantExpr>(CElt))
1135	continue;
1136	if (CElt->isNullValue())
1137	LeftDemanded.clearBit(i);
1138	else
1139	RightDemanded.clearBit(i);
1140	}
1141	}
1142
1143	TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
1144	Depth + 1);
1145	if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1146
1147	TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
1148	UndefElts2, Depth + 1);
1149	if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
1150
1151	// Output elements are undefined if both are undefined.
1152	UndefElts &= UndefElts2;
1153	break;
1154	}
1155	case Instruction::BitCast: {
1156	// Vector->vector casts only.
1157	VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1158	if (!VTy) break;
1159	unsigned InVWidth = VTy->getNumElements();
1160	APInt InputDemandedElts(InVWidth, 0);
1161	UndefElts2 = APInt(InVWidth, 0);
1162	unsigned Ratio;
1163
1164	if (VWidth == InVWidth) {
1165	// If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1166	// elements as are demanded of us.
1167	Ratio = 1;
1168	InputDemandedElts = DemandedElts;
1169	} else if ((VWidth % InVWidth) == 0) {
1170	// If the number of elements in the output is a multiple of the number of
1171	// elements in the input then an input element is live if any of the
1172	// corresponding output elements are live.
1173	Ratio = VWidth / InVWidth;
1174	for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1175	if (DemandedElts[OutIdx])
1176	InputDemandedElts.setBit(OutIdx / Ratio);
1177	} else if ((InVWidth % VWidth) == 0) {
1178	// If the number of elements in the input is a multiple of the number of
1179	// elements in the output then an input element is live if the
1180	// corresponding output element is live.
1181	Ratio = InVWidth / VWidth;
1182	for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1183	if (DemandedElts[InIdx / Ratio])
1184	InputDemandedElts.setBit(InIdx);
1185	} else {
1186	// Unsupported so far.
1187	break;
1188	}
1189
1190	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
1191	UndefElts2, Depth + 1);
1192	if (TmpV) {
1193	I->setOperand(0, TmpV);
1194	MadeChange = true;
1195	}
1196
1197	if (VWidth == InVWidth) {
1198	UndefElts = UndefElts2;
1199	} else if ((VWidth % InVWidth) == 0) {
1200	// If the number of elements in the output is a multiple of the number of
1201	// elements in the input then an output element is undef if the
1202	// corresponding input element is undef.
1203	for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1204	if (UndefElts2[OutIdx / Ratio])
1205	UndefElts.setBit(OutIdx);
1206	} else if ((InVWidth % VWidth) == 0) {
1207	// If the number of elements in the input is a multiple of the number of
1208	// elements in the output then an output element is undef if all of the
1209	// corresponding input elements are undef.
1210	for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1211	APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
1212	if (SubUndef.countPopulation() == Ratio)
1213	UndefElts.setBit(OutIdx);
1214	}
1215	} else {
1216	llvm_unreachable("Unimp")::llvm::llvm_unreachable_internal("Unimp", "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 1216);
1217	}
1218	break;
1219	}
1220	case Instruction::And:
1221	case Instruction::Or:
1222	case Instruction::Xor:
1223	case Instruction::Add:
1224	case Instruction::Sub:
1225	case Instruction::Mul:
1226	// div/rem demand all inputs, because they don't want divide by zero.
1227	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
1228	Depth + 1);
1229	if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1230	TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
1231	UndefElts2, Depth + 1);
1232	if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1233
1234	// Output elements are undefined if both are undefined. Consider things
1235	// like undef&0. The result is known zero, not undef.
1236	UndefElts &= UndefElts2;
1237	break;
1238	case Instruction::FPTrunc:
1239	case Instruction::FPExt:
1240	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
1241	Depth + 1);
1242	if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1243	break;
1244
1245	case Instruction::Call: {
1246	IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1247	if (!II) break;
1248	switch (II->getIntrinsicID()) {
1249	default: break;
1250
1251	case Intrinsic::x86_xop_vfrcz_ss:
1252	case Intrinsic::x86_xop_vfrcz_sd:
1253	// The instructions for these intrinsics are speced to zero upper bits not
1254	// pass them through like other scalar intrinsics. So we shouldn't just
1255	// use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
1256	// Instead we should return a zero vector.
1257	if (!DemandedElts[0]) {
1258	Worklist.Add(II);
1259	return ConstantAggregateZero::get(II->getType());
1260	}
1261
1262	// Only the lower element is used.
1263	DemandedElts = 1;
1264	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1265	UndefElts, Depth + 1);
1266	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1267
1268	// Only the lower element is undefined. The high elements are zero.
1269	UndefElts = UndefElts[0];
1270	break;
1271
1272	// Unary scalar-as-vector operations that work column-wise.
1273	case Intrinsic::x86_sse_rcp_ss:
1274	case Intrinsic::x86_sse_rsqrt_ss:
1275	case Intrinsic::x86_sse_sqrt_ss:
1276	case Intrinsic::x86_sse2_sqrt_sd:
1277	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1278	UndefElts, Depth + 1);
1279	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1280
1281	// If lowest element of a scalar op isn't used then use Arg0.
1282	if (!DemandedElts[0]) {
1283	Worklist.Add(II);
1284	return II->getArgOperand(0);
1285	}
1286	// TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
1287	// checks).
1288	break;
1289
1290	// Binary scalar-as-vector operations that work column-wise. The high
1291	// elements come from operand 0. The low element is a function of both
1292	// operands.
1293	case Intrinsic::x86_sse_min_ss:
1294	case Intrinsic::x86_sse_max_ss:
1295	case Intrinsic::x86_sse_cmp_ss:
1296	case Intrinsic::x86_sse2_min_sd:
1297	case Intrinsic::x86_sse2_max_sd:
1298	case Intrinsic::x86_sse2_cmp_sd: {
1299	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1300	UndefElts, Depth + 1);
1301	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1302
1303	// If lowest element of a scalar op isn't used then use Arg0.
1304	if (!DemandedElts[0]) {
1305	Worklist.Add(II);
1306	return II->getArgOperand(0);
1307	}
1308
1309	// Only lower element is used for operand 1.
1310	DemandedElts = 1;
1311	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1312	UndefElts2, Depth + 1);
1313	if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1314
1315	// Lower element is undefined if both lower elements are undefined.
1316	// Consider things like undef&0. The result is known zero, not undef.
1317	if (!UndefElts2[0])
1318	UndefElts.clearBit(0);
1319
1320	break;
1321	}
1322
1323	// Binary scalar-as-vector operations that work column-wise. The high
1324	// elements come from operand 0 and the low element comes from operand 1.
1325	case Intrinsic::x86_sse41_round_ss:
1326	case Intrinsic::x86_sse41_round_sd: {
1327	// Don't use the low element of operand 0.
1328	APInt DemandedElts2 = DemandedElts;
1329	DemandedElts2.clearBit(0);
1330	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
1331	UndefElts, Depth + 1);
1332	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1333
1334	// If lowest element of a scalar op isn't used then use Arg0.
1335	if (!DemandedElts[0]) {
1336	Worklist.Add(II);
1337	return II->getArgOperand(0);
1338	}
1339
1340	// Only lower element is used for operand 1.
1341	DemandedElts = 1;
1342	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1343	UndefElts2, Depth + 1);
1344	if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1345
1346	// Take the high undef elements from operand 0 and take the lower element
1347	// from operand 1.
1348	UndefElts.clearBit(0);
1349	UndefElts \|= UndefElts2[0];
1350	break;
1351	}
1352
1353	// Three input scalar-as-vector operations that work column-wise. The high
1354	// elements come from operand 0 and the low element is a function of all
1355	// three inputs.
1356	case Intrinsic::x86_avx512_mask_add_ss_round:
1357	case Intrinsic::x86_avx512_mask_div_ss_round:
1358	case Intrinsic::x86_avx512_mask_mul_ss_round:
1359	case Intrinsic::x86_avx512_mask_sub_ss_round:
1360	case Intrinsic::x86_avx512_mask_max_ss_round:
1361	case Intrinsic::x86_avx512_mask_min_ss_round:
1362	case Intrinsic::x86_avx512_mask_add_sd_round:
1363	case Intrinsic::x86_avx512_mask_div_sd_round:
1364	case Intrinsic::x86_avx512_mask_mul_sd_round:
1365	case Intrinsic::x86_avx512_mask_sub_sd_round:
1366	case Intrinsic::x86_avx512_mask_max_sd_round:
1367	case Intrinsic::x86_avx512_mask_min_sd_round:
1368	case Intrinsic::x86_fma_vfmadd_ss:
1369	case Intrinsic::x86_fma_vfmsub_ss:
1370	case Intrinsic::x86_fma_vfnmadd_ss:
1371	case Intrinsic::x86_fma_vfnmsub_ss:
1372	case Intrinsic::x86_fma_vfmadd_sd:
1373	case Intrinsic::x86_fma_vfmsub_sd:
1374	case Intrinsic::x86_fma_vfnmadd_sd:
1375	case Intrinsic::x86_fma_vfnmsub_sd:
1376	case Intrinsic::x86_avx512_mask_vfmadd_ss:
1377	case Intrinsic::x86_avx512_mask_vfmadd_sd:
1378	case Intrinsic::x86_avx512_maskz_vfmadd_ss:
1379	case Intrinsic::x86_avx512_maskz_vfmadd_sd:
1380	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1381	UndefElts, Depth + 1);
1382	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1383
1384	// If lowest element of a scalar op isn't used then use Arg0.
1385	if (!DemandedElts[0]) {
1386	Worklist.Add(II);
1387	return II->getArgOperand(0);
1388	}
1389
1390	// Only lower element is used for operand 1 and 2.
1391	DemandedElts = 1;
1392	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1393	UndefElts2, Depth + 1);
1394	if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1395	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
1396	UndefElts3, Depth + 1);
1397	if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
1398
1399	// Lower element is undefined if all three lower elements are undefined.
1400	// Consider things like undef&0. The result is known zero, not undef.
1401	if (!UndefElts2[0] \|\| !UndefElts3[0])
1402	UndefElts.clearBit(0);
1403
1404	break;
1405
1406	case Intrinsic::x86_avx512_mask3_vfmadd_ss:
1407	case Intrinsic::x86_avx512_mask3_vfmadd_sd:
1408	case Intrinsic::x86_avx512_mask3_vfmsub_ss:
1409	case Intrinsic::x86_avx512_mask3_vfmsub_sd:
1410	case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
1411	case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
1412	// These intrinsics get the passthru bits from operand 2.
1413	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
1414	UndefElts, Depth + 1);
1415	if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
1416
1417	// If lowest element of a scalar op isn't used then use Arg2.
1418	if (!DemandedElts[0]) {
1419	Worklist.Add(II);
1420	return II->getArgOperand(2);
1421	}
1422
1423	// Only lower element is used for operand 0 and 1.
1424	DemandedElts = 1;
1425	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1426	UndefElts2, Depth + 1);
1427	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1428	TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1429	UndefElts3, Depth + 1);
1430	if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1431
1432	// Lower element is undefined if all three lower elements are undefined.
1433	// Consider things like undef&0. The result is known zero, not undef.
1434	if (!UndefElts2[0] \|\| !UndefElts3[0])
1435	UndefElts.clearBit(0);
1436
1437	break;
1438
1439	case Intrinsic::x86_sse2_pmulu_dq:
1440	case Intrinsic::x86_sse41_pmuldq:
1441	case Intrinsic::x86_avx2_pmul_dq:
1442	case Intrinsic::x86_avx2_pmulu_dq:
1443	case Intrinsic::x86_avx512_pmul_dq_512:
1444	case Intrinsic::x86_avx512_pmulu_dq_512: {
1445	Value *Op0 = II->getArgOperand(0);
1446	Value *Op1 = II->getArgOperand(1);
1447	unsigned InnerVWidth = Op0->getType()->getVectorNumElements();
1448	assert((VWidth * 2) == InnerVWidth && "Unexpected input size")(static_cast <bool> ((VWidth * 2) == InnerVWidth && "Unexpected input size") ? void (0) : __assert_fail ("(VWidth * 2) == InnerVWidth && \"Unexpected input size\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 1448, __extension__ __PRETTY_FUNCTION__));
1449
1450	APInt InnerDemandedElts(InnerVWidth, 0);
1451	for (unsigned i = 0; i != VWidth; ++i)
1452	if (DemandedElts[i])
1453	InnerDemandedElts.setBit(i * 2);
1454
1455	UndefElts2 = APInt(InnerVWidth, 0);
1456	TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
1457	Depth + 1);
1458	if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1459
1460	UndefElts3 = APInt(InnerVWidth, 0);
1461	TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
1462	Depth + 1);
1463	if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1464
1465	break;
1466	}
1467
1468	case Intrinsic::x86_sse2_packssdw_128:
1469	case Intrinsic::x86_sse2_packsswb_128:
1470	case Intrinsic::x86_sse2_packuswb_128:
1471	case Intrinsic::x86_sse41_packusdw:
1472	case Intrinsic::x86_avx2_packssdw:
1473	case Intrinsic::x86_avx2_packsswb:
1474	case Intrinsic::x86_avx2_packusdw:
1475	case Intrinsic::x86_avx2_packuswb:
1476	case Intrinsic::x86_avx512_packssdw_512:
1477	case Intrinsic::x86_avx512_packsswb_512:
1478	case Intrinsic::x86_avx512_packusdw_512:
1479	case Intrinsic::x86_avx512_packuswb_512: {
1480	auto *Ty0 = II->getArgOperand(0)->getType();
1481	unsigned InnerVWidth = Ty0->getVectorNumElements();
1482	assert(VWidth == (InnerVWidth * 2) && "Unexpected input size")(static_cast <bool> (VWidth == (InnerVWidth * 2) && "Unexpected input size") ? void (0) : __assert_fail ("VWidth == (InnerVWidth * 2) && \"Unexpected input size\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp" , 1482, __extension__ __PRETTY_FUNCTION__));
1483
1484	unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
1485	unsigned VWidthPerLane = VWidth / NumLanes;
1486	unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
1487
1488	// Per lane, pack the elements of the first input and then the second.
1489	// e.g.
1490	// v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
1491	// v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
1492	for (int OpNum = 0; OpNum != 2; ++OpNum) {
1493	APInt OpDemandedElts(InnerVWidth, 0);
1494	for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
1495	unsigned LaneIdx = Lane * VWidthPerLane;
1496	for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
1497	unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
1498	if (DemandedElts[Idx])
1499	OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
1500	}
1501	}
1502
1503	// Demand elements from the operand.
1504	auto *Op = II->getArgOperand(OpNum);
1505	APInt OpUndefElts(InnerVWidth, 0);
1506	TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
1507	Depth + 1);
1508	if (TmpV) {
1509	II->setArgOperand(OpNum, TmpV);
1510	MadeChange = true;
1511	}
1512
1513	// Pack the operand's UNDEF elements, one lane at a time.
1514	OpUndefElts = OpUndefElts.zext(VWidth);
1515	for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
1516	APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
1517	LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
1518	LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
1519	UndefElts \|= LaneElts;
1520	}
1521	}
1522	break;
1523	}
1524
1525	// PSHUFB
1526	case Intrinsic::x86_ssse3_pshuf_b_128:
1527	case Intrinsic::x86_avx2_pshuf_b:
1528	case Intrinsic::x86_avx512_pshuf_b_512:
1529	// PERMILVAR
1530	case Intrinsic::x86_avx_vpermilvar_ps:
1531	case Intrinsic::x86_avx_vpermilvar_ps_256:
1532	case Intrinsic::x86_avx512_vpermilvar_ps_512:
1533	case Intrinsic::x86_avx_vpermilvar_pd:
1534	case Intrinsic::x86_avx_vpermilvar_pd_256:
1535	case Intrinsic::x86_avx512_vpermilvar_pd_512:
1536	// PERMV
1537	case Intrinsic::x86_avx2_permd:
1538	case Intrinsic::x86_avx2_permps: {
1539	Value *Op1 = II->getArgOperand(1);
1540	TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
1541	Depth + 1);
1542	if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1543	break;
1544	}
1545
1546	// SSE4A instructions leave the upper 64-bits of the 128-bit result
1547	// in an undefined state.
1548	case Intrinsic::x86_sse4a_extrq:
1549	case Intrinsic::x86_sse4a_extrqi:
1550	case Intrinsic::x86_sse4a_insertq:
1551	case Intrinsic::x86_sse4a_insertqi:
1552	UndefElts.setHighBits(VWidth / 2);
1553	break;
1554	case Intrinsic::amdgcn_buffer_load:
1555	case Intrinsic::amdgcn_buffer_load_format:
1556	case Intrinsic::amdgcn_image_sample:
1557	case Intrinsic::amdgcn_image_sample_cl:
1558	case Intrinsic::amdgcn_image_sample_d:
1559	case Intrinsic::amdgcn_image_sample_d_cl:
1560	case Intrinsic::amdgcn_image_sample_l:
1561	case Intrinsic::amdgcn_image_sample_b:
1562	case Intrinsic::amdgcn_image_sample_b_cl:
1563	case Intrinsic::amdgcn_image_sample_lz:
1564	case Intrinsic::amdgcn_image_sample_cd:
1565	case Intrinsic::amdgcn_image_sample_cd_cl:
1566
1567	case Intrinsic::amdgcn_image_sample_c:
1568	case Intrinsic::amdgcn_image_sample_c_cl:
1569	case Intrinsic::amdgcn_image_sample_c_d:
1570	case Intrinsic::amdgcn_image_sample_c_d_cl:
1571	case Intrinsic::amdgcn_image_sample_c_l:
1572	case Intrinsic::amdgcn_image_sample_c_b:
1573	case Intrinsic::amdgcn_image_sample_c_b_cl:
1574	case Intrinsic::amdgcn_image_sample_c_lz:
1575	case Intrinsic::amdgcn_image_sample_c_cd:
1576	case Intrinsic::amdgcn_image_sample_c_cd_cl:
1577
1578	case Intrinsic::amdgcn_image_sample_o:
1579	case Intrinsic::amdgcn_image_sample_cl_o:
1580	case Intrinsic::amdgcn_image_sample_d_o:
1581	case Intrinsic::amdgcn_image_sample_d_cl_o:
1582	case Intrinsic::amdgcn_image_sample_l_o:
1583	case Intrinsic::amdgcn_image_sample_b_o:
1584	case Intrinsic::amdgcn_image_sample_b_cl_o:
1585	case Intrinsic::amdgcn_image_sample_lz_o:
1586	case Intrinsic::amdgcn_image_sample_cd_o:
1587	case Intrinsic::amdgcn_image_sample_cd_cl_o:
1588
1589	case Intrinsic::amdgcn_image_sample_c_o:
1590	case Intrinsic::amdgcn_image_sample_c_cl_o:
1591	case Intrinsic::amdgcn_image_sample_c_d_o:
1592	case Intrinsic::amdgcn_image_sample_c_d_cl_o:
1593	case Intrinsic::amdgcn_image_sample_c_l_o:
1594	case Intrinsic::amdgcn_image_sample_c_b_o:
1595	case Intrinsic::amdgcn_image_sample_c_b_cl_o:
1596	case Intrinsic::amdgcn_image_sample_c_lz_o:
1597	case Intrinsic::amdgcn_image_sample_c_cd_o:
1598	case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
1599
1600	case Intrinsic::amdgcn_image_getlod: {
1601	if (VWidth == 1 \|\| !DemandedElts.isMask())
1602	return nullptr;
1603
1604	// TODO: Handle 3-element vectors when supported in code gen.
1605	unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes());
1606	if (NewNumElts == VWidth)
1607	return nullptr;
1608
1609	Module *M = II->getParent()->getParent()->getParent();
1610	Type *EltTy = V->getType()->getVectorElementType();
1611
1612	Type *NewTy = (NewNumElts == 1) ? EltTy :
1613	VectorType::get(EltTy, NewNumElts);
1614
1615	auto IID = II->getIntrinsicID();
1616
1617	bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load \|\|
1618	IID == Intrinsic::amdgcn_buffer_load_format;
1619
1620	Function *NewIntrin = IsBuffer ?
1621	Intrinsic::getDeclaration(M, IID, NewTy) :
1622	// Samplers have 3 mangled types.
1623	Intrinsic::getDeclaration(M, IID,
1624	{ NewTy, II->getArgOperand(0)->getType(),
1625	II->getArgOperand(1)->getType()});
1626
1627	SmallVector<Value *, 5> Args;
1628	for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
1629	Args.push_back(II->getArgOperand(I));
1630
1631	IRBuilderBase::InsertPointGuard Guard(Builder);
1632	Builder.SetInsertPoint(II);
1633
1634	CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
1635	NewCall->takeName(II);
1636	NewCall->copyMetadata(*II);
1637
1638	if (!IsBuffer) {
1639	ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
1640	if (DMask) {
1641	unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1642
1643	unsigned PopCnt = 0;
1644	unsigned NewDMask = 0;
1645	for (unsigned I = 0; I < 4; ++I) {
1646	const unsigned Bit = 1 << I;
1647	if (!!(DMaskVal & Bit)) {
1648	if (++PopCnt > NewNumElts)
1649	break;
1650
1651	NewDMask \|= Bit;
1652	}
1653	}
1654
1655	NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
1656	}
1657	}
1658
1659
1660	if (NewNumElts == 1) {
1661	return Builder.CreateInsertElement(UndefValue::get(V->getType()),
1662	NewCall, static_cast<uint64_t>(0));
1663	}
1664
1665	SmallVector<uint32_t, 8> EltMask;
1666	for (unsigned I = 0; I < VWidth; ++I)
1667	EltMask.push_back(I);
1668
1669	Value *Shuffle = Builder.CreateShuffleVector(
1670	NewCall, UndefValue::get(NewTy), EltMask);
1671
1672	MadeChange = true;
	Value stored to 'MadeChange' is never read
1673	return Shuffle;
1674	}
1675	}
1676	break;
1677	}
1678	}
1679	return MadeChange ? I : nullptr;
1680	}