//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));

namespace {

class DAGCombiner {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations = false;
  bool LegalTypes = false;
  bool ForCodeSize;

  /// Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 32> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis *AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the work lists because they might get more simplified now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), AA(AA) {
    ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

    MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (EVT(VT).isSimple() && VT != MVT::Other &&
          TLI.isTypeLegal(EVT(VT)) &&
          VT.getSizeInBits() >= MaximumLegalStoreInBits)
        MaximumLegalStoreInBits = VT.getSizeInBits();
  }
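
  // Illustrative note: the loop above simply records the size of the widest
  // legal simple type, so on a hypothetical target whose widest legal type
  // is v4i32 this leaves MaximumLegalStoreInBits == 128.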

  /// Add to the worklist, making sure its instance is at the back (next to be
  /// processed).
  void AddToWorklist(SDNode *N) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Deleted Node added to Worklist");

    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }
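
  // Note (illustrative): re-adding a node that is already queued is a no-op
  // here, since the WorklistMap insertion fails and the vector is left
  // untouched; each node therefore appears at most once in Worklist
  // (ignoring nulled-out slots).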

  /// Remove all instances of N from the worklist.
  void removeFromWorklist(SDNode *N) {
    CombinedNodes.erase(N);

    auto It = WorklistMap.find(N);
    if (It == WorklistMap.end())
      return; // Not in the worklist.

    // Null out the entry rather than erasing it to avoid a linear operation.
    Worklist[It->second] = nullptr;
    WorklistMap.erase(It);
  }

  void deleteAndRecombine(SDNode *N);
  bool recursivelyDeleteUnusedNodes(SDNode *N);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                    bool AddTo = true);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                    bool AddTo = true) {
    SDValue To[] = { Res0, Res1 };
    return CombineTo(N, To, 2, AddTo);
  }

  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  unsigned MaximumLegalStoreInBits;

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt Demanded = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, Demanded);
  }
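
  // Illustrative example: for an i32 operand the wrapper above asks
  // SimplifyDemandedBits for all 32 bits, i.e. Demanded == 0xFFFFFFFF, so any
  // simplification it performs must preserve the entire value.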

  /// Check the specified vector node value to see if it can be simplified or
  /// if things it uses can be simplified as it only uses some of the
  /// elements. If so, return true.
  bool SimplifyDemandedVectorElts(SDValue Op) {
    unsigned NumElts = Op.getValueType().getVectorNumElements();
    APInt Demanded = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedVectorElts(Op, Demanded);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                  bool AssumeSingleUse = false);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  // Scalars have size 0 to distinguish from singleton vectors.
  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success SDValue() on failure.
  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
      SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types. The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //
  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitUADDO(SDNode *N);
  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitUSUBO(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitADDCARRY(SDNode *N);
  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitSUBCARRY(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitSMULO(SDNode *N);
  SDValue visitUMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitABS(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitBITREVERSE(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSETCCCARRY(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitAssertExt(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitFPOW(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);

  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

  SDValue visitSTORE(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);
  SDValue visitFP16_TO_FP(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);
  SDValue visitFMULForFMADistributiveCombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                         SDValue N1, SDNodeFlags Flags);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  SDValue foldSelectOfConstants(SDNode *N);
  SDValue foldVSelectOfConstants(SDNode *N);
  SDValue foldBinOpIntoSelect(SDNode *BO);
  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                           SDValue N2, SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                 SDValue N2, SDValue N3, ISD::CondCode CC);
  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                            const SDLoc &DL);
  SDValue unfoldMaskedMerge(SDNode *N);
  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        const SDLoc &DL, bool foldBooleans);
  SDValue rebuildSetCC(SDValue N);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            const SDLoc &DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  SDValue MatchLoadCombine(SDNode *N);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue splitMergedValStore(StoreSDNode *ST);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue convertBuildVecZextToZext(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
  SDValue reduceBuildVecToShuffle(SDNode *N);
  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                ArrayRef<int> VectorMask, SDValue VecIn1,
                                SDValue VecIn2, unsigned LeftIdx);
  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

  /// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  /// chain (aliasing node).
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Try to replace a store and any possibly adjacent stores on
  /// consecutive chains with better chains. Return true only if St is
  /// replaced.
  ///
  /// Notice that other chains may still be replaced even if the function
  /// returns false.
  bool findBetterNeighborChains(StoreSDNode *St);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;

    // Offset from the base ptr.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };
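
  // Illustrative example (hypothetical IR): three chained stores to [base],
  // [base+4] and [base+8] would be recorded as MemOpLinks with OffsetFromBase
  // 0, 4 and 8, letting the store-merging code sort candidates by offset.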

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                   SDValue &AddNode,
                                   SDValue &ConstNode);

  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
  /// the type of the loaded value to be extended.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT);

  /// Helper function to calculate whether the given Load/Store can have its
  /// width reduced to ExtVT.
  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                         EVT &MemVT, unsigned ShAmt = 0);

  /// Used by BackwardsPropagateMask to find suitable loads.
  bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
                         SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                         ConstantSDNode *Mask, SDNode *&NodeToMask);
  /// Attempt to propagate a given AND node back to load leaves so that they
  /// can be combined into narrow loads.
  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

  /// Helper function for MergeConsecutiveStores which merges the
  /// component store chains.
  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumStores);

  /// This is a helper function for MergeConsecutiveStores. When the
  /// source elements of the consecutive stores are all constants or
  /// all extracted vector elements, try to merge them into one
  /// larger store introducing bitcasts if necessary. \return True
  /// if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector,
                                       bool UseTrunc);

  /// This is a helper function for MergeConsecutiveStores. Stores
  /// that potentially may be merged with St are placed in
  /// StoreNodes. RootNode is a chain predecessor to all store
  /// candidates.
  void getStoreMergeCandidates(StoreSDNode *St,
                               SmallVectorImpl<MemOpLink> &StoreNodes,
                               SDNode *&Root);

  /// Helper function for MergeConsecutiveStores. Checks if
  /// candidate stores have indirect dependency through their
  /// operands. RootNode is the predecessor to all stores calculated
  /// by getStoreMergeCandidates and is used to prune the dependency check.
  /// \return True if safe to merge.
  bool checkMergeStoreCandidatesForDependencies(
      SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
      SDNode *RootNode);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return true if stores were merged into a wider store (the affected
  /// nodes are stored as a prefix in \p StoreNodes).
  bool MergeConsecutiveStores(StoreSDNode *St);

  /// Try to transform a truncation where C is a constant:
  ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use); if they are not met, an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

  /// Helper function to determine whether the target supports the operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether it is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    if (LegalOperations)
      return TLI.isOperationLegal(Opcode, VT);
    return TLI.isOperationLegalOrCustom(Opcode, VT);
  }
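
  // Example usage (illustrative): hasOperation(ISD::ABS, VT) is true before
  // operation legalization if the target marks ABS as Legal or Custom for VT,
  // but after legalization only if ABS is fully Legal for VT.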

public:
  /// Runs the dag combiner on all nodes in the work list
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue OrigLoad, SDValue ExtLoad,
                       ISD::NodeType ExtType);
};

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression for
/// the same cost as the expression itself, or 2 if we can compute the negated
/// form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse())
    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
      return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
  }
  case ISD::FADD:
    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              Depth + 1);
  }
}
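
// Illustrative examples for isNegatibleForFree: (fneg X) always reports 2,
// because dropping the fneg is strictly cheaper; (fsub A, B) with
// no-signed-zeros reports 1, since (fsub B, A) costs the same as the original.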

/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(1));
  }
}

// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}
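
// Illustrative example: with an 8-bit LHS, a 16-bit RHS and Offset == 1, Bits
// becomes 17 and both values are zero-extended to 17 bits, leaving one bit of
// headroom so a subsequent arithmetic fold on the pair cannot overflow.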

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if it is a constant null integer or a splatted vector of a
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
static bool isNullConstantOrNullSplatConstant(SDValue N) {
  // TODO: may want to use peekThroughBitcast() here.
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isNullValue();
  return false;
}

// Determines if it is a constant integer of one or a splatted vector of a
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
static bool isOneConstantOrOneSplatConstant(SDValue N) {
  // TODO: may want to use peekThroughBitcast() here.
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if it is a constant integer of all ones or a splatted vector of a
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
  N = peekThroughBitcasts(N);
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isAllOnesValue() &&
           Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(const SDNode *N) {
  return ISD::isBuildVectorOfConstantSDNodes(N) ||
         ISD::isBuildVectorOfConstantFPSDNodes(N);
}

SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
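
// Illustrative examples for ReassociateOps with Opc == ISD::ADD:
//   (add (add x, 1), 2) --> (add x, 3) via the constant-folding path above,
//   (add (add x, 1), y) --> (add (add x, y), 1) when the inner add has one
// use.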

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
1367 : /// Note that this both deletes the nodes and removes them from the worklist.
1368 : /// It also adds any nodes that have had a user deleted to the worklist, as they
1369 : /// may now have only one use and be subject to other combines.
1370 92486438 : bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1371 92486438 : if (!N->use_empty())
1372 : return false;
1373 :
1374 : SmallSetVector<SDNode *, 16> Nodes;
1375 3949125 : Nodes.insert(N);
1376 : do {
1377 12725499 : N = Nodes.pop_back_val();
1378 12725499 : if (!N)
1379 : continue;
1380 :
1381 12725499 : if (N->use_empty()) {
1382 15971072 : for (const SDValue &ChildN : N->op_values())
1383 9588653 : Nodes.insert(ChildN.getNode());
1384 :
1385 6382419 : removeFromWorklist(N);
1386 6382419 : DAG.DeleteNode(N);
1387 : } else {
1388 6343080 : AddToWorklist(N);
1389 : }
1390 12725498 : } while (!Nodes.empty());
1391 : return true;
1392 : }
1393 :
1394 : //===----------------------------------------------------------------------===//
1395 : // Main DAG Combiner implementation
1396 : //===----------------------------------------------------------------------===//
1397 :
1398 2767992 : void DAGCombiner::Run(CombineLevel AtLevel) {
1399 : // Set the instance variables so that the various visit routines may use them.
1400 2767992 : Level = AtLevel;
1401 2767992 : LegalOperations = Level >= AfterLegalizeVectorOps;
1402 2767992 : LegalTypes = Level >= AfterLegalizeTypes;
1403 :
1404 : // Add all the dag nodes to the worklist.
1405 85262820 : for (SDNode &Node : DAG.allnodes())
1406 82494828 : AddToWorklist(&Node);
1407 :
1408 : // Create a dummy node (which is not added to allnodes) that adds a reference
1409 : // to the root node, preventing it from being deleted, and tracking any
1410 : // changes of the root.
1411 8303976 : HandleSDNode Dummy(DAG.getRoot());
1412 :
1413 : // While the worklist isn't empty, find a node and try to combine it.
1414 93347489 : while (!WorklistMap.empty()) {
1415 : SDNode *N;
1416 : // The Worklist holds the SDNodes in order, but it may contain null entries.
1417 : do {
1418 93035690 : N = Worklist.pop_back_val();
1419 93035690 : } while (!N);
1420 :
1421 90579497 : bool GoodWorklistEntry = WorklistMap.erase(N);
1422 : (void)GoodWorklistEntry;
1423 : assert(GoodWorklistEntry &&
1424 : "Found a worklist entry without a corresponding map entry!");
1425 :
1426 : // If N has no uses, it is dead. Make sure to revisit all N's operands once
1427 : // N is deleted from the DAG, since they too may now be dead or may have a
1428 : // reduced number of uses, allowing other xforms.
1429 90579498 : if (recursivelyDeleteUnusedNodes(N))
1430 88673233 : continue;
1431 :
1432 : WorklistRemover DeadNodes(*this);
1433 :
1434 : // If this combine is running after legalizing the DAG, re-legalize any
1435 : // nodes pulled off the worklist.
1436 88536543 : if (Level == AfterLegalizeDAG) {
1437 : SmallSetVector<SDNode *, 16> UpdatedNodes;
1438 39041762 : bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1439 :
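     : // Requeue every node legalization created or updated, plus its users,
     : // so the new nodes get a chance to be combined as well.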
1440 39066393 : for (SDNode *LN : UpdatedNodes) {
1441 24631 : AddToWorklist(LN);
1442 : AddUsersToWorklist(LN);
1443 : }
1444 39041762 : if (!NIsValid)
1445 : continue;
1446 : }
1447 :
1448 : LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1449 :
1450 : // Add any operands of the new node which have not yet been combined to the
1451 : // worklist as well. Because the worklist uniques things already, this
1452 : // won't repeatedly process the same operand.
1453 88525903 : CombinedNodes.insert(N);
1454 250096475 : for (const SDValue &ChildN : N->op_values())
1455 161570572 : if (!CombinedNodes.count(ChildN.getNode()))
1456 149634654 : AddToWorklist(ChildN.getNode());
1457 :
1458 88525903 : SDValue RV = combine(N);
1459 :
1460 88525902 : if (!RV.getNode())
1461 : continue;
1462 :
1463 : ++NodesCombined;
1464 :
1465 : // If we get back the same node we passed in, rather than a new node or
1466 : // zero, we know that the node must have defined multiple values and
1467 : // CombineTo was used. Since CombineTo takes care of the worklist
1468 : // mechanics for us, we have no work to do in this case.
1469 3305870 : if (RV.getNode() == N)
1470 : continue;
1471 :
1472 : assert(N->getOpcode() != ISD::DELETED_NODE &&
1473 : RV.getOpcode() != ISD::DELETED_NODE &&
1474 : "Node was deleted but visit returned new node!");
1475 :
1476 : LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1477 :
1478 1906264 : if (N->getNumValues() == RV.getNode()->getNumValues())
1479 1718196 : DAG.ReplaceAllUsesWith(N, RV.getNode());
1480 : else {
1481 : assert(N->getValueType(0) == RV.getValueType() &&
1482 : N->getNumValues() == 1 && "Type mismatch");
1483 188068 : DAG.ReplaceAllUsesWith(N, &RV);
1484 : }
1485 :
1486 : // Push the new node and any users onto the worklist
1487 1906264 : AddToWorklist(RV.getNode());
1488 1906264 : AddUsersToWorklist(RV.getNode());
1489 :
1490 : // Finally, if the node is now dead, remove it from the graph. The node
1491 : // may not be dead if the replacement process recursively simplified to
1492 : // something else needing this node. This will also take care of adding any
1493 : // operands which have lost a user to the worklist.
1494 1906264 : recursivelyDeleteUnusedNodes(N);
1495 : }
1496 :
1497 : // If the root changed (e.g. it was a dead load), update the root.
1498 2767992 : DAG.setRoot(Dummy.getValue());
1499 2767992 : DAG.RemoveDeadNodes();
1500 2767992 : }
1501 :
1502 88526629 : SDValue DAGCombiner::visit(SDNode *N) {
1503 177053258 : switch (N->getOpcode()) {
1504 : default: break;
1505 4725134 : case ISD::TokenFactor: return visitTokenFactor(N);
1506 436984 : case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1507 3332748 : case ISD::ADD: return visitADD(N);
1508 532128 : case ISD::SUB: return visitSUB(N);
1509 522 : case ISD::ADDC: return visitADDC(N);
1510 130962 : case ISD::UADDO: return visitUADDO(N);
1511 89 : case ISD::SUBC: return visitSUBC(N);
1512 1913 : case ISD::USUBO: return visitUSUBO(N);
1513 2544 : case ISD::ADDE: return visitADDE(N);
1514 156807 : case ISD::ADDCARRY: return visitADDCARRY(N);
1515 79 : case ISD::SUBE: return visitSUBE(N);
1516 839 : case ISD::SUBCARRY: return visitSUBCARRY(N);
1517 39729 : case ISD::MUL: return visitMUL(N);
1518 5883 : case ISD::SDIV: return visitSDIV(N);
1519 4531 : case ISD::UDIV: return visitUDIV(N);
1520 4801 : case ISD::SREM:
1521 4801 : case ISD::UREM: return visitREM(N);
1522 6357 : case ISD::MULHU: return visitMULHU(N);
1523 1513 : case ISD::MULHS: return visitMULHS(N);
1524 723 : case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1525 5613 : case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1526 99 : case ISD::SMULO: return visitSMULO(N);
1527 228 : case ISD::UMULO: return visitUMULO(N);
1528 25437 : case ISD::SMIN:
1529 : case ISD::SMAX:
1530 : case ISD::UMIN:
1531 25437 : case ISD::UMAX: return visitIMINMAX(N);
1532 400002 : case ISD::AND: return visitAND(N);
1533 146434 : case ISD::OR: return visitOR(N);
1534 87480 : case ISD::XOR: return visitXOR(N);
1535 123493 : case ISD::SHL: return visitSHL(N);
1536 30600 : case ISD::SRA: return visitSRA(N);
1537 164810 : case ISD::SRL: return visitSRL(N);
1538 2760 : case ISD::ROTR:
1539 2760 : case ISD::ROTL: return visitRotate(N);
1540 1036 : case ISD::ABS: return visitABS(N);
1541 1476 : case ISD::BSWAP: return visitBSWAP(N);
1542 559 : case ISD::BITREVERSE: return visitBITREVERSE(N);
1543 1221 : case ISD::CTLZ: return visitCTLZ(N);
1544 1564 : case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1545 500 : case ISD::CTTZ: return visitCTTZ(N);
1546 652 : case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1547 2141 : case ISD::CTPOP: return visitCTPOP(N);
1548 32115 : case ISD::SELECT: return visitSELECT(N);
1549 39265 : case ISD::VSELECT: return visitVSELECT(N);
1550 16226 : case ISD::SELECT_CC: return visitSELECT_CC(N);
1551 298267 : case ISD::SETCC: return visitSETCC(N);
1552 289 : case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1553 51003 : case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1554 142725 : case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1555 88162 : case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1556 187971 : case ISD::AssertSext:
1557 187971 : case ISD::AssertZext: return visitAssertExt(N);
1558 46974 : case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1559 3049 : case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1560 5864 : case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1561 858614 : case ISD::TRUNCATE: return visitTRUNCATE(N);
1562 821241 : case ISD::BITCAST: return visitBITCAST(N);
1563 : case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1564 43573 : case ISD::FADD: return visitFADD(N);
1565 12783 : case ISD::FSUB: return visitFSUB(N);
1566 27386 : case ISD::FMUL: return visitFMUL(N);
1567 9771 : case ISD::FMA: return visitFMA(N);
1568 10233 : case ISD::FDIV: return visitFDIV(N);
1569 283 : case ISD::FREM: return visitFREM(N);
1570 2134 : case ISD::FSQRT: return visitFSQRT(N);
1571 1440 : case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1572 250 : case ISD::FPOW: return visitFPOW(N);
1573 21933 : case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1574 12560 : case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1575 8957 : case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1576 6297 : case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1577 4291 : case ISD::FP_ROUND: return visitFP_ROUND(N);
1578 0 : case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1579 9165 : case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1580 6789 : case ISD::FNEG: return visitFNEG(N);
1581 5148 : case ISD::FABS: return visitFABS(N);
1582 1642 : case ISD::FFLOOR: return visitFFLOOR(N);
1583 3382 : case ISD::FMINNUM: return visitFMINNUM(N);
1584 3298 : case ISD::FMAXNUM: return visitFMAXNUM(N);
1585 4301 : case ISD::FCEIL: return visitFCEIL(N);
1586 1548 : case ISD::FTRUNC: return visitFTRUNC(N);
1587 258065 : case ISD::BRCOND: return visitBRCOND(N);
1588 6356 : case ISD::BR_CC: return visitBR_CC(N);
1589 6474249 : case ISD::LOAD: return visitLOAD(N);
1590 8005537 : case ISD::STORE: return visitSTORE(N);
1591 52048 : case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1592 416312 : case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1593 698113 : case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1594 30650 : case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1595 74221 : case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1596 70837 : case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1597 23174 : case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1598 19322 : case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1599 994 : case ISD::MGATHER: return visitMGATHER(N);
1600 1291 : case ISD::MLOAD: return visitMLOAD(N);
1601 287 : case ISD::MSCATTER: return visitMSCATTER(N);
1602 761 : case ISD::MSTORE: return visitMSTORE(N);
1603 : case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1604 6740 : case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1605 : }
1606 59162749 : return SDValue();
1607 : }
1608 :
1609 88526629 : SDValue DAGCombiner::combine(SDNode *N) {
1610 88526629 : SDValue RV = visit(N);
1611 :
1612 : // If nothing happened, try a target-specific DAG combine.
1613 88526629 : if (!RV.getNode()) {
1614 : assert(N->getOpcode() != ISD::DELETED_NODE &&
1615 : "Node was deleted but visit returned NULL!");
1616 :
1617 171056554 : if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1618 80949613 : TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1619 :
1620 : // Expose the DAG combiner to the target combiner impls.
1621 : TargetLowering::DAGCombinerInfo
1622 24931877 : DagCombineInfo(DAG, Level, false, this);
1623 :
1624 24931877 : RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1625 : }
1626 : }
1627 :
1628 : // If still nothing happened, try promoting the operation.
1629 88526629 : if (!RV.getNode()) {
1630 170452332 : switch (N->getOpcode()) {
1631 : default: break;
1632 : case ISD::ADD:
1633 : case ISD::SUB:
1634 : case ISD::MUL:
1635 : case ISD::AND:
1636 : case ISD::OR:
1637 : case ISD::XOR:
1638 3975682 : RV = PromoteIntBinOp(SDValue(N, 0));
1639 3975682 : break;
1640 : case ISD::SHL:
1641 : case ISD::SRA:
1642 : case ISD::SRL:
1643 259809 : RV = PromoteIntShiftOp(SDValue(N, 0));
1644 259809 : break;
1645 : case ISD::SIGN_EXTEND:
1646 : case ISD::ZERO_EXTEND:
1647 : case ISD::ANY_EXTEND:
1648 189609 : RV = PromoteExtend(SDValue(N, 0));
1649 189609 : break;
1650 : case ISD::LOAD:
1651 6143598 : if (PromoteLoad(SDValue(N, 0)))
1652 0 : RV = SDValue(N, 0);
1653 : break;
1654 : }
1655 : }
1656 :
1657 : // If N is a commutative binary node, try to eliminate it if the commuted
1658 : // version is already present in the DAG.
1659 88526629 : if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1660 3695149 : N->getNumValues() == 1) {
1661 3556035 : SDValue N0 = N->getOperand(0);
1662 3556035 : SDValue N1 = N->getOperand(1);
1663 :
1664 : // Constant operands are canonicalized to RHS.
1665 : if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1666 635593 : SDValue Ops[] = {N1, N0};
1667 2542372 : SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1668 : N->getFlags());
1669 635593 : if (CSENode)
1670 20 : return SDValue(CSENode, 0);
1671 : }
1672 : }
1673 :
1674 88526609 : return RV;
1675 : }
1676 :
1677 : /// Given a node, return its input chain if it has one, otherwise return a null
1678 : /// SDValue.
1679 7171608 : static SDValue getInputChainForNode(SDNode *N) {
1680 14343216 : if (unsigned NumOps = N->getNumOperands()) {
1681 7150080 : if (N->getOperand(0).getValueType() == MVT::Other)
1682 7100244 : return N->getOperand(0);
1683 99672 : if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1684 49836 : return N->getOperand(NumOps-1);
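     : // A chain is normally the first or last operand; scanning the middle
     : // operands is only a fallback.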
1685 0 : for (unsigned i = 1; i < NumOps-1; ++i)
1686 0 : if (N->getOperand(i).getValueType() == MVT::Other)
1687 0 : return N->getOperand(i);
1688 : }
1689 21528 : return SDValue();
1690 : }
1691 :
1692 4725134 : SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1693 : // If N has two operands, where one has an input chain equal to the other,
1694 : // the 'other' chain is redundant.
1695 4725134 : if (N->getNumOperands() == 2) {
1696 3593192 : if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1697 14776 : return N->getOperand(0);
1698 3578416 : if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1699 87635 : return N->getOperand(1);
1700 : }
1701 :
1702 : // Don't simplify token factors if optnone.
1703 4622723 : if (OptLevel == CodeGenOpt::None)
1704 2513835 : return SDValue();
1705 :
1706 : SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1707 : SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1708 : SmallPtrSet<SDNode*, 16> SeenOps;
1709 2108888 : bool Changed = false; // If we should replace this token factor.
1710 :
1711 : // Start out with this token factor.
1712 2108888 : TFs.push_back(N);
1713 :
1714 : // Iterate through token factors. The TFs list grows as new token factors
1715 : // are encountered.
1716 4950900 : for (unsigned i = 0; i < TFs.size(); ++i) {
1717 2842012 : SDNode *TF = TFs[i];
1718 :
1719 : // Check each of the operands.
1720 11192010 : for (const SDValue &Op : TF->op_values()) {
1721 16699996 : switch (Op.getOpcode()) {
1722 32291 : case ISD::EntryToken:
1723 : // Entry tokens don't need to be added to the list. They are
1724 : // redundant.
1725 32291 : Changed = true;
1726 32291 : break;
1727 :
1728 1288988 : case ISD::TokenFactor:
1729 1288988 : if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1730 : // Queue up for processing.
1731 733124 : TFs.push_back(Op.getNode());
1732 : // Clean up in case the token factor is removed.
1733 733124 : AddToWorklist(Op.getNode());
1734 733124 : Changed = true;
1735 733124 : break;
1736 : }
1737 : LLVM_FALLTHROUGH;
1738 :
1739 : default:
1740 : // Only add if it isn't already in the list.
1741 7584583 : if (SeenOps.insert(Op.getNode()).second)
1742 7460977 : Ops.push_back(Op);
1743 : else
1744 123606 : Changed = true;
1745 : break;
1746 : }
1747 : }
1748 : }
1749 :
1750 : // Remove Nodes that are chained to another node in the list. Do so
1751 : // by walking up chains breadth-first, stopping when we've seen
1752 : // another operand. In general we must climb to the EntryNode, but we can exit
1753 : // early if we find all remaining work is associated with just one operand as
1754 : // no further pruning is possible.
1755 :
1756 : // List of nodes to search through and original Ops from which they originate.
1757 : SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1758 : SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1759 : SmallPtrSet<SDNode *, 16> SeenChains;
1760 2108888 : bool DidPruneOps = false;
1761 :
1762 2108888 : unsigned NumLeftToConsider = 0;
1763 9569865 : for (const SDValue &Op : Ops) {
1764 7460977 : Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1765 7460977 : OpWorkCount.push_back(1);
1766 : }
1767 :
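     : // Note that this lambda shadows the DAGCombiner::AddToWorklist member
     : // for the remainder of this function.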
1768 : auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1769 : // If this is an Op, we can remove the op from the list. Re-mark any
1770 : // search associated with it as coming from the current OpNumber.
1771 : if (SeenOps.count(Op) != 0) {
1772 : Changed = true;
1773 : DidPruneOps = true;
1774 : unsigned OrigOpNumber = 0;
1775 : while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1776 : OrigOpNumber++;
1777 : assert((OrigOpNumber != Ops.size()) &&
1778 : "expected to find TokenFactor Operand");
1779 : // Re-mark worklist from OrigOpNumber to OpNumber
1780 : for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1781 : if (Worklist[i].second == OrigOpNumber) {
1782 : Worklist[i].second = OpNumber;
1783 : }
1784 : }
1785 : OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1786 : OpWorkCount[OrigOpNumber] = 0;
1787 : NumLeftToConsider--;
1788 : }
1789 : // Add if it's a new chain
1790 : if (SeenChains.insert(Op).second) {
1791 : OpWorkCount[OpNumber]++;
1792 : Worklist.push_back(std::make_pair(Op, OpNumber));
1793 : }
1794 2108888 : };
1795 :
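     : // Breadth-first walk up the chains, capped at 1024 steps. Stop early once
     : // at most one op's search remains live, as no further pruning is possible.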
1796 14624216 : for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1797 : // We need to consider at least 2 Ops to prune.
1798 14153618 : if (NumLeftToConsider <= 1)
1799 : break;
1800 12515328 : auto CurNode = Worklist[i].first;
1801 12515328 : auto CurOpNumber = Worklist[i].second;
1802 : assert((OpWorkCount[CurOpNumber] > 0) &&
1803 : "Node should not appear in worklist");
1804 25030656 : switch (CurNode->getOpcode()) {
1805 489576 : case ISD::EntryToken:
1806 : // Hitting EntryToken is the only way for the search to terminate without
1807 : // hitting another operand's search. Prevent us from marking this operand
1808 : // as considered.
1810 489576 : NumLeftToConsider++;
1811 489576 : break;
1812 : case ISD::TokenFactor:
1813 4264940 : for (const SDValue &Op : CurNode->op_values())
1814 3051420 : AddToWorklist(i, Op.getNode(), CurOpNumber);
1815 : break;
1816 978552 : case ISD::CopyFromReg:
1817 : case ISD::CopyToReg:
1818 978552 : AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1819 978552 : break;
1820 : default:
1821 : if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1822 9063493 : AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1823 : break;
1824 : }
1825 12515328 : OpWorkCount[CurOpNumber]--;
1826 12515328 : if (OpWorkCount[CurOpNumber] == 0)
1827 5305132 : NumLeftToConsider--;
1828 : }
1829 :
1830 : // If we've changed things around then replace token factor.
1831 2108888 : if (Changed) {
1832 : SDValue Result;
1833 578983 : if (Ops.empty()) {
1834 : // The entry token is the only possible outcome.
1835 326 : Result = DAG.getEntryNode();
1836 : } else {
1837 578657 : if (DidPruneOps) {
1838 : SmallVector<SDValue, 8> PrunedOps;
1840 1811057 : for (const SDValue &Op : Ops) {
1841 1473956 : if (SeenChains.count(Op.getNode()) == 0)
1842 914705 : PrunedOps.push_back(Op);
1843 : }
1844 1287894 : Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1845 : } else {
1846 798626 : Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1847 : }
1848 : }
1849 578983 : return Result;
1850 : }
1851 1529905 : return SDValue();
1852 : }
1853 :
1854 : /// MERGE_VALUES can always be eliminated.
1855 436984 : SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1856 : WorklistRemover DeadNodes(*this);
1857 : // Replacing results may cause a different MERGE_VALUES to suddenly
1858 : // be CSE'd with N, and carry its uses with it. Iterate until no
1859 : // uses remain, to ensure that the node can be safely deleted.
1860 : // First add the users of this node to the work list so that they
1861 : // can be tried again once they have new operands.
1862 : AddUsersToWorklist(N);
1863 : do {
1864 : // Do as a single replacement to avoid rewalking use lists.
1865 : SmallVector<SDValue, 8> Ops;
1866 1316835 : for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1867 1759702 : Ops.push_back(N->getOperand(i));
1868 436984 : DAG.ReplaceAllUsesWith(N, Ops.data());
1869 436984 : } while (!N->use_empty());
1870 436984 : deleteAndRecombine(N);
1871 873968 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
1872 : }
1873 :
1874 : /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1875 : /// ConstantSDNode pointer else nullptr.
1876 0 : static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1877 : ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1878 112653 : return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1879 : }
1880 :
1881 0 : SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1882 : assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1883 :
1884 : // Don't do this unless the old select is going away. We want to eliminate the
1885 : // binary operator, not replace a binop with a select.
1886 : // TODO: Handle ISD::SELECT_CC.
1887 : unsigned SelOpNo = 0;
1888 0 : SDValue Sel = BO->getOperand(0);
1889 0 : if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1890 : SelOpNo = 1;
1891 0 : Sel = BO->getOperand(1);
1892 : }
1893 :
1894 0 : if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1895 0 : return SDValue();
1896 :
1897 0 : SDValue CT = Sel.getOperand(1);
1898 0 : if (!isConstantOrConstantVector(CT, true) &&
1899 : !isConstantFPBuildVectorOrConstantFP(CT))
1900 0 : return SDValue();
1901 :
1902 0 : SDValue CF = Sel.getOperand(2);
1903 0 : if (!isConstantOrConstantVector(CF, true) &&
1904 : !isConstantFPBuildVectorOrConstantFP(CF))
1905 0 : return SDValue();
1906 :
1907 : // Bail out if any constants are opaque because we can't constant fold those.
1908 : // The exception is "and" and "or" with either 0 or -1 in which case we can
1909 : // propagate non constant operands into select. I.e.:
1910 : // and (select Cond, 0, -1), X --> select Cond, 0, X
1911 : // or X, (select Cond, -1, 0) --> select Cond, -1, X
1912 0 : auto BinOpcode = BO->getOpcode();
1913 0 : bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1914 0 : (isNullConstantOrNullSplatConstant(CT) ||
1915 0 : isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
1916 0 : (isNullConstantOrNullSplatConstant(CF) ||
1917 0 : isAllOnesConstantOrAllOnesSplatConstant(CF));
1918 :
1919 0 : SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1920 0 : if (!CanFoldNonConst &&
1921 0 : !isConstantOrConstantVector(CBO, true) &&
1922 : !isConstantFPBuildVectorOrConstantFP(CBO))
1923 0 : return SDValue();
1924 :
1925 0 : EVT VT = Sel.getValueType();
1926 :
1927 : // In the case of a shift, the value and the shift amount may have different
1928 : // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
1929 : // type. Bail out if we have swapped operands and the value types do not
1930 : // match. NB: x86 is fine if the operands are not swapped and the shift
1931 : // amount VT is no bigger than that of the shifted value. TODO: it is possible
1932 : // to check for a shift operation, correct the VTs, and still optimize on x86.
1933 0 : if (SelOpNo && VT != CBO.getValueType())
1934 0 : return SDValue();
1935 :
1936 : // We have a select-of-constants followed by a binary operator with a
1937 : // constant. Eliminate the binop by pulling the constant math into the select.
1938 : // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1939 0 : SDLoc DL(Sel);
1940 0 : SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1941 0 : : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1942 0 : if (!CanFoldNonConst && !NewCT.isUndef() &&
1943 0 : !isConstantOrConstantVector(NewCT, true) &&
1944 : !isConstantFPBuildVectorOrConstantFP(NewCT))
1945 0 : return SDValue();
1946 :
1947 0 : SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1948 0 : : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1949 0 : if (!CanFoldNonConst && !NewCF.isUndef() &&
1950 0 : !isConstantOrConstantVector(NewCF, true) &&
1951 : !isConstantFPBuildVectorOrConstantFP(NewCF))
1952 0 : return SDValue();
1953 :
1954 0 : return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1955 : }
1956 :
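     : /// Fold an add/sub of a constant with an inverted low bit (a zext'd setcc
     : /// of (X & 1) against zero) into a sub/add of the low bit itself with an
     : /// adjusted constant; see the inline comments for the exact patterns.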
1957 3626163 : static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1958 : assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1959 : "Expecting add or sub");
1960 :
1961 : // Match a constant operand and a zext operand for the math instruction:
1962 : // add Z, C
1963 : // sub C, Z
1964 3626163 : bool IsAdd = N->getOpcode() == ISD::ADD;
1965 3626163 : SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1966 3626163 : SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1967 : auto *CN = dyn_cast<ConstantSDNode>(C);
1968 2641274 : if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1969 3625337 : return SDValue();
1970 :
1971 : // Match the zext operand as a setcc of a boolean.
1972 1652 : if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1973 143 : Z.getOperand(0).getValueType() != MVT::i1)
1974 702 : return SDValue();
1975 :
1976 : // Match the compare as: setcc (X & 1), 0, eq.
1977 : SDValue SetCC = Z.getOperand(0);
1978 124 : ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1979 37 : if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1980 175 : SetCC.getOperand(0).getOpcode() != ISD::AND ||
1981 23 : !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1982 102 : return SDValue();
1983 :
1984 : // We are adding/subtracting a constant and an inverted low bit. Turn that
1985 : // into a subtract/add of the low bit with incremented/decremented constant:
1986 : // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1987 : // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1988 22 : EVT VT = C.getValueType();
1989 : SDLoc DL(N);
1990 22 : SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1991 61 : SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1992 62 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1993 31 : return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1994 : }
1995 :
1996 : /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1997 : /// a shift and add with a different constant.
1998 3626141 : static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1999 : assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2000 : "Expecting add or sub");
2001 :
2002 : // We need a constant operand for the add/sub, and the other operand is a
2003 : // logical shift right: add (srl), C or sub C, (srl).
2004 3626141 : bool IsAdd = N->getOpcode() == ISD::ADD;
2005 3626141 : SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2006 3626141 : SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2007 3626141 : ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2008 3626141 : if (!C || ShiftOp.getOpcode() != ISD::SRL)
2009 3624375 : return SDValue();
2010 :
2011 : // The shift must be of a 'not' value.
2012 1766 : SDValue Not = ShiftOp.getOperand(0);
2013 1766 : if (!Not.hasOneUse() || !isBitwiseNot(Not))
2014 1747 : return SDValue();
2015 :
2016 : // The shift must be moving the sign bit to the least-significant-bit.
2017 19 : EVT VT = ShiftOp.getValueType();
2018 19 : SDValue ShAmt = ShiftOp.getOperand(1);
2019 19 : ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2020 38 : if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2021 0 : return SDValue();
2022 :
2023 : // Eliminate the 'not' by adjusting the shift and add/sub constant:
2024 : // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2025 : // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2026 : SDLoc DL(N);
2027 19 : auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2028 19 : SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2029 48 : APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2030 19 : return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2031 : }
2032 :
2033 3332748 : SDValue DAGCombiner::visitADD(SDNode *N) {
2034 3332748 : SDValue N0 = N->getOperand(0);
2035 3332748 : SDValue N1 = N->getOperand(1);
2036 3332748 : EVT VT = N0.getValueType();
2037 : SDLoc DL(N);
2038 :
2039 : // fold vector ops
2040 3332748 : if (VT.isVector()) {
2041 141605 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
2042 17 : return FoldedVOp;
2043 :
2044 : // fold (add x, 0) -> x, vector edition
2045 141588 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
2046 137 : return N0;
2047 141451 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
2048 158 : return N1;
2049 : }
2050 :
2051 : // fold (add x, undef) -> undef
2052 3332436 : if (N0.isUndef())
2053 3 : return N0;
2054 :
2055 3332433 : if (N1.isUndef())
2056 9 : return N1;
2057 :
2058 3332424 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2059 : // canonicalize constant to RHS
2060 2098 : if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2061 2926 : return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2062 : // fold (add c1, c2) -> c1+c2
2063 635 : return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2064 635 : N1.getNode());
2065 : }
2066 :
2067 : // fold (add x, 0) -> x
2068 3330326 : if (isNullConstant(N1))
2069 327 : return N0;
2070 :
2071 3329999 : if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2072 : // fold ((c1-A)+c2) -> (c1+c2)-A
2073 2956591 : if (N0.getOpcode() == ISD::SUB &&
2074 644 : isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2075 : // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2076 31 : return DAG.getNode(ISD::SUB, DL, VT,
2077 : DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2078 31 : N0.getOperand(1));
2079 : }
2080 :
2081 : // add (sext i1 X), 1 -> zext (not i1 X)
2082 : // We don't transform this pattern:
2083 : // add (zext i1 X), -1 -> sext (not i1 X)
2084 : // because most (?) targets generate better code for the zext form.
2085 2957988 : if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2086 1002 : isOneConstantOrOneSplatConstant(N1)) {
2087 41 : SDValue X = N0.getOperand(0);
2088 41 : if ((!LegalOperations ||
2089 3 : (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2090 81 : TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2091 40 : X.getScalarValueSizeInBits() == 1) {
2092 34 : SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2093 34 : return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2094 : }
2095 : }
2096 :
2097 : // Undo the add -> or combine to merge constant offsets from a frame index.
2098 : if (N0.getOpcode() == ISD::OR &&
2099 : isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2100 2958723 : isa<ConstantSDNode>(N0.getOperand(1)) &&
2101 5648 : DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2102 5648 : SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2103 5648 : return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2104 : }
2105 : }
2106 :
2107 3327127 : if (SDValue NewSel = foldBinOpIntoSelect(N))
2108 8 : return NewSel;
2109 :
2110 : // reassociate add
2111 3327119 : if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2112 223488 : return RADD;
2113 :
2114 : // fold ((0-A) + B) -> B-A
2115 3106447 : if (N0.getOpcode() == ISD::SUB &&
2116 2816 : isNullConstantOrNullSplatConstant(N0.getOperand(0)))
2117 54 : return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2118 :
2119 : // fold (A + (0-B)) -> A-B
2120 3109784 : if (N1.getOpcode() == ISD::SUB &&
2121 6180 : isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2122 620 : return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2123 :
2124 : // fold (A+(B-A)) -> B
2125 3103294 : if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2126 4 : return N1.getOperand(0);
2127 :
2128 : // fold ((B-A)+A) -> B
2129 3103290 : if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2130 31 : return N0.getOperand(0);
2131 :
2132 : // fold (A+(B-(A+C))) to (B-C)
2133 3103259 : if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2134 63 : N0 == N1.getOperand(1).getOperand(0))
2135 2 : return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2136 2 : N1.getOperand(1).getOperand(1));
2137 :
2138 : // fold (A+(B-(C+A))) to (B-C)
2139 3103257 : if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2140 61 : N0 == N1.getOperand(1).getOperand(1))
2141 3 : return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2142 3 : N1.getOperand(1).getOperand(0));
2143 :
2144 : // fold (A+((B-A)+or-C)) to (B+or-C)
2145 3103254 : if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2146 3107248 : N1.getOperand(0).getOpcode() == ISD::SUB &&
2147 103 : N0 == N1.getOperand(0).getOperand(1))
2148 5 : return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2149 5 : N1.getOperand(1));
2150 :
2151 : // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2152 3103249 : if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2153 18 : SDValue N00 = N0.getOperand(0);
2154 18 : SDValue N01 = N0.getOperand(1);
2155 18 : SDValue N10 = N1.getOperand(0);
2156 18 : SDValue N11 = N1.getOperand(1);
2157 :
2158 18 : if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2159 5 : return DAG.getNode(ISD::SUB, DL, VT,
2160 5 : DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2161 15 : DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2162 : }
2163 :
2164 3103244 : if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2165 13 : return V;
2166 :
2167 3103231 : if (SDValue V = foldAddSubOfSignBit(N, DAG))
2168 10 : return V;
2169 :
2170 3103221 : if (SimplifyDemandedBits(SDValue(N, 0)))
2171 5103 : return SDValue(N, 0);
2172 :
2173 : // fold (a+b) -> (a|b) iff a and b share no bits.
2174 6190470 : if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2175 3092352 : DAG.haveNoCommonBitsSet(N0, N1))
2176 61872 : return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2177 :
2178 : // fold (add (xor a, -1), 1) -> (sub 0, a)
2179 3067182 : if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
2180 9 : return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2181 9 : N0.getOperand(0));
2182 :
2183 3067173 : if (SDValue Combined = visitADDLike(N0, N1, N))
2184 804 : return Combined;
2185 :
2186 3066369 : if (SDValue Combined = visitADDLike(N1, N0, N))
2187 73 : return Combined;
2188 :
2189 3066296 : return SDValue();
2190 : }
2191 :
2192 5826665 : static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2193 : bool Masked = false;
2194 :
2195 : // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2196 : while (true) {
2197 5837978 : if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2198 9114 : V = V.getOperand(0);
2199 9114 : continue;
2200 : }
2201 :
2202 5828864 : if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2203 : Masked = true;
2204 2199 : V = V.getOperand(0);
2205 2199 : continue;
2206 : }
2207 :
2208 : break;
2209 : }
2210 :
2211 : // If this is not a carry (the carry is always result number 1), return.
2212 5826665 : if (V.getResNo() != 1)
2213 5813747 : return SDValue();
2214 :
2215 12646 : if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2216 24932 : V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2217 12014 : return SDValue();
2218 :
2219 : // If the result is masked, then no matter what kind of bool it is we can
2220 : // return. If it isn't, then we need to make sure the bool type is either 0 or
2221 : // 1 and not other values.
2222 904 : if (Masked ||
2223 926 : TLI.getBooleanContents(V.getValueType()) ==
2224 : TargetLoweringBase::ZeroOrOneBooleanContent)
2225 904 : return V;
2226 :
2227 0 : return SDValue();
2228 : }
2229 :
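     : // Helper for visitADD: each fold here only needs to match one operand
     : // order because the caller invokes it twice with N0 and N1 swapped.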
2230 0 : SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2231 0 : EVT VT = N0.getValueType();
2232 : SDLoc DL(LocReference);
2233 :
2234 : // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2235 0 : if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2236 0 : isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2237 0 : return DAG.getNode(ISD::SUB, DL, VT, N0,
2238 : DAG.getNode(ISD::SHL, DL, VT,
2239 0 : N1.getOperand(0).getOperand(1),
2240 0 : N1.getOperand(1)));
2241 :
2242 0 : if (N1.getOpcode() == ISD::AND) {
2243 0 : SDValue AndOp0 = N1.getOperand(0);
2244 0 : unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2245 : unsigned DestBits = VT.getScalarSizeInBits();
2246 :
2247 : // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2248 : // and similar xforms where the inner op is either ~0 or 0.
2249 0 : if (NumSignBits == DestBits &&
2250 0 : isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2251 0 : return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2252 : }
2253 :
2254 : // add (sext i1), X -> sub X, (zext i1)
2255 : if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2256 0 : N0.getOperand(0).getValueType() == MVT::i1 &&
2257 0 : !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2258 0 : SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2259 0 : return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2260 : }
2261 :
2262 : // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2263 0 : if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2264 : VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2265 0 : if (TN->getVT() == MVT::i1) {
2266 0 : SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2267 0 : DAG.getConstant(1, DL, VT));
2268 0 : return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2269 : }
2270 : }
2271 :
2272 : // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2273 0 : if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2274 0 : N1.getResNo() == 0)
2275 0 : return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2276 0 : N0, N1.getOperand(0), N1.getOperand(2));
2277 :
2278 : // (add X, Carry) -> (addcarry X, 0, Carry)
2279 0 : if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2280 0 : if (SDValue Carry = getAsCarry(TLI, N1))
2281 0 : return DAG.getNode(ISD::ADDCARRY, DL,
2282 0 : DAG.getVTList(VT, Carry.getValueType()), N0,
2283 0 : DAG.getConstant(0, DL, VT), Carry);
2284 :
2285 0 : return SDValue();
2286 : }
2287 :
2288 522 : SDValue DAGCombiner::visitADDC(SDNode *N) {
2289 522 : SDValue N0 = N->getOperand(0);
2290 522 : SDValue N1 = N->getOperand(1);
2291 522 : EVT VT = N0.getValueType();
2292 : SDLoc DL(N);
2293 :
2294 : // If the flag result is dead, turn this into an ADD.
2295 522 : if (!N->hasAnyUseOfValue(1))
2296 15 : return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2297 45 : DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2298 :
2299 : // canonicalize constant to RHS.
2300 : ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2301 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2302 507 : if (N0C && !N1C)
2303 0 : return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2304 :
2305 : // fold (addc x, 0) -> x + no carry out
2306 507 : if (isNullConstant(N1))
2307 11 : return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2308 22 : DL, MVT::Glue));
2309 :
2310 : // If it cannot overflow, transform into an add.
2311 496 : if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2312 11 : return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2313 33 : DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2314 :
2315 485 : return SDValue();
2316 : }
2317 :
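     : /// Invert a boolean value by XOR'ing it with 1 or -1, as dictated by the
     : /// target's boolean contents for the given type.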
2318 7 : static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2319 : SelectionDAG &DAG, const TargetLowering &TLI) {
2320 7 : SDValue Cst;
2321 7 : switch (TLI.getBooleanContents(VT)) {
2322 7 : case TargetLowering::ZeroOrOneBooleanContent:
2323 : case TargetLowering::UndefinedBooleanContent:
2324 7 : Cst = DAG.getConstant(1, DL, VT);
2325 7 : break;
2326 0 : case TargetLowering::ZeroOrNegativeOneBooleanContent:
2327 0 : Cst = DAG.getConstant(-1, DL, VT);
2328 0 : break;
2329 : }
2330 :
2331 7 : return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2332 : }
2333 :
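     : /// Return true if V is an XOR whose constant operand inverts a boolean of
     : /// type VT under the target's boolean contents.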
2334 0 : static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2335 0 : if (V.getOpcode() != ISD::XOR) return false;
2336 : ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2337 0 : if (!Const) return false;
2338 :
2339 0 : switch(TLI.getBooleanContents(VT)) {
2340 0 : case TargetLowering::ZeroOrOneBooleanContent:
2341 0 : return Const->isOne();
2342 0 : case TargetLowering::ZeroOrNegativeOneBooleanContent:
2343 0 : return Const->isAllOnesValue();
2344 0 : case TargetLowering::UndefinedBooleanContent:
2345 0 : return (Const->getAPIntValue() & 0x01) == 1;
2346 : }
2347 0 : llvm_unreachable("Unsupported boolean content");
2348 : }
2349 :
2350 130962 : SDValue DAGCombiner::visitUADDO(SDNode *N) {
2351 130962 : SDValue N0 = N->getOperand(0);
2352 130962 : SDValue N1 = N->getOperand(1);
2353 130962 : EVT VT = N0.getValueType();
2354 130962 : if (VT.isVector())
2355 0 : return SDValue();
2356 :
2357 261924 : EVT CarryVT = N->getValueType(1);
2358 : SDLoc DL(N);
2359 :
2360 : // If the flag result is dead, turn this into an ADD.
2361 130962 : if (!N->hasAnyUseOfValue(1))
2362 405 : return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2363 810 : DAG.getUNDEF(CarryVT));
2364 :
2365 : // canonicalize constant to RHS.
2366 : ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2367 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2368 130557 : if (N0C && !N1C)
2369 10 : return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2370 :
2371 : // fold (uaddo x, 0) -> x + no carry out
2372 130552 : if (isNullConstant(N1))
2373 904 : return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2374 :
2375 : // If it cannot overflow, transform into an add.
2376 130100 : if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2377 103 : return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2378 206 : DAG.getConstant(0, DL, CarryVT));
2379 :
2380 : // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
2381 129997 : if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
2382 4 : SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2383 : DAG.getConstant(0, DL, VT),
2384 4 : N0.getOperand(0));
2385 : return CombineTo(N, Sub,
2386 4 : flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2387 : }
2388 :
2389 129993 : if (SDValue Combined = visitUADDOLike(N0, N1, N))
2390 231 : return Combined;
2391 :
2392 129762 : if (SDValue Combined = visitUADDOLike(N1, N0, N))
2393 9 : return Combined;
2394 :
2395 129753 : return SDValue();
2396 : }
2397 :
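     : // Helper for visitUADDO: like visitADDLike, it is called twice by the
     : // caller with the addends swapped, so folds match only one orientation.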
2398 0 : SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2399 0 : auto VT = N0.getValueType();
2400 :
2401 : // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2402 : // If Y + 1 cannot overflow.
2403 0 : if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2404 0 : SDValue Y = N1.getOperand(0);
2405 0 : SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2406 0 : if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2407 0 : return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2408 0 : N1.getOperand(2));
2409 : }
2410 :
2411 : // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2412 0 : if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2413 0 : if (SDValue Carry = getAsCarry(TLI, N1))
2414 0 : return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2415 0 : DAG.getConstant(0, SDLoc(N), VT), Carry);
2416 :
2417 0 : return SDValue();
2418 : }
2419 :
2420 0 : SDValue DAGCombiner::visitADDE(SDNode *N) {
2421 0 : SDValue N0 = N->getOperand(0);
2422 0 : SDValue N1 = N->getOperand(1);
2423 0 : SDValue CarryIn = N->getOperand(2);
2424 :
2425 : // canonicalize constant to RHS
2426 : ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2427 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2428 0 : if (N0C && !N1C)
2429 0 : return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2430 0 : N1, N0, CarryIn);
2431 :
2432 : // fold (adde x, y, false) -> (addc x, y)
2433 0 : if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2434 0 : return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2435 :
2436 0 : return SDValue();
2437 : }
2438 :
2439 156807 : SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2440 156807 : SDValue N0 = N->getOperand(0);
2441 156807 : SDValue N1 = N->getOperand(1);
2442 156807 : SDValue CarryIn = N->getOperand(2);
2443 : SDLoc DL(N);
2444 :
2445 : // canonicalize constant to RHS
2446 : ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2447 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2448 156807 : if (N0C && !N1C)
2449 720 : return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2450 :
2451 : // fold (addcarry x, y, false) -> (uaddo x, y)
2452 156447 : if (isNullConstant(CarryIn)) {
2453 565 : if (!LegalOperations ||
2454 76 : TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2455 1130 : return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2456 : }
2457 :
2458 155882 : EVT CarryVT = CarryIn.getValueType();
2459 :
2460 : // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2461 155882 : if (isNullConstant(N0) && isNullConstant(N1)) {
2462 510 : EVT VT = N0.getValueType();
2463 510 : SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2464 510 : AddToWorklist(CarryExt.getNode());
2465 510 : return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2466 : DAG.getConstant(1, DL, VT)),
2467 1020 : DAG.getConstant(0, DL, CarryVT));
2468 : }
2469 :
2470 : // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2471 155380 : if (isBitwiseNot(N0) && isNullConstant(N1) &&
2472 8 : isBooleanFlip(CarryIn, CarryVT, TLI)) {
2473 3 : SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2474 : DAG.getConstant(0, DL, N0.getValueType()),
2475 3 : N0.getOperand(0), CarryIn.getOperand(0));
2476 : return CombineTo(N, Sub,
2477 3 : flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2478 : }
2479 :
2480 155369 : if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2481 234 : return Combined;
2482 :
2483 155135 : if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2484 0 : return Combined;
2485 :
2486 155135 : return SDValue();
2487 : }
2488 :
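     : // Helper for visitADDCARRY: likewise tried by the caller with the two
     : // addends in either order.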
2489 310504 : SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2490 : SDNode *N) {
2491 : // Iff the flag result is dead:
2492 : // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2493 309060 : if ((N0.getOpcode() == ISD::ADD ||
2494 16535 : (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2495 324322 : isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2496 464 : return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2497 232 : N0.getOperand(0), N0.getOperand(1), CarryIn);
2498 :
2499 : /**
2500 : * When one of the addcarry arguments is itself a carry, we may be facing
2501 : * a diamond carry propagation, in which case we try to transform the DAG
2502 : * to ensure linear carry propagation if that is possible.
2503 : *
2504 : * We are trying to get:
2505 : * (addcarry X, 0, (addcarry A, B, Z):Carry)
2506 : */
2507 310272 : if (auto Y = getAsCarry(TLI, N1)) {
2508 : /**
2509 : * (uaddo A, B)
2510 : * / \
2511 : * Carry Sum
2512 : * | \
2513 : * | (addcarry *, 0, Z)
2514 : * | /
2515 : * \ Carry
2516 : * | /
2517 : * (addcarry X, *, *)
2518 : */
2519 621 : if (Y.getOpcode() == ISD::UADDO &&
2520 621 : CarryIn.getResNo() == 1 &&
2521 12 : CarryIn.getOpcode() == ISD::ADDCARRY &&
2522 900 : isNullConstant(CarryIn.getOperand(1)) &&
2523 4 : CarryIn.getOperand(0) == Y.getValue(0)) {
2524 2 : auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2525 : Y.getOperand(0), Y.getOperand(1),
2526 2 : CarryIn.getOperand(2));
2527 2 : AddToWorklist(NewY.getNode());
2528 4 : return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2529 2 : DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2530 2 : NewY.getValue(1));
2531 : }
2532 : }
2533 :
2534 310270 : return SDValue();
2535 : }
2536 :
2537 : // Since it may not be valid to emit a fold to zero for vector initializers,
2538 : // check that we can before folding.
2539 51 : static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2540 : SelectionDAG &DAG, bool LegalOperations,
2541 : bool LegalTypes) {
2542 51 : if (!VT.isVector())
2543 38 : return DAG.getConstant(0, DL, VT);
2544 13 : if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2545 13 : return DAG.getConstant(0, DL, VT);
2546 0 : return SDValue();
2547 : }
2548 :
2549 532128 : SDValue DAGCombiner::visitSUB(SDNode *N) {
2550 532128 : SDValue N0 = N->getOperand(0);
2551 532128 : SDValue N1 = N->getOperand(1);
2552 532128 : EVT VT = N0.getValueType();
2553 : SDLoc DL(N);
2554 :
2555 : // fold vector ops
2556 532128 : if (VT.isVector()) {
2557 483137 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
2558 4 : return FoldedVOp;
2559 :
2560 : // fold (sub x, 0) -> x, vector edition
2561 483133 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
2562 7 : return N0;
2563 : }
2564 :
2565 : // fold (sub x, x) -> 0
2566 : // FIXME: Refactor this and xor and other similar operations together.
2567 532117 : if (N0 == N1)
2568 32 : return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2569 542117 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2570 10032 : DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2571 : // fold (sub c1, c2) -> c1-c2
2572 3 : return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2573 3 : N1.getNode());
2574 : }
2575 :
2576 532082 : if (SDValue NewSel = foldBinOpIntoSelect(N))
2577 11 : return NewSel;
2578 :
2579 532071 : ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2580 :
2581 : // fold (sub x, c) -> (add x, -c)
2582 : if (N1C) {
2583 8287 : return DAG.getNode(ISD::ADD, DL, VT, N0,
2584 24861 : DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2585 : }
2586 :
2587 523784 : if (isNullConstantOrNullSplatConstant(N0)) {
2588 : unsigned BitWidth = VT.getScalarSizeInBits();
2589 : // Right-shifting everything out but the sign bit followed by negation is
2590 : // the same as flipping arithmetic/logical shift type without the negation:
2591 : // -(X >>u 31) -> (X >>s 31)
2592 : // -(X >>s 31) -> (X >>u 31)
2593 10050 : if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2594 135 : ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2595 267 : if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2596 11 : auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2597 11 : if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2598 22 : return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2599 : }
2600 : }
2601 :
2602 : // 0 - X --> 0 if the sub is NUW.
2603 5014 : if (N->getFlags().hasNoUnsignedWrap())
2604 2 : return N0;
2605 :
2606 15036 : if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2607 : // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2608 : // N1 must be 0 because negating the minimum signed value is undefined.
2609 8 : if (N->getFlags().hasNoSignedWrap())
2610 2 : return N0;
2611 :
2612 : // 0 - X --> X if X is 0 or the minimum signed value.
2613 6 : return N1;
2614 : }
2615 : }
2616 :
2617 : // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2618 523763 : if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2619 134 : return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2620 :
2621 : // fold (A - (0-B)) -> A+B
2622 1048146 : if (N1.getOpcode() == ISD::SUB &&
2623 754 : isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2624 87 : return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2625 :
2626 : // fold A-(A-B) -> B
2627 1047334 : if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2628 212 : return N1.getOperand(1);
2629 :
2630 : // fold (A+B)-A -> B
2631 523455 : if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2632 22 : return N0.getOperand(1);
2633 :
2634 : // fold (A+B)-B -> A
2635 523433 : if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2636 3 : return N0.getOperand(0);
2637 :
2638 : // fold C2-(A+C1) -> (C2-C1)-A
2639 523430 : if (N1.getOpcode() == ISD::ADD) {
2640 597 : SDValue N11 = N1.getOperand(1);
2641 784 : if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2642 187 : isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2643 288 : SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2644 432 : return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2645 : }
2646 : }
2647 :
2648 : // fold ((A+(B+or-C))-B) -> A+or-C
2649 1777 : if (N0.getOpcode() == ISD::ADD &&
2650 1777 : (N0.getOperand(1).getOpcode() == ISD::SUB ||
2651 523286 : N0.getOperand(1).getOpcode() == ISD::ADD) &&
2652 46 : N0.getOperand(1).getOperand(0) == N1)
2653 7 : return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2654 7 : N0.getOperand(1).getOperand(1));
2655 :
2656 : // fold ((A+(C+B))-B) -> A+C
2657 523279 : if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2658 7 : N0.getOperand(1).getOperand(1) == N1)
2659 1 : return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2660 1 : N0.getOperand(1).getOperand(0));
2661 :
2662 : // fold ((A-(B-C))-C) -> A-B
2663 523278 : if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2664 3 : N0.getOperand(1).getOperand(1) == N1)
2665 3 : return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2666 3 : N0.getOperand(1).getOperand(0));
2667 :
2668 : // fold (A-(B-C)) -> A+(C-B)
2669 1046550 : if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2670 347 : return DAG.getNode(ISD::ADD, DL, VT, N0,
2671 : DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2672 694 : N1.getOperand(0)));
2673 :
2674 : // fold (X - (-Y * Z)) -> (X + (Y * Z))
2675 1045856 : if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2676 6864 : if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2677 9 : isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
2678 9 : SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2679 9 : N1.getOperand(0).getOperand(1),
2680 18 : N1.getOperand(1));
2681 18 : return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2682 : }
2683 6828 : if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2684 0 : isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
2685 0 : SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2686 : N1.getOperand(0),
2687 0 : N1.getOperand(1).getOperand(1));
2688 0 : return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2689 : }
2690 : }
2691 :
2692 : // If either operand of a sub is undef, the result is undef
2693 522919 : if (N0.isUndef())
2694 0 : return N0;
2695 1045838 : if (N1.isUndef())
2696 0 : return N1;
2697 :
2698 522919 : if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2699 9 : return V;
2700 :
2701 522910 : if (SDValue V = foldAddSubOfSignBit(N, DAG))
2702 9 : return V;
2703 :
2704 : // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2705 522901 : if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2706 30009 : if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2707 2 : SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2708 2 : SDValue S0 = N1.getOperand(0);
2709 : if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2710 : unsigned OpSizeInBits = VT.getScalarSizeInBits();
2711 4 : if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2712 4 : if (C->getAPIntValue() == (OpSizeInBits - 1))
2713 4 : return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2714 : }
2715 : }
2716 : }
2717 :
2718 : // If the relocation model supports it, consider symbol offsets.
2719 : if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2720 11 : if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2721 : // fold (sub Sym, c) -> Sym-c
2722 : if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2723 : return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2724 : GA->getOffset() -
2725 : (uint64_t)N1C->getSExtValue());
2726 : // fold (sub Sym+c1, Sym+c2) -> c1-c2
2727 : if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2728 0 : if (GA->getGlobal() == GB->getGlobal())
2729 0 : return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2730 0 : DL, VT);
2731 : }
2732 :
2733 : // sub X, (sextinreg Y i1) -> add X, (and Y 1)
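: // (sext_inreg from i1 yields 0 or -1, so the subtrahend is 0 or -1;
: // adding (and Y, 1), which is 0 or +1, computes the same value)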
2734 1045798 : if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2735 : VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2736 : if (TN->getVT() == MVT::i1) {
2737 58 : SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2738 58 : DAG.getConstant(1, DL, VT));
2739 116 : return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2740 : }
2741 : }
2742 :
2743 : // Prefer an add for more folding potential and possibly better codegen:
2744 : // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
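: // (both subtract 1 exactly when N10 is negative: the logical shift of the
: // sign bit yields 0 or +1, while the arithmetic shift yields 0 or -1)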
2745 522841 : if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2746 75 : SDValue ShAmt = N1.getOperand(1);
2747 75 : ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2748 150 : if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2749 45 : SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2750 30 : return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2751 : }
2752 : }
2753 :
2754 522826 : return SDValue();
2755 : }
2756 :
2757 89 : SDValue DAGCombiner::visitSUBC(SDNode *N) {
2758 89 : SDValue N0 = N->getOperand(0);
2759 89 : SDValue N1 = N->getOperand(1);
2760 89 : EVT VT = N0.getValueType();
2761 : SDLoc DL(N);
2762 :
2763 : // If the flag result is dead, turn this into an SUB.
2764 89 : if (!N->hasAnyUseOfValue(1))
2765 24 : return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2766 72 : DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2767 :
2768 : // fold (subc x, x) -> 0 + no borrow
2769 : if (N0 == N1)
2770 0 : return CombineTo(N, DAG.getConstant(0, DL, VT),
2771 0 : DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2772 :
2773 : // fold (subc x, 0) -> x + no borrow
2774 65 : if (isNullConstant(N1))
2775 0 : return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2776 :
2777 : // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2778 65 : if (isAllOnesConstant(N0))
2779 0 : return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2780 0 : DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2781 :
2782 65 : return SDValue();
2783 : }
2784 :
2785 1913 : SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2786 1913 : SDValue N0 = N->getOperand(0);
2787 1913 : SDValue N1 = N->getOperand(1);
2788 1913 : EVT VT = N0.getValueType();
2789 1913 : if (VT.isVector())
2790 0 : return SDValue();
2791 :
2792 3826 : EVT CarryVT = N->getValueType(1);
2793 : SDLoc DL(N);
2794 :
2795 : // If the flag result is dead, turn this into an SUB.
2796 1913 : if (!N->hasAnyUseOfValue(1))
2797 106 : return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2798 212 : DAG.getUNDEF(CarryVT));
2799 :
2800 : // fold (usubo x, x) -> 0 + no borrow
2801 : if (N0 == N1)
2802 7 : return CombineTo(N, DAG.getConstant(0, DL, VT),
2803 14 : DAG.getConstant(0, DL, CarryVT));
2804 :
2805 : // fold (usubo x, 0) -> x + no borrow
2806 1800 : if (isNullConstant(N1))
2807 32 : return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2808 :
2809 : // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2810 1784 : if (isAllOnesConstant(N0))
2811 4 : return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2812 8 : DAG.getConstant(0, DL, CarryVT));
2813 :
2814 1780 : return SDValue();
2815 : }
2816 :
2817 0 : SDValue DAGCombiner::visitSUBE(SDNode *N) {
2818 0 : SDValue N0 = N->getOperand(0);
2819 0 : SDValue N1 = N->getOperand(1);
2820 0 : SDValue CarryIn = N->getOperand(2);
2821 :
2822 : // fold (sube x, y, false) -> (subc x, y)
2823 0 : if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2824 0 : return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2825 :
2826 0 : return SDValue();
2827 : }
2828 :
2829 839 : SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2830 839 : SDValue N0 = N->getOperand(0);
2831 839 : SDValue N1 = N->getOperand(1);
2832 839 : SDValue CarryIn = N->getOperand(2);
2833 :
2834 : // fold (subcarry x, y, false) -> (usubo x, y)
2835 839 : if (isNullConstant(CarryIn)) {
2836 120 : if (!LegalOperations ||
2837 109 : TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2838 240 : return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2839 : }
2840 :
2841 719 : return SDValue();
2842 : }
2843 :
2844 39729 : SDValue DAGCombiner::visitMUL(SDNode *N) {
2845 39729 : SDValue N0 = N->getOperand(0);
2846 39729 : SDValue N1 = N->getOperand(1);
2847 79458 : EVT VT = N0.getValueType();
2848 :
2849 : // fold (mul x, undef) -> 0
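: // (folding to 0 is safe because undef may be chosen to be 0, and
: // x * 0 == 0 for every x, so 0 is always a consistent result)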
2850 39729 : if (N0.isUndef() || N1.isUndef())
2851 56 : return DAG.getConstant(0, SDLoc(N), VT);
2852 :
2853 : bool N0IsConst = false;
2854 : bool N1IsConst = false;
2855 : bool N1IsOpaqueConst = false;
2856 : bool N0IsOpaqueConst = false;
2857 : APInt ConstValue0, ConstValue1;
2858 : // fold vector ops
2859 39701 : if (VT.isVector()) {
2860 9791 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
2861 12 : return FoldedVOp;
2862 :
2863 9779 : N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2864 9779 : N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2865 : assert((!N0IsConst ||
2866 : ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2867 : "Splat APInt should be element width");
2868 : assert((!N1IsConst ||
2869 : ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2870 : "Splat APInt should be element width");
2871 : } else {
2872 : N0IsConst = isa<ConstantSDNode>(N0);
2873 : if (N0IsConst) {
2874 12 : ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2875 6 : N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2876 : }
2877 : N1IsConst = isa<ConstantSDNode>(N1);
2878 : if (N1IsConst) {
2879 35058 : ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2880 17529 : N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2881 : }
2882 : }
2883 :
2884 : // fold (mul c1, c2) -> c1*c2
2885 39689 : if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2886 6 : return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2887 6 : N0.getNode(), N1.getNode());
2888 :
2889 : // canonicalize constant to RHS (vector doesn't have to splat)
2890 39709 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2891 26 : !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2892 52 : return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2893 : // fold (mul x, 0) -> 0
2894 58438 : if (N1IsConst && ConstValue1.isNullValue())
2895 25 : return N1;
2896 : // fold (mul x, 1) -> x
2897 58388 : if (N1IsConst && ConstValue1.isOneValue())
2898 2143 : return N0;
2899 :
2900 37489 : if (SDValue NewSel = foldBinOpIntoSelect(N))
2901 3 : return NewSel;
2902 :
2903 : // fold (mul x, -1) -> 0-x
2904 54096 : if (N1IsConst && ConstValue1.isAllOnesValue()) {
2905 : SDLoc DL(N);
2906 97 : return DAG.getNode(ISD::SUB, DL, VT,
2907 97 : DAG.getConstant(0, DL, VT), N0);
2908 : }
2909 : // fold (mul x, (1 << c)) -> x << c
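: // e.g. (mul x, 16) --> (shl x, 4)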
2910 54572 : if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2911 41555 : DAG.isKnownToBeAPowerOfTwo(N1) &&
2912 286 : (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2913 : SDLoc DL(N);
2914 4166 : SDValue LogBase2 = BuildLogBase2(N1, DL);
2915 8332 : EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2916 4166 : SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2917 8332 : return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2918 : }
2919 : // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
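: // e.g. (mul x, -16) --> (sub 0, (shl x, 4))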
2920 82682 : if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2921 85 : unsigned Log2Val = (-ConstValue1).logBase2();
2922 : SDLoc DL(N);
2923 : // FIXME: If the input is something that is easily negated (e.g. a
2924 : // single-use add), we should put the negate there.
2925 85 : return DAG.getNode(ISD::SUB, DL, VT,
2926 85 : DAG.getConstant(0, DL, VT),
2927 : DAG.getNode(ISD::SHL, DL, VT, N0,
2928 : DAG.getConstant(Log2Val, DL,
2929 170 : getShiftAmountTy(N0.getValueType()))));
2930 : }
2931 :
2932 : // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
2933 : // mul x, (2^N + 1) --> add (shl x, N), x
2934 : // mul x, (2^N - 1) --> sub (shl x, N), x
2935 : // Examples: x * 33 --> (x << 5) + x
2936 : // x * 15 --> (x << 4) - x
2937 : // x * -33 --> -((x << 5) + x)
2938 : // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
2939 33138 : if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
2940 : // TODO: We could handle more general decomposition of any constant by
2941 : // having the target set a limit on number of ops and making a
2942 : // callback to determine that sequence (similar to sqrt expansion).
2943 : unsigned MathOp = ISD::DELETED_NODE;
2944 66 : APInt MulC = ConstValue1.abs();
2945 66 : if ((MulC - 1).isPowerOf2())
2946 : MathOp = ISD::ADD;
2947 44 : else if ((MulC + 1).isPowerOf2())
2948 : MathOp = ISD::SUB;
2949 :
2950 : if (MathOp != ISD::DELETED_NODE) {
2951 154 : unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
2952 154 : : (MulC + 1).logBase2();
2953 : assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
2954 : "Not expecting multiply-by-constant that could have simplified");
2955 : SDLoc DL(N);
2956 66 : SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
2957 66 : DAG.getConstant(ShAmt, DL, VT));
2958 132 : SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
2959 66 : if (ConstValue1.isNegative())
2960 32 : R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
2961 66 : return R;
2962 : }
2963 : }
2964 :
2965 : // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
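: // e.g. (mul (shl X, 2), 5) --> (mul X, 20)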
2966 33094 : if (N0.getOpcode() == ISD::SHL &&
2967 33077 : isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2968 10 : isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2969 10 : SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2970 5 : if (isConstantOrConstantVector(C3))
2971 15 : return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2972 : }
2973 :
2974 : // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2975 : // use.
2976 : {
2977 : SDValue Sh(nullptr, 0), Y(nullptr, 0);
2978 :
2979 : // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2980 33084 : if (N0.getOpcode() == ISD::SHL &&
2981 33067 : isConstantOrConstantVector(N0.getOperand(1)) &&
2982 15 : N0.getNode()->hasOneUse()) {
2983 12 : Sh = N0; Y = N1;
2984 33064 : } else if (N1.getOpcode() == ISD::SHL &&
2985 33055 : isConstantOrConstantVector(N1.getOperand(1)) &&
2986 6 : N1.getNode()->hasOneUse()) {
2987 3 : Sh = N1; Y = N0;
2988 : }
2989 :
2990 33067 : if (Sh.getNode()) {
2991 16 : SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2992 31 : return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2993 : }
2994 : }
2995 :
2996 : // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
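: // e.g. (mul (add x, 3), 5) --> (add (mul x, 5), 15)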
2997 46132 : if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2998 13581 : N0.getOpcode() == ISD::ADD &&
2999 34245 : DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3000 191 : isMulAddWithConstProfitable(N, N0, N1))
3001 184 : return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3002 92 : DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3003 92 : N0.getOperand(0), N1),
3004 92 : DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3005 368 : N0.getOperand(1), N1));
3006 :
3007 : // reassociate mul
3008 41538 : if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3009 542 : return RMUL;
3010 :
3011 32418 : return SDValue();
3012 : }
3013 :
3014 : /// Return true if divmod libcall is available.
3015 1431 : static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3016 : const TargetLowering &TLI) {
3017 : RTLIB::Libcall LC;
3018 1431 : EVT NodeType = Node->getValueType(0);
3019 1431 : if (!NodeType.isSimple())
3020 : return false;
3021 1431 : switch (NodeType.getSimpleVT().SimpleTy) {
3022 : default: return false; // No libcall for vector types.
3023 0 : case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3024 4 : case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3025 1232 : case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3026 188 : case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3027 7 : case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3028 : }
3029 :
3030 1431 : return TLI.getLibcallName(LC) != nullptr;
3031 : }
3032 :
3033 : /// Issue divrem if both quotient and remainder are needed.
3034 5726 : SDValue DAGCombiner::useDivRem(SDNode *Node) {
3035 5726 : if (Node->use_empty())
3036 0 : return SDValue(); // This is a dead node, leave it alone.
3037 :
3038 5726 : unsigned Opcode = Node->getOpcode();
3039 5726 : bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3040 5726 : unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3041 :
3042 : // DivMod lib calls can still work on non-legal types, since the node is lowered to a lib call.
3043 11452 : EVT VT = Node->getValueType(0);
3044 5726 : if (VT.isVector() || !VT.isInteger())
3045 499 : return SDValue();
3046 :
3047 6059 : if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3048 801 : return SDValue();
3049 :
3050 : // If DIVREM is going to get expanded into a libcall,
3051 : // but there is no libcall available, then don't combine.
3052 4426 : if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3053 1431 : !isDivRemLibcallAvailable(Node, isSigned, TLI))
3054 1309 : return SDValue();
3055 :
3056 : // If div is legal, it's better to do the normal expansion
3057 : unsigned OtherOpcode = 0;
3058 3117 : if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3059 1634 : OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3060 1634 : if (TLI.isOperationLegalOrCustom(Opcode, VT))
3061 211 : return SDValue();
3062 : } else {
3063 1483 : OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3064 1483 : if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3065 176 : return SDValue();
3066 : }
3067 :
3068 2730 : SDValue Op0 = Node->getOperand(0);
3069 2730 : SDValue Op1 = Node->getOperand(1);
3070 : SDValue combined;
3071 2730 : for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3072 6389 : UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3073 : SDNode *User = *UI;
3074 3659 : if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3075 929 : User->use_empty())
3076 : continue;
3077 : // Convert the other matching node(s), too;
3078 : // otherwise, the DIVREM may get target-legalized into something
3079 : // target-specific that we won't be able to recognize.
3080 : unsigned UserOpc = User->getOpcode();
3081 410 : if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3082 948 : User->getOperand(0) == Op0 &&
3083 384 : User->getOperand(1) == Op1) {
3084 168 : if (!combined) {
3085 168 : if (UserOpc == OtherOpcode) {
3086 168 : SDVTList VTs = DAG.getVTList(VT, VT);
3087 182 : combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3088 0 : } else if (UserOpc == DivRemOpc) {
3089 : combined = SDValue(User, 0);
3090 : } else {
3091 : assert(UserOpc == Opcode);
3092 : continue;
3093 : }
3094 : }
3095 168 : if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3096 138 : CombineTo(User, combined);
3097 30 : else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3098 30 : CombineTo(User, combined.getValue(1));
3099 : }
3100 : }
3101 2730 : return combined;
3102 : }
3103 :
3104 15164 : static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3105 15164 : SDValue N0 = N->getOperand(0);
3106 15164 : SDValue N1 = N->getOperand(1);
3107 30328 : EVT VT = N->getValueType(0);
3108 : SDLoc DL(N);
3109 :
3110 15164 : unsigned Opc = N->getOpcode();
3111 15164 : bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3112 15164 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
3113 :
3114 : // X / undef -> undef
3115 : // X % undef -> undef
3116 : // X / 0 -> undef
3117 : // X % 0 -> undef
3118 : // NOTE: This includes vectors where any divisor element is zero/undef.
3119 30328 : if (DAG.isUndef(Opc, {N0, N1}))
3120 1 : return DAG.getUNDEF(VT);
3121 :
3122 : // undef / X -> 0
3123 : // undef % X -> 0
3124 15163 : if (N0.isUndef())
3125 0 : return DAG.getConstant(0, DL, VT);
3126 :
3127 : // TODO: 0 / X -> 0
3128 : // TODO: 0 % X -> 0
3129 :
3130 : // X / X -> 1
3131 : // X % X -> 0
3132 : if (N0 == N1)
3133 62 : return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3134 :
3135 : // X / 1 -> X
3136 : // X % 1 -> 0
3137 24497 : if (N1C && N1C->isOne())
3138 48 : return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3139 : // If this is a boolean op (single-bit element type), we can't have
3140 : // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3141 : // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
3142 :
3143 15071 : return SDValue();
3144 : }
3145 :
3146 5883 : SDValue DAGCombiner::visitSDIV(SDNode *N) {
3147 5883 : SDValue N0 = N->getOperand(0);
3148 5883 : SDValue N1 = N->getOperand(1);
3149 5883 : EVT VT = N->getValueType(0);
3150 5883 : EVT CCVT = getSetCCResultType(VT);
3151 :
3152 : // fold vector ops
3153 5883 : if (VT.isVector())
3154 425 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
3155 1 : return FoldedVOp;
3156 :
3157 : SDLoc DL(N);
3158 :
3159 : // fold (sdiv c1, c2) -> c1/c2
3160 5882 : ConstantSDNode *N0C = isConstOrConstSplat(N0);
3161 5882 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
3162 5882 : if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3163 0 : return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3164 : // fold (sdiv X, -1) -> 0-X
3165 10084 : if (N1C && N1C->isAllOnesValue())
3166 28 : return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3167 : // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
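: // (MIN_SIGNED has the largest magnitude of any value, so the quotient is
: // 1 when X == MIN_SIGNED and truncates to 0 for every other X)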
3168 5868 : if (N1C && N1C->getAPIntValue().isMinSignedValue())
3169 28 : return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3170 14 : DAG.getConstant(1, DL, VT),
3171 14 : DAG.getConstant(0, DL, VT));
3172 :
3173 5854 : if (SDValue V = simplifyDivRem(N, DAG))
3174 29 : return V;
3175 :
3176 5825 : if (SDValue NewSel = foldBinOpIntoSelect(N))
3177 7 : return NewSel;
3178 :
3179 : // If we know the sign bits of both operands are zero, strength reduce to a
3180 : // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3181 5818 : if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3182 50 : return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3183 :
3184 5793 : if (SDValue V = visitSDIVLike(N0, N1, N))
3185 4190 : return V;
3186 :
3187 : // sdiv, srem -> sdivrem
3188 : // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3189 : // true. Otherwise, we break the simplification logic in visitREM().
3190 1603 : AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3191 1603 : if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3192 1531 : if (SDValue DivRem = useDivRem(N))
3193 14 : return DivRem;
3194 :
3195 1589 : return SDValue();
3196 : }
3197 :
3198 6155 : SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3199 : SDLoc DL(N);
3200 6155 : EVT VT = N->getValueType(0);
3201 6155 : EVT CCVT = getSetCCResultType(VT);
3202 : unsigned BitWidth = VT.getScalarSizeInBits();
3203 :
3204 : // Helper for determining whether a value is a power-of-2 constant scalar or a
3205 : // vector of such elements.
3206 : auto IsPowerOfTwo = [](ConstantSDNode *C) {
3207 : if (C->isNullValue() || C->isOpaque())
3208 : return false;
3209 : if (C->getAPIntValue().isPowerOf2())
3210 : return true;
3211 : if ((-C->getAPIntValue()).isPowerOf2())
3212 : return true;
3213 : return false;
3214 : };
3215 :
3216 : // fold (sdiv X, pow2) -> simple ops after legalize
3217 : // FIXME: We check for the exact bit here because the generic lowering gives
3218 : // better results in that case. The target-specific lowering should learn how
3219 : // to handle exact sdivs efficiently.
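: // e.g. for (sdiv i32 X, 8) the expansion below boils down to
: //   (sra (add X, (srl (sra X, 31), 29)), 3),
: // i.e. 7 is added to negative dividends so the shift rounds toward zero.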
3220 8822 : if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3221 : // Target-specific implementation of sdiv x, pow2.
3222 595 : if (SDValue Res = BuildSDIVPow2(N))
3223 32 : return Res;
3224 :
3225 : // Create constants that are functions of the shift amount value.
3226 1126 : EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3227 563 : SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3228 1126 : SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3229 563 : C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3230 1126 : SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3231 563 : if (!isConstantOrConstantVector(Inexact))
3232 0 : return SDValue();
3233 :
3234 : // Splat the sign bit into the register
3235 563 : SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3236 563 : DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3237 563 : AddToWorklist(Sign.getNode());
3238 :
3239 : // Add (N0 < 0) ? abs2 - 1 : 0;
3240 1126 : SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3241 563 : AddToWorklist(Srl.getNode());
3242 1126 : SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3243 563 : AddToWorklist(Add.getNode());
3244 1126 : SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3245 563 : AddToWorklist(Sra.getNode());
3246 :
3247 : // Special case: (sdiv X, 1) -> X
3248 : // Special Case: (sdiv X, -1) -> 0-X
3249 563 : SDValue One = DAG.getConstant(1, DL, VT);
3250 563 : SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3251 563 : SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3252 563 : SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3253 1126 : SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3254 563 : Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3255 :
3256 : // If dividing by a positive value, we're done. Otherwise, the result must
3257 : // be negated.
3258 563 : SDValue Zero = DAG.getConstant(0, DL, VT);
3259 1126 : SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3260 :
3261 : // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3262 563 : SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3263 563 : SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3264 563 : return Res;
3265 : }
3266 :
3267 : // If integer divide is expensive and we satisfy the requirements, emit an
3268 : // alternate sequence. Targets may check function attributes for size/speed
3269 : // trade-offs.
3270 5560 : AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3271 9629 : if (isConstantOrConstantVector(N1) &&
3272 8138 : !TLI.isIntDivCheap(N->getValueType(0), Attr))
3273 4025 : if (SDValue Op = BuildSDIV(N))
3274 3895 : return Op;
3275 :
3276 1665 : return SDValue();
3277 : }
3278 :
3279 4531 : SDValue DAGCombiner::visitUDIV(SDNode *N) {
3280 4531 : SDValue N0 = N->getOperand(0);
3281 4531 : SDValue N1 = N->getOperand(1);
3282 4531 : EVT VT = N->getValueType(0);
3283 4531 : EVT CCVT = getSetCCResultType(VT);
3284 :
3285 : // fold vector ops
3286 4531 : if (VT.isVector())
3287 268 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
3288 0 : return FoldedVOp;
3289 :
3290 : SDLoc DL(N);
3291 :
3292 : // fold (udiv c1, c2) -> c1/c2
3293 4531 : ConstantSDNode *N0C = isConstOrConstSplat(N0);
3294 4531 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
3295 4531 : if (N0C && N1C)
3296 0 : if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3297 0 : N0C, N1C))
3298 0 : return Folded;
3299 : // fold (udiv X, -1) -> select(X == -1, 1, 0)
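: // (unsigned -1 is the maximum value, so the quotient is 1 only when
: // X == -1 and 0 otherwise)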
3300 7414 : if (N1C && N1C->getAPIntValue().isAllOnesValue())
3301 22 : return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3302 11 : DAG.getConstant(1, DL, VT),
3303 11 : DAG.getConstant(0, DL, VT));
3304 :
3305 4520 : if (SDValue V = simplifyDivRem(N, DAG))
3306 21 : return V;
3307 :
3308 4499 : if (SDValue NewSel = foldBinOpIntoSelect(N))
3309 7 : return NewSel;
3310 :
3311 4492 : if (SDValue V = visitUDIVLike(N0, N1, N))
3312 2805 : return V;
3313 :
3314 : // udiv, urem -> udivrem
3315 : // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3316 : // true. Otherwise, we break the simplification logic in visitREM().
3317 1687 : AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3318 1687 : if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3319 1623 : if (SDValue DivRem = useDivRem(N))
3320 16 : return DivRem;
3321 :
3322 1671 : return SDValue();
3323 : }
3324 :
3325 5033 : SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3326 : SDLoc DL(N);
3327 5033 : EVT VT = N->getValueType(0);
3328 :
3329 : // fold (udiv x, (1 << c)) -> x >>u c
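: // e.g. (udiv x, 16) --> (srl x, 4)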
3330 8471 : if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3331 3438 : DAG.isKnownToBeAPowerOfTwo(N1)) {
3332 2438 : SDValue LogBase2 = BuildLogBase2(N1, DL);
3333 2438 : AddToWorklist(LogBase2.getNode());
3334 :
3335 4876 : EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3336 2438 : SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3337 2438 : AddToWorklist(Trunc.getNode());
3338 4876 : return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3339 : }
3340 :
3341 : // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3342 2595 : if (N1.getOpcode() == ISD::SHL) {
3343 15 : SDValue N10 = N1.getOperand(0);
3344 30 : if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3345 15 : DAG.isKnownToBeAPowerOfTwo(N10)) {
3346 15 : SDValue LogBase2 = BuildLogBase2(N10, DL);
3347 15 : AddToWorklist(LogBase2.getNode());
3348 :
3349 15 : EVT ADDVT = N1.getOperand(1).getValueType();
3350 15 : SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3351 15 : AddToWorklist(Trunc.getNode());
3352 30 : SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3353 15 : AddToWorklist(Add.getNode());
3354 30 : return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3355 : }
3356 : }
3357 :
3358 : // fold (udiv x, c) -> alternate
3359 2580 : AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3360 3580 : if (isConstantOrConstantVector(N1) &&
3361 2000 : !TLI.isIntDivCheap(N->getValueType(0), Attr))
3362 957 : if (SDValue Op = BuildUDIV(N))
3363 812 : return Op;
3364 :
3365 1768 : return SDValue();
3366 : }
3367 :
3368 : // Handles ISD::SREM and ISD::UREM.
3369 4801 : SDValue DAGCombiner::visitREM(SDNode *N) {
3370 4801 : unsigned Opcode = N->getOpcode();
3371 4801 : SDValue N0 = N->getOperand(0);
3372 4801 : SDValue N1 = N->getOperand(1);
3373 4801 : EVT VT = N->getValueType(0);
3374 4801 : EVT CCVT = getSetCCResultType(VT);
3375 :
3376 : bool isSigned = (Opcode == ISD::SREM);
3377 : SDLoc DL(N);
3378 :
3379 : // fold (rem c1, c2) -> c1%c2
3380 4801 : ConstantSDNode *N0C = isConstOrConstSplat(N0);
3381 4801 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
3382 4801 : if (N0C && N1C)
3383 2 : if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3384 2 : return Folded;
3385 : // fold (urem X, -1) -> select(X == -1, 0, X)
3386 6732 : if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3387 18 : return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3388 9 : DAG.getConstant(0, DL, VT), N0);
3389 :
3390 4790 : if (SDValue V = simplifyDivRem(N, DAG))
3391 43 : return V;
3392 :
3393 4747 : if (SDValue NewSel = foldBinOpIntoSelect(N))
3394 14 : return NewSel;
3395 :
3396 4733 : if (isSigned) {
3397 : // If we know the sign bits of both operands are zero, strength reduce to a
3398 : // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3399 1577 : if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3400 40 : return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3401 : } else {
3402 3156 : SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3403 3156 : if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3404 : // fold (urem x, pow2) -> (and x, pow2-1)
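: // e.g. (urem x, 8) --> (and x, 7)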
3405 2750 : SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3406 1375 : AddToWorklist(Add.getNode());
3407 2750 : return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3408 : }
3409 1789 : if (N1.getOpcode() == ISD::SHL &&
3410 16 : DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3411 : // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3412 12 : SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3413 6 : AddToWorklist(Add.getNode());
3414 12 : return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3415 : }
3416 : }
3417 :
3418 3332 : AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3419 :
3420 : // If X/C can be simplified by the division-by-constant logic, lower
3421 : // X%C to the equivalent of X-X/C*C.
3422 : // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3423 : // speculative DIV must not cause a DIVREM conversion. We guard against this
3424 : // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3425 : // combine will not return a DIVREM. Regardless, checking cheapness here
3426 : // makes sense since the simplification results in fatter code.
3427 3332 : if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3428 : SDValue OptimizedDiv =
3429 903 : isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3430 903 : if (OptimizedDiv.getNode()) {
3431 1520 : SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3432 1520 : SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3433 760 : AddToWorklist(OptimizedDiv.getNode());
3434 760 : AddToWorklist(Mul.getNode());
3435 760 : return Sub;
3436 : }
3437 : }
3438 :
3439 : // sdiv, srem -> sdivrem; udiv, urem -> udivrem
3440 2572 : if (SDValue DivRem = useDivRem(N))
3441 138 : return DivRem.getValue(1);
3442 :
3443 2434 : return SDValue();
3444 : }
3445 :
3446 1513 : SDValue DAGCombiner::visitMULHS(SDNode *N) {
3447 1513 : SDValue N0 = N->getOperand(0);
3448 1513 : SDValue N1 = N->getOperand(1);
3449 3026 : EVT VT = N->getValueType(0);
3450 : SDLoc DL(N);
3451 :
3452 1513 : if (VT.isVector()) {
3453 : // fold (mulhs x, 0) -> 0
3454 808 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
3455 0 : return N1;
3456 808 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
3457 2 : return N0;
3458 : }
3459 :
3460 : // fold (mulhs x, 0) -> 0
3461 1511 : if (isNullConstant(N1))
3462 0 : return N1;
3463 : // fold (mulhs x, 1) -> (sra x, size(x)-1)
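: // (the high half of x * 1 is the sign-extension of x, i.e. the sign bit
: // replicated across the width)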
3464 1511 : if (isOneConstant(N1))
3465 0 : return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3466 0 : DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3467 0 : getShiftAmountTy(N0.getValueType())));
3468 :
3469 : // fold (mulhs x, undef) -> 0
3470 3022 : if (N0.isUndef() || N1.isUndef())
3471 0 : return DAG.getConstant(0, DL, VT);
3472 :
3473 : // If the type twice as wide is legal, transform the mulhs to a wider multiply
3474 : // plus a shift.
3475 1511 : if (VT.isSimple() && !VT.isVector()) {
3476 705 : MVT Simple = VT.getSimpleVT();
3477 705 : unsigned SimpleSize = Simple.getSizeInBits();
3478 705 : EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3479 705 : if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3480 138 : N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3481 138 : N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3482 138 : N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3483 69 : N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3484 : DAG.getConstant(SimpleSize, DL,
3485 69 : getShiftAmountTy(N1.getValueType())));
3486 138 : return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3487 : }
3488 : }
3489 :
3490 1442 : return SDValue();
3491 : }
3492 :
3493 6357 : SDValue DAGCombiner::visitMULHU(SDNode *N) {
3494 6357 : SDValue N0 = N->getOperand(0);
3495 6357 : SDValue N1 = N->getOperand(1);
3496 12714 : EVT VT = N->getValueType(0);
3497 : SDLoc DL(N);
3498 :
3499 6357 : if (VT.isVector()) {
3500 : // fold (mulhu x, 0) -> 0
3501 1177 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
3502 0 : return N1;
3503 1177 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
3504 0 : return N0;
3505 : }
3506 :
3507 : // fold (mulhu x, 0) -> 0
3508 6357 : if (isNullConstant(N1))
3509 0 : return N1;
3510 : // fold (mulhu x, 1) -> 0
3511 6357 : if (isOneConstant(N1))
3512 0 : return DAG.getConstant(0, DL, N0.getValueType());
3513 : // fold (mulhu x, undef) -> 0
3514 6357 : if (N0.isUndef() || N1.isUndef())
3515 0 : return DAG.getConstant(0, DL, VT);
3516 :
3517 : // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
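: // e.g. for i32: (mulhu x, (1 << 4)) --> (srl x, 28)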
3518 8279 : if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3519 6495 : DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3520 : SDLoc DL(N);
3521 : unsigned NumEltBits = VT.getScalarSizeInBits();
3522 79 : SDValue LogBase2 = BuildLogBase2(N1, DL);
3523 79 : SDValue SRLAmt = DAG.getNode(
3524 79 : ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3525 79 : EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3526 79 : SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3527 158 : return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3528 : }
3529 :
3530 : // If the type twice as wide is legal, transform the mulhu to a wider multiply
3531 : // plus a shift.
3532 6278 : if (VT.isSimple() && !VT.isVector()) {
3533 5180 : MVT Simple = VT.getSimpleVT();
3534 5180 : unsigned SimpleSize = Simple.getSizeInBits();
3535 5180 : EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3536 5180 : if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3537 484 : N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3538 484 : N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3539 484 : N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3540 242 : N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3541 : DAG.getConstant(SimpleSize, DL,
3542 242 : getShiftAmountTy(N1.getValueType())));
3543 484 : return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3544 : }
3545 : }
3546 :
3547 6036 : return SDValue();
3548 : }
3549 :
3550 : /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3551 : /// give the opcodes for the two computations that are being performed. Return
3552 : /// the simplified value if a simplification was made.
3553 6336 : SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3554 : unsigned HiOp) {
3555 : // If the high half is not needed, just compute the low half.
3556 6336 : bool HiExists = N->hasAnyUseOfValue(1);
3557 6336 : if (!HiExists &&
3558 36 : (!LegalOperations ||
3559 20 : TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3560 48 : SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3561 : return CombineTo(N, Res, Res);
3562 : }
3563 :
3564 : // If the low half is not needed, just compute the high half.
3565 6320 : bool LoExists = N->hasAnyUseOfValue(0);
3566 6320 : if (!LoExists &&
3567 1411 : (!LegalOperations ||
3568 716 : TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3569 2276 : SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3570 : return CombineTo(N, Res, Res);
3571 : }
3572 :
3573 : // If both halves are used, return as it is.
3574 5625 : if (LoExists && HiExists)
3575 4899 : return SDValue();
3576 :
3577 : // If the two computed results can be simplified separately, separate them.
3578 726 : if (LoExists) {
3579 20 : SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3580 10 : AddToWorklist(Lo.getNode());
3581 10 : SDValue LoOpt = combine(Lo.getNode());
3582 10 : if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3583 0 : (!LegalOperations ||
3584 0 : TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3585 0 : return CombineTo(N, LoOpt, LoOpt);
3586 : }
3587 :
3588 726 : if (HiExists) {
3589 1580 : SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3590 716 : AddToWorklist(Hi.getNode());
3591 716 : SDValue HiOpt = combine(Hi.getNode());
3592 716 : if (HiOpt.getNode() && HiOpt != Hi &&
3593 0 : (!LegalOperations ||
3594 0 : TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3595 0 : return CombineTo(N, HiOpt, HiOpt);
3596 : }
3597 :
3598 726 : return SDValue();
3599 : }
3600 :
3601 723 : SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3602 723 : if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3603 179 : return Res;
3604 :
3605 1088 : EVT VT = N->getValueType(0);
3606 : SDLoc DL(N);
3607 :
3608 : // If the type twice as wide is legal, transform the smul_lohi to a wider
3609 : // multiply plus a shift.
3610 544 : if (VT.isSimple() && !VT.isVector()) {
3611 544 : MVT Simple = VT.getSimpleVT();
3612 544 : unsigned SimpleSize = Simple.getSizeInBits();
3613 544 : EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3614 544 : if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3615 0 : SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3616 0 : SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3617 0 : Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3618 : // Compute the high part (result value 1).
3619 0 : Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3620 : DAG.getConstant(SimpleSize, DL,
3621 0 : getShiftAmountTy(Lo.getValueType())));
3622 0 : Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3623 : // Compute the low part (result value 0).
3624 0 : Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3625 : return CombineTo(N, Lo, Hi);
3626 : }
3627 : }
3628 :
3629 544 : return SDValue();
3630 : }
3631 :
3632 5613 : SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3633 5613 : if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3634 532 : return Res;
3635 :
3636 10162 : EVT VT = N->getValueType(0);
3637 : SDLoc DL(N);
3638 :
3639 : // If the type twice as wide is legal, transform the umul_lohi to a wider
3640 : // multiply plus a shift.
3641 5081 : if (VT.isSimple() && !VT.isVector()) {
3642 5081 : MVT Simple = VT.getSimpleVT();
3643 5081 : unsigned SimpleSize = Simple.getSizeInBits();
3644 5081 : EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3645 5081 : if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3646 0 : SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3647 0 : SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3648 0 : Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3649 : // Compute the high part (result value 1).
3650 0 : Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3651 : DAG.getConstant(SimpleSize, DL,
3652 0 : getShiftAmountTy(Lo.getValueType())));
3653 0 : Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3654 : // Compute the low part (result value 0).
3655 0 : Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3656 : return CombineTo(N, Lo, Hi);
3657 : }
3658 : }
3659 :
3660 5081 : return SDValue();
3661 : }
3662 :
3663 0 : SDValue DAGCombiner::visitSMULO(SDNode *N) {
3664 : // (smulo x, 2) -> (saddo x, x)
3665 0 : if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3666 0 : if (C2->getAPIntValue() == 2)
3667 0 : return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3668 0 : N->getOperand(0), N->getOperand(0));
3669 :
3670 0 : return SDValue();
3671 : }
3672 :
3673 0 : SDValue DAGCombiner::visitUMULO(SDNode *N) {
3674 : // (umulo x, 2) -> (uaddo x, x)
3675 0 : if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3676 0 : if (C2->getAPIntValue() == 2)
3677 0 : return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3678 0 : N->getOperand(0), N->getOperand(0));
3679 :
3680 0 : return SDValue();
3681 : }
3682 :
3683 25437 : SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3684 25437 : SDValue N0 = N->getOperand(0);
3685 25437 : SDValue N1 = N->getOperand(1);
3686 25437 : EVT VT = N0.getValueType();
3687 :
3688 : // fold vector ops
3689 25437 : if (VT.isVector())
3690 20293 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
3691 392 : return FoldedVOp;
3692 :
3693 : // fold operation with constant operands.
3694 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3695 : ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3696 25045 : if (N0C && N1C)
3697 0 : return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3698 :
3699 : // canonicalize constant to RHS
3700 25055 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3701 10 : !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3702 20 : return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3703 :
3704 : // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3705 : // Only do this if the current op isn't legal and the flipped is.
3706 25035 : unsigned Opcode = N->getOpcode();
3707 25035 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3708 5455 : if (!TLI.isOperationLegal(Opcode, VT) &&
3709 5455 : (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3710 52 : (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3711 : unsigned AltOpcode;
3712 : switch (Opcode) {
3713 : case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3714 : case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3715 : case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3716 : case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3717 0 : default: llvm_unreachable("Unknown MINMAX opcode");
3718 : }
3719 : if (TLI.isOperationLegal(AltOpcode, VT))
3720 10 : return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3721 : }
3722 :
3723 25030 : return SDValue();
3724 : }
3725 :
3726 : /// If this is a binary operator with two operands of the same opcode, try to
3727 : /// simplify it.
3728 66791 : SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3729 66791 : SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3730 66791 : EVT VT = N0.getValueType();
3731 : assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3732 :
3733 : // Bail early if none of these transforms apply.
3734 66791 : if (N0.getNumOperands() == 0) return SDValue();
3735 :
3736 : // For each of OP in AND/OR/XOR:
3737 : // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3738 : // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3739 : // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3740 : // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3741 : // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3742 : //
3743 : // do not sink logical op inside of a vector extend, since it may combine
3744 : // into a vsetcc.
3745 133546 : EVT Op0VT = N0.getOperand(0).getValueType();
3746 66611 : if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3747 66530 : N0.getOpcode() == ISD::SIGN_EXTEND ||
3748 66530 : N0.getOpcode() == ISD::BSWAP ||
3749 : // Avoid infinite looping with PromoteIntBinOp.
3750 1255 : (N0.getOpcode() == ISD::ANY_EXTEND &&
3751 66530 : (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3752 1093 : (N0.getOpcode() == ISD::TRUNCATE &&
3753 1374 : (!TLI.isZExtFree(VT, Op0VT) ||
3754 281 : !TLI.isTruncateFree(Op0VT, VT)) &&
3755 1859 : TLI.isTypeLegal(Op0VT))) &&
3756 9 : !VT.isVector() &&
3757 67629 : Op0VT == N1.getOperand(0).getValueType() &&
3758 834 : (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3759 834 : SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3760 : N0.getOperand(0).getValueType(),
3761 852 : N0.getOperand(0), N1.getOperand(0));
3762 834 : AddToWorklist(ORNode.getNode());
3763 1681 : return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3764 : }
3765 :
3766 : // For each of OP in SHL/SRL/SRA/AND...
3767 : // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3768 : // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
3769 : // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3770 65690 : if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3771 131344 : N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3772 8717 : N0.getOperand(1) == N1.getOperand(1)) {
3773 99 : SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3774 : N0.getOperand(0).getValueType(),
3775 103 : N0.getOperand(0), N1.getOperand(0));
3776 99 : AddToWorklist(ORNode.getNode());
3777 99 : return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3778 198 : ORNode, N0.getOperand(1));
3779 : }
3780 :
3781 : // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3782 : // Only perform this optimization up until type legalization, before
3783 : // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
3784 : // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3785 : // we don't want to undo this promotion.
3786 : // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3787 : // on scalars.
3788 37630 : if ((N0.getOpcode() == ISD::BITCAST ||
3789 65840 : N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3790 28215 : Level <= AfterLegalizeTypes) {
3791 697 : SDValue In0 = N0.getOperand(0);
3792 697 : SDValue In1 = N1.getOperand(0);
3793 697 : EVT In0Ty = In0.getValueType();
3794 697 : EVT In1Ty = In1.getValueType();
3795 : SDLoc DL(N);
3796 : // If both incoming values are integers, and the original types are the
3797 : // same.
3798 727 : if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3799 354 : SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3800 236 : SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3801 118 : AddToWorklist(Op.getNode());
3802 118 : return BC;
3803 : }
3804 : }
3805 :
3806 : // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3807 : // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3808 : // If both shuffles use the same mask, and both shuffle within a single
3809 : // vector, then it is worthwhile to move the swizzle after the operation.
3810 : // The type-legalizer generates this pattern when loading illegal
3811 : // vector types from memory. In many cases this allows additional shuffle
3812 : // optimizations.
3813 : // There are other cases where moving the shuffle after the xor/and/or
3814 : // is profitable even if shuffles don't perform a swizzle.
3815 : // If both shuffles use the same mask, and both shuffles have the same first
3816 : // or second operand, then it might still be profitable to move the shuffle
3817 : // after the xor/and/or operation.
3818 65722 : if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3819 : ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3820 : ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3821 :
3822 : assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3823 : "Inputs to shuffles are not the same type");
3824 :
3825 : // Check that both shuffles use the same mask. The masks are known to be of
3826 : // the same length because the result vector type is the same.
3827 : // Check also that shuffles have only one use to avoid introducing extra
3828 : // instructions.
3829 1065 : if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3830 1124 : SVN0->getMask().equals(SVN1->getMask())) {
3831 166 : SDValue ShOp = N0->getOperand(1);
3832 :
3833 : // Don't try to fold this node if it requires introducing a
3834 : // build vector of all zeros that might be illegal at this stage.
3835 166 : if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3836 26 : if (!LegalTypes)
3837 24 : ShOp = DAG.getConstant(0, SDLoc(N), VT);
3838 : else
3839 2 : ShOp = SDValue();
3840 : }
3841 :
3842 : // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
3843 : // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
3844 : // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
3845 240 : if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3846 74 : SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3847 148 : N0->getOperand(0), N1->getOperand(0));
3848 74 : AddToWorklist(NewNode.getNode());
3849 74 : return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3850 148 : SVN0->getMask());
3851 : }
3852 :
3853 : // Don't try to fold this node if it requires introducing a
3854 : // build vector of all zeros that might be illegal at this stage.
3855 92 : ShOp = N0->getOperand(0);
3856 92 : if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3857 14 : if (!LegalTypes)
3858 12 : ShOp = DAG.getConstant(0, SDLoc(N), VT);
3859 : else
3860 2 : ShOp = SDValue();
3861 : }
3862 :
3863 : // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
3864 : // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
3865 : // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
3866 128 : if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3867 36 : SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3868 72 : N0->getOperand(1), N1->getOperand(1));
3869 36 : AddToWorklist(NewNode.getNode());
3870 36 : return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3871 72 : SVN0->getMask());
3872 : }
3873 : }
3874 : }
3875 :
3876 65612 : return SDValue();
3877 : }
3878 :
3879 : /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3880 521342 : SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3881 : const SDLoc &DL) {
3882 521342 : SDValue LL, LR, RL, RR, N0CC, N1CC;
3883 609702 : if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3884 88360 : !isSetCCEquivalent(N1, RL, RR, N1CC))
3885 518381 : return SDValue();
3886 :
3887 : assert(N0.getValueType() == N1.getValueType() &&
3888 : "Unexpected operand types for bitwise logic op");
3889 : assert(LL.getValueType() == LR.getValueType() &&
3890 : RL.getValueType() == RR.getValueType() &&
3891 : "Unexpected operand types for setcc");
3892 :
3893 : // If we're here post-legalization or the logic op type is not i1, the logic
3894 : // op type must match a setcc result type. Also, all folds require new
3895 : // operations on the left and right operands, so those types must match.
3896 2961 : EVT VT = N0.getValueType();
3897 2961 : EVT OpVT = LL.getValueType();
3898 2961 : if (LegalOperations || VT.getScalarType() != MVT::i1)
3899 1500 : if (VT != getSetCCResultType(OpVT))
3900 24 : return SDValue();
3901 5882 : if (OpVT != RL.getValueType())
3902 109 : return SDValue();
3903 :
3904 2828 : ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3905 2828 : ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3906 2828 : bool IsInteger = OpVT.isInteger();
3907 3337 : if (LR == RR && CC0 == CC1 && IsInteger) {
3908 271 : bool IsZero = isNullConstantOrNullSplatConstant(LR);
3909 271 : bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3910 :
3911 : // All bits clear?
3912 271 : bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3913 : // All sign bits clear?
3914 271 : bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3915 : // Any bits set?
3916 271 : bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3917 : // Any sign bits set?
3918 271 : bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3919 :
3920 : // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
3921 : // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3922 : // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
3923 : // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
3924 271 : if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3925 72 : SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3926 69 : AddToWorklist(Or.getNode());
3927 69 : return DAG.getSetCC(DL, VT, Or, LR, CC1);
3928 : }
3929 :
3930 : // All bits set?
3931 202 : bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3932 : // All sign bits set?
3933 202 : bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3934 : // Any bits clear?
3935 202 : bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3936 : // Any sign bits clear?
3937 202 : bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3938 :
3939 : // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3940 : // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
3941 : // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3942 : // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
3943 202 : if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3944 22 : SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3945 22 : AddToWorklist(And.getNode());
3946 22 : return DAG.getSetCC(DL, VT, And, LR, CC1);
3947 : }
3948 : }
3949 :
3950 : // TODO: What is the 'or' equivalent of this fold?
3951 : // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3952 2115 : if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3953 2824 : IsInteger && CC0 == ISD::SETNE &&
3954 70 : ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3955 38 : (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3956 3 : SDValue One = DAG.getConstant(1, DL, OpVT);
3957 3 : SDValue Two = DAG.getConstant(2, DL, OpVT);
3958 3 : SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3959 3 : AddToWorklist(Add.getNode());
3960 3 : return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3961 : }
3962 :
3963 : // Try more general transforms if the predicates match and the only user of
3964 : // the compares is the 'and' or 'or'.
3965 2252 : if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3966 2966 : N0.hasOneUse() && N1.hasOneUse()) {
3967 : // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3968 : // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3969 232 : if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3970 81 : SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3971 81 : SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3972 158 : SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3973 79 : SDValue Zero = DAG.getConstant(0, DL, OpVT);
3974 79 : return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3975 : }
3976 : }
3977 :
3978 : // Canonicalize equivalent operands to LL == RL.
3979 2695 : if (LL == RR && LR == RL) {
3980 0 : CC1 = ISD::getSetCCSwappedOperands(CC1);
3981 : std::swap(RL, RR);
3982 : }
3983 :
3984 : // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3985 : // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3986 3292 : if (LL == RL && LR == RR) {
3987 86 : ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3988 43 : : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3989 86 : if (NewCC != ISD::SETCC_INVALID &&
3990 71 : (!LegalOperations ||
3991 39 : (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3992 : TLI.isOperationLegal(ISD::SETCC, OpVT))))
3993 32 : return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3994 : }
3995 :
3996 2623 : return SDValue();
3997 : }
3998 :
3999 : /// This contains all DAGCombine rules which reduce two values combined by
4000 : /// an And operation to a single value. This makes them reusable in the context
4001 : /// of visitSELECT(). Rules involving constants are not included as
4002 : /// visitSELECT() already handles those cases.
4003 377699 : SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4004 755398 : EVT VT = N1.getValueType();
4005 : SDLoc DL(N);
4006 :
4007 : // fold (and x, undef) -> 0
4008 377699 : if (N0.isUndef() || N1.isUndef())
4009 2 : return DAG.getConstant(0, DL, VT);
4010 :
4011 377697 : if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4012 93 : return V;
4013 :
4014 377604 : if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4015 280 : VT.getSizeInBits() <= 64) {
4016 : if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4017 : if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4018 : // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal add
4019 : // immediate but becomes one once its top c2 bits are set (the lshr zeroes
4020 : // those bits of the AND result anyway), transform the ADD so the
4021 : // immediate doesn't need to be materialized in a register.
4022 62 : APInt ADDC = ADDI->getAPIntValue();
4023 62 : APInt SRLC = SRLI->getAPIntValue();
4024 124 : if (ADDC.getMinSignedBits() <= 64 &&
4025 123 : SRLC.ult(VT.getSizeInBits()) &&
4026 122 : !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4027 : APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4028 13 : SRLC.getZExtValue());
4029 26 : if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4030 : ADDC |= Mask;
4031 26 : if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4032 : SDLoc DL0(N0);
4033 : SDValue NewAdd =
4034 13 : DAG.getNode(ISD::ADD, DL0, VT,
4035 13 : N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4036 13 : CombineTo(N0.getNode(), NewAdd);
4037 : // Return N so it doesn't get rechecked!
4038 13 : return SDValue(N, 0);
4039 : }
4040 : }
4041 : }
4042 : }
4043 : }
4044 : }
4045 :
4046 : // Reduce bit extract of low half of an integer to the narrower type.
4047 : // (and (srl i64:x, K), KMask) ->
4048 : // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
4049 405727 : if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4050 : if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4051 : if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4052 19572 : unsigned Size = VT.getSizeInBits();
4053 19572 : const APInt &AndMask = CAnd->getAPIntValue();
4054 19572 : unsigned ShiftBits = CShift->getZExtValue();
4055 :
4056 : // Bail out, this node will probably disappear anyway.
4057 19572 : if (ShiftBits == 0)
4058 2 : return SDValue();
4059 :
4060 : unsigned MaskBits = AndMask.countTrailingOnes();
4061 19570 : EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4062 :
4063 36135 : if (AndMask.isMask() &&
4064 : // Required bits must not span the two halves of the integer and
4065 : // must fit in the half size type.
4066 25745 : (ShiftBits + MaskBits <= Size / 2) &&
4067 9303 : TLI.isNarrowingProfitable(VT, HalfVT) &&
4068 246 : TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4069 246 : TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4070 19816 : TLI.isTruncateFree(VT, HalfVT) &&
4071 123 : TLI.isZExtFree(HalfVT, VT)) {
4072 : // The isNarrowingProfitable check is to avoid regressions on PPC and
4073 : // AArch64 which match a few 64-bit bit insert / bit extract patterns
4074 : // on downstream users of this. Those patterns could probably be
4075 : // extended to handle extensions mixed in.
4076 :
4077 : SDValue SL(N0);
4078 : assert(MaskBits <= Size);
4079 :
4080 : // Extracting the highest bit of the low half.
4081 116 : EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4082 116 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4083 116 : N0.getOperand(0));
4084 :
4085 278 : SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4086 162 : SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4087 162 : SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4088 162 : SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4089 278 : return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4090 : }
4091 : }
4092 : }
4093 : }
4094 :
4095 377473 : return SDValue();
4096 : }
4097 :
4098 0 : bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4099 : EVT LoadResultTy, EVT &ExtVT) {
4100 0 : if (!AndC->getAPIntValue().isMask())
4101 0 : return false;
4102 :
4103 : unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4104 :
4105 0 : ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4106 0 : EVT LoadedVT = LoadN->getMemoryVT();
4107 :
4108 0 : if (ExtVT == LoadedVT &&
4109 0 : (!LegalOperations ||
4110 0 : TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4111 : // ZEXTLOAD will match without needing to change the size of the value being
4112 : // loaded.
4113 0 : return true;
4114 : }
4115 :
4116 : // Do not change the width of a volatile load.
4117 0 : if (LoadN->isVolatile())
4118 0 : return false;
4119 :
4120 : // Do not generate loads of non-round integer types since these can
4121 : // be expensive (and would be wrong if the type is not byte sized).
4122 0 : if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4123 0 : return false;
4124 :
4125 0 : if (LegalOperations &&
4126 0 : !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4127 0 : return false;
4128 :
4129 0 : if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4130 0 : return false;
4131 :
4132 : return true;
4133 : }
4134 :
4135 149739 : bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4136 : ISD::LoadExtType ExtType, EVT &MemVT,
4137 : unsigned ShAmt) {
4138 149739 : if (!LDST)
4139 : return false;
4140 : // Only allow byte offsets.
4141 149739 : if (ShAmt % 8)
4142 : return false;
4143 :
4144 : // Do not generate loads of non-round integer types since these can
4145 : // be expensive (and would be wrong if the type is not byte sized).
4146 : if (!MemVT.isRound())
4147 : return false;
4148 :
4149 : // Don't change the width of a volatile load.
4150 54685 : if (LDST->isVolatile())
4151 : return false;
4152 :
4153 : // Verify that we are actually reducing a load width here.
4154 53804 : if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4155 : return false;
4156 :
4157 : // Ensure that this isn't going to produce an unsupported unaligned access.
4158 65082 : if (ShAmt &&
4159 11741 : !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4160 : LDST->getAddressSpace(), ShAmt / 8))
4161 : return false;
4162 :
4163 : // It's not possible to generate a constant of extended or untyped type.
4164 53335 : EVT PtrType = LDST->getBasePtr().getValueType();
4165 53335 : if (PtrType == MVT::Untyped || PtrType.isExtended())
4166 : return false;
4167 :
4168 53335 : if (isa<LoadSDNode>(LDST)) {
4169 : LoadSDNode *Load = cast<LoadSDNode>(LDST);
4170 : // Don't transform a load with multiple uses; this would require adding a
4171 : // new load.
4172 53335 : if (!SDValue(Load, 0).hasOneUse())
4173 : return false;
4174 :
4175 22761 : if (LegalOperations &&
4176 18448 : !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4177 : return false;
4178 :
4179 : // For the transform to be legal, the load must produce only two values
4180 : // (the value loaded and the chain). Don't transform a pre-increment
4181 : // load, for example, which produces an extra value. Otherwise the
4182 : // transformation is not equivalent, and the downstream logic to replace
4183 : // uses gets things wrong.
4184 5154 : if (Load->getNumValues() > 2)
4185 : return false;
4186 :
4187 : // If the load that we're shrinking is an extload and we're not just
4188 : // discarding the extension we can't simply shrink the load. Bail.
4189 : // TODO: It would be possible to merge the extensions in some cases.
4190 5154 : if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4191 848 : Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4192 1 : return false;
4193 :
4194 5153 : if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4195 1874 : return false;
4196 : } else {
4197 : assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4198 : StoreSDNode *Store = cast<StoreSDNode>(LDST);
4199 : // Can't write outside the original store
4200 0 : if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4201 : return false;
4202 :
4203 0 : if (LegalOperations &&
4204 0 : !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4205 0 : return false;
4206 : }
4207 : return true;
4208 : }
4209 :
4210 200374 : bool DAGCombiner::SearchForAndLoads(SDNode *N,
4211 : SmallPtrSetImpl<LoadSDNode*> &Loads,
4212 : SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4213 : ConstantSDNode *Mask,
4214 : SDNode *&NodeToMask) {
4215 : // Recursively search for the operands, looking for loads which can be
4216 : // narrowed.
4217 553516 : for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4218 754452 : SDValue Op = N->getOperand(i);
4219 :
4220 754452 : if (Op.getValueType().isVector())
4221 : return false;
4222 :
4223 : // Some constants may need fixing up later if they are too large.
4224 : if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4225 353170 : if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4226 179238 : (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4227 89 : NodesWithConsts.insert(N);
4228 176206 : continue;
4229 : }
4230 :
4231 201020 : if (!Op.hasOneUse())
4232 : return false;
4233 :
4234 182599 : switch(Op.getOpcode()) {
4235 : case ISD::LOAD: {
4236 : auto *Load = cast<LoadSDNode>(Op);
4237 1278 : EVT ExtVT;
4238 2726 : if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4239 170 : isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4240 :
4241 : // ZEXTLOAD is already small enough.
4242 159 : if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4243 13 : ExtVT.bitsGE(Load->getMemoryVT()))
4244 159 : continue;
4245 :
4246 : // Use LE to convert equal sized loads to zext.
4247 150 : if (ExtVT.bitsLE(Load->getMemoryVT()))
4248 150 : Loads.insert(Load);
4249 :
4250 150 : continue;
4251 : }
4252 1119 : return false;
4253 : }
4254 1730 : case ISD::ZERO_EXTEND:
4255 : case ISD::AssertZext: {
4256 1730 : unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4257 1730 : EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4258 : EVT VT = Op.getOpcode() == ISD::AssertZext ?
4259 1168 : cast<VTSDNode>(Op.getOperand(1))->getVT() :
4260 1730 : Op.getOperand(0).getValueType();
4261 :
4262 : // We can accept extending nodes if the mask is wider than, or equal
4263 : // in width to, the original type.
4264 1730 : if (ExtVT.bitsGE(VT))
4265 1162 : continue;
4266 568 : break;
4267 : }
4268 4418 : case ISD::OR:
4269 : case ISD::XOR:
4270 : case ISD::AND:
4271 4418 : if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4272 : NodeToMask))
4273 : return false;
4274 : continue;
4275 : }
4276 :
4277 : // Allow one node which will be masked along with any loads found.
4278 175741 : if (NodeToMask)
4279 : return false;
4280 :
4281 : // Also ensure that the node to be masked only produces one data result.
4282 175345 : NodeToMask = Op.getNode();
4283 350690 : if (NodeToMask->getNumValues() > 1) {
4284 : bool HasValue = false;
4285 20927 : for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4286 : MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4287 14747 : if (VT != MVT::Glue && VT != MVT::Other) {
4288 7734 : if (HasValue) {
4289 777 : NodeToMask = nullptr;
4290 : return false;
4291 : }
4292 : HasValue = true;
4293 : }
4294 : }
4295 : assert(HasValue && "Node to be masked has no data result?");
4296 : }
4297 : }
4298 : return true;
4299 : }
4300 :
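 : // Illustrative sketch of the net effect of the search below: given
 : // (and (or (load i32 p), (load i32 q)), 0xff)
 : // both loads are found, each is masked and narrowed via
 : // ReduceLoadWidth, and the trailing AND becomes redundant:
 : // (or (zextload i8 p), (zextload i8 q))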
4301 308037 : bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4302 308037 : auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4303 : if (!Mask)
4304 : return false;
4305 :
4306 502786 : if (!Mask->getAPIntValue().isMask())
4307 : return false;
4308 :
4309 : // No need to do anything if the and directly uses a load.
4310 228221 : if (isa<LoadSDNode>(N->getOperand(0)))
4311 : return false;
4312 :
4313 : SmallPtrSet<LoadSDNode*, 8> Loads;
4314 : SmallPtrSet<SDNode*, 2> NodesWithConsts;
4315 195956 : SDNode *FixupNode = nullptr;
4316 195956 : if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4317 175243 : if (Loads.size() == 0)
4318 : return false;
4319 :
4320 : LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4321 88 : SDValue MaskOp = N->getOperand(1);
4322 :
4323 : // If it exists, fix up the single node we allow in the tree that needs
4324 : // masking.
4325 88 : if (FixupNode) {
4326 : LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4327 26 : SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4328 : FixupNode->getValueType(0),
4329 26 : SDValue(FixupNode, 0), MaskOp);
4330 52 : DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4331 26 : if (And.getOpcode() == ISD::AND)
4332 50 : DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4333 : }
4334 :
4335 : // Narrow any constants that need it.
4336 102 : for (auto *LogicN : NodesWithConsts) {
4337 14 : SDValue Op0 = LogicN->getOperand(0);
4338 14 : SDValue Op1 = LogicN->getOperand(1);
4339 :
4340 : if (isa<ConstantSDNode>(Op0))
4341 : std::swap(Op0, Op1);
4342 :
4343 14 : SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4344 14 : Op1, MaskOp);
4345 :
4346 14 : DAG.UpdateNodeOperands(LogicN, Op0, And);
4347 : }
4348 :
4349 : // Create narrow loads.
4350 233 : for (auto *Load : Loads) {
4351 : LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4352 145 : SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4353 145 : SDValue(Load, 0), MaskOp);
4354 145 : DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4355 145 : if (And.getOpcode() == ISD::AND)
4356 145 : And = SDValue(
4357 : DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4358 145 : SDValue NewLoad = ReduceLoadWidth(And.getNode());
4359 : assert(NewLoad &&
4360 : "Shouldn't be masking the load if it can't be narrowed");
4361 : CombineTo(Load, NewLoad, NewLoad.getValue(1));
4362 : }
4363 88 : DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4364 88 : return true;
4365 : }
4366 : return false;
4367 : }
4368 :
4369 : // Unfold
4370 : // x & (-1 'logical shift' y)
4371 : // To
4372 : // (x 'opposite logical shift' y) 'logical shift' y
4373 : // if it is better for performance.
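 : //
 : // E.g. for i32 and y == 4: x & (-1 << 4) == (x >> 4) << 4, which
 : // clears the low 4 bits without materializing the 0xfffffff0 mask.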
4374 0 : SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4375 : assert(N->getOpcode() == ISD::AND);
4376 :
4377 0 : SDValue N0 = N->getOperand(0);
4378 0 : SDValue N1 = N->getOperand(1);
4379 :
4380 : // Do we actually prefer shifts over masking?
4381 0 : if (!TLI.preferShiftsToClearExtremeBits(N0))
4382 0 : return SDValue();
4383 :
4384 : // Try to match (-1 '[outer] logical shift' y)
4385 : unsigned OuterShift;
4386 : unsigned InnerShift; // The opposite direction to the OuterShift.
4387 0 : SDValue Y; // Shift amount.
4388 : auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4389 : if (!M.hasOneUse())
4390 : return false;
4391 : OuterShift = M->getOpcode();
4392 : if (OuterShift == ISD::SHL)
4393 : InnerShift = ISD::SRL;
4394 : else if (OuterShift == ISD::SRL)
4395 : InnerShift = ISD::SHL;
4396 : else
4397 : return false;
4398 : if (!isAllOnesConstant(M->getOperand(0)))
4399 : return false;
4400 : Y = M->getOperand(1);
4401 : return true;
4402 0 : };
4403 :
4404 0 : SDValue X;
4405 0 : if (matchMask(N1))
4406 0 : X = N0;
4407 0 : else if (matchMask(N0))
4408 0 : X = N1;
4409 : else
4410 0 : return SDValue();
4411 :
4412 : SDLoc DL(N);
4413 0 : EVT VT = N->getValueType(0);
4414 :
4415 : // tmp = x 'opposite logical shift' y
4416 0 : SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4417 : // ret = tmp 'logical shift' y
4418 0 : SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4419 :
4420 0 : return T1;
4421 : }
4422 :
4423 400002 : SDValue DAGCombiner::visitAND(SDNode *N) {
4424 400002 : SDValue N0 = N->getOperand(0);
4425 400002 : SDValue N1 = N->getOperand(1);
4426 400002 : EVT VT = N1.getValueType();
4427 :
4428 : // x & x --> x
4429 400002 : if (N0 == N1)
4430 21 : return N0;
4431 :
4432 : // fold vector ops
4433 399981 : if (VT.isVector()) {
4434 54116 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
4435 10 : return FoldedVOp;
4436 :
4437 : // fold (and x, 0) -> 0, vector edition
4438 54106 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
4439 : // do not return N0, because an undef node may exist in N0
4440 126 : return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4441 189 : SDLoc(N), N0.getValueType());
4442 54043 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
4443 : // do not return N1, because an undef node may exist in N1
4444 4 : return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4445 6 : SDLoc(N), N1.getValueType());
4446 :
4447 : // fold (and x, -1) -> x, vector edition
4448 54041 : if (ISD::isBuildVectorAllOnes(N0.getNode()))
4449 51 : return N1;
4450 53990 : if (ISD::isBuildVectorAllOnes(N1.getNode()))
4451 7 : return N0;
4452 : }
4453 :
4454 : // fold (and c1, c2) -> c1&c2
4455 399848 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4456 399848 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
4457 399848 : if (N0C && N1C && !N1C->isOpaque())
4458 44 : return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4459 : // canonicalize constant to RHS
4460 400440 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4461 614 : !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4462 1204 : return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4463 : // fold (and x, -1) -> x
4464 399224 : if (isAllOnesConstant(N1))
4465 11 : return N0;
4466 : // if (and x, c) is known to be zero, return 0
4467 : unsigned BitWidth = VT.getScalarSizeInBits();
4468 732794 : if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4469 1066316 : APInt::getAllOnesValue(BitWidth)))
4470 118 : return DAG.getConstant(0, SDLoc(N), VT);
4471 :
4472 399154 : if (SDValue NewSel = foldBinOpIntoSelect(N))
4473 190 : return NewSel;
4474 :
4475 : // reassociate and
4476 544076 : if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4477 2044 : return RAND;
4478 :
4479 : // Try to convert a constant mask AND into a shuffle clear mask.
4480 396920 : if (VT.isVector())
4481 53297 : if (SDValue Shuffle = XformToShuffleWithZero(N))
4482 1089 : return Shuffle;
4483 :
4484 : // fold (and (or x, C), D) -> D if (C & D) == D
4485 : auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4486 : return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4487 : };
4488 802719 : if (N0.getOpcode() == ISD::OR &&
4489 415656 : ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4490 2289 : return N1;
4491 : // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4492 393542 : if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4493 68021 : SDValue N0Op0 = N0.getOperand(0);
4494 68021 : APInt Mask = ~N1C->getAPIntValue();
4495 68021 : Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4496 68021 : if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4497 13463 : SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4498 27189 : N0.getValueType(), N0Op0);
4499 :
4500 : // Replace uses of the AND with uses of the Zero extend node.
4501 13463 : CombineTo(N, Zext);
4502 :
4503 : // We actually want to replace all uses of the any_extend with the
4504 : // zero_extend, to avoid duplicating things. This will later cause this
4505 : // AND to be folded.
4506 13463 : CombineTo(N0.getNode(), Zext);
4507 13463 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
4508 : }
4509 : }
4510 : // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4511 : // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4512 : // already be zero by virtue of the width of the base type of the load.
4513 : //
4514 : // the 'X' node here can either be nothing or an extract_vector_elt to catch
4515 : // more cases.
4516 389425 : if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4517 17636 : N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4518 16580 : N0.getOperand(0).getOpcode() == ISD::LOAD &&
4519 389425 : N0.getOperand(0).getResNo() == 0) ||
4520 376341 : (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4521 160864 : LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4522 : N0 : N0.getOperand(0) );
4523 :
4524 : // Get the constant (if applicable) the zero'th operand is being ANDed with.
4525 : // This can be a pure constant or a vector splat, in which case we treat the
4526 : // vector as a scalar and use the splat value.
4527 : APInt Constant = APInt::getNullValue(1);
4528 : if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4529 148002 : Constant = C->getAPIntValue();
4530 : } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4531 : APInt SplatValue, SplatUndef;
4532 : unsigned SplatBitSize;
4533 : bool HasAnyUndefs;
4534 1107 : bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4535 : SplatBitSize, HasAnyUndefs);
4536 1107 : if (IsSplat) {
4537 : // Undef bits can contribute to a possible optimisation if set, so
4538 : // set them.
4539 : SplatValue |= SplatUndef;
4540 :
4541 : // The splat value may be something like "0x00FFFFFF", which means 0 for
4542 : // the first vector value and FF for the rest, repeating. We need a mask
4543 : // that will apply equally to all members of the vector, so AND all the
4544 : // lanes of the constant together.
4545 2210 : EVT VT = Vector->getValueType(0);
4546 : unsigned BitWidth = VT.getScalarSizeInBits();
4547 :
4548 : // If the splat value has been compressed to a bitlength lower
4549 : // than the size of the vector lane, we need to re-expand it to
4550 : // the lane size.
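 : // E.g. an 8-bit splat value 0xab in a 16-bit lane is re-expanded to
 : // 0xabab; each loop iteration ORs in a copy shifted left by the old
 : // width.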
4551 1105 : if (BitWidth > SplatBitSize)
4552 2 : for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4553 2 : SplatBitSize < BitWidth;
4554 1 : SplatBitSize = SplatBitSize * 2)
4555 2 : SplatValue |= SplatValue.shl(SplatBitSize);
4556 :
4557 : // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4558 : // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4559 1105 : if (SplatBitSize % BitWidth == 0) {
4560 1104 : Constant = APInt::getAllOnesValue(BitWidth);
4561 2253 : for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4562 2306 : Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4563 : }
4564 : }
4565 : }
4566 :
4567 : // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4568 : // actually legal and isn't going to get expanded, else this is a false
4569 : // optimisation.
4570 80432 : bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4571 : Load->getValueType(0),
4572 : Load->getMemoryVT());
4573 :
4574 : // Resize the constant to the same size as the original memory access before
4575 : // extension. If it is still the AllOnesValue then this AND is completely
4576 : // unneeded.
4577 160864 : Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4578 :
4579 : bool B;
4580 80432 : switch (Load->getExtensionType()) {
4581 : default: B = false; break;
4582 : case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4583 : case ISD::ZEXTLOAD:
4584 : case ISD::NON_EXTLOAD: B = true; break;
4585 : }
4586 :
4587 93092 : if (B && Constant.isAllOnesValue()) {
4588 : // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4589 : // preserve semantics once we get rid of the AND.
4590 : SDValue NewLoad(Load, 0);
4591 :
4592 : // Fold the AND away. NewLoad may get replaced immediately.
4593 4444 : CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4594 :
4595 2222 : if (Load->getExtensionType() == ISD::EXTLOAD) {
4596 1268 : NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4597 1268 : Load->getValueType(0), SDLoc(Load),
4598 : Load->getChain(), Load->getBasePtr(),
4599 : Load->getOffset(), Load->getMemoryVT(),
4600 1276 : Load->getMemOperand());
4601 : // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4602 1268 : if (Load->getNumValues() == 3) {
4603 : // PRE/POST_INC loads have 3 values.
4604 0 : SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4605 0 : NewLoad.getValue(2) };
4606 0 : CombineTo(Load, To, 3, true);
4607 : } else {
4608 1268 : CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4609 : }
4610 : }
4611 :
4612 2222 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
4613 : }
4614 : }
4615 :
4616 : // fold (and (load x), 255) -> (zextload x, i8)
4617 : // fold (and (extload x, i16), 255) -> (zextload x, i8)
4618 : // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4619 377857 : if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4620 54529 : (N0.getOpcode() == ISD::ANY_EXTEND &&
4621 54529 : N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4622 80073 : if (SDValue Res = ReduceLoadWidth(N)) {
4623 138 : LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4624 138 : ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4625 :
4626 138 : AddToWorklist(N);
4627 : CombineTo(LN0, Res, Res.getValue(1));
4628 138 : return SDValue(N, 0);
4629 : }
4630 : }
4631 :
4632 377719 : if (Level >= AfterLegalizeTypes) {
4633 : // Attempt to propagate the AND back up to the leaves which, if they're
4634 : // loads, can be combined to narrow loads and the AND node can be removed.
4635 : // Perform after legalization so that extend nodes will already be
4636 : // combined into the loads.
4637 308037 : if (BackwardsPropagateMask(N, DAG)) {
4638 88 : return SDValue(N, 0);
4639 : }
4640 : }
4641 :
4642 377631 : if (SDValue Combined = visitANDLike(N0, N1, N))
4643 218 : return Combined;
4644 :
4645 : // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4646 1132239 : if (N0.getOpcode() == N1.getOpcode())
4647 27632 : if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4648 299 : return Tmp;
4649 :
4650 : // Masking the negated extension of a boolean is just the zero-extended
4651 : // boolean:
4652 : // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4653 : // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4654 : //
4655 : // Note: the SimplifyDemandedBits fold below can make an information-losing
4656 : // transform, and then we have no way to find this better fold.
4657 689439 : if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4658 28 : if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
4659 12 : SDValue SubRHS = N0.getOperand(1);
4660 9 : if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4661 3 : SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4662 3 : return SubRHS;
4663 6 : if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4664 3 : SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4665 6 : return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4666 : }
4667 : }
4668 :
4669 : // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4670 : // fold (and (sra)) -> (and (srl)) when possible.
4671 377108 : if (SimplifyDemandedBits(SDValue(N, 0)))
4672 110614 : return SDValue(N, 0);
4673 :
4674 : // fold (zext_inreg (extload x)) -> (zextload x)
4675 266494 : if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4676 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4677 14236 : EVT MemVT = LN0->getMemoryVT();
4678 : // If we zero all the possible extended bits, then we can turn this into
4679 : // a zextload if we are running before legalize or the operation is legal.
4680 14236 : unsigned BitWidth = N1.getScalarValueSizeInBits();
4681 28472 : if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4682 14236 : BitWidth - MemVT.getScalarSizeInBits())) &&
4683 16727 : ((!LegalOperations && !LN0->isVolatile()) ||
4684 2561 : TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4685 24544 : SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4686 : LN0->getChain(), LN0->getBasePtr(),
4687 12309 : MemVT, LN0->getMemOperand());
4688 12272 : AddToWorklist(N);
4689 12272 : CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4690 12272 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
4691 : }
4692 : }
4693 : // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4694 254568 : if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4695 346 : N0.hasOneUse()) {
4696 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4697 301 : EVT MemVT = LN0->getMemoryVT();
4698 : // If we zero all the possible extended bits, then we can turn this into
4699 : // a zextload if we are running before legalize or the operation is legal.
4700 301 : unsigned BitWidth = N1.getScalarValueSizeInBits();
4701 602 : if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4702 301 : BitWidth - MemVT.getScalarSizeInBits())) &&
4703 488 : ((!LegalOperations && !LN0->isVolatile()) ||
4704 226 : TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4705 492 : SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4706 : LN0->getChain(), LN0->getBasePtr(),
4707 246 : MemVT, LN0->getMemOperand());
4708 246 : AddToWorklist(N);
4709 246 : CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4710 246 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
4711 : }
4712 : }
4713 : // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4714 253976 : if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4715 274 : if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4716 274 : N0.getOperand(1), false))
4717 1 : return BSwap;
4718 : }
4719 :
4720 253975 : if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4721 636 : return Shifts;
4722 :
4723 253339 : return SDValue();
4724 : }
4725 :
4726 : /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
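 : /// E.g. for i32, one recognized form is
 : /// ((a & 0xff) << 8) | ((a >> 8) & 0xff)
 : /// which equals (bswap a) >> 16: the bswap carries the two low bytes
 : /// to the top, already exchanged, and the shift brings them back down.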
4727 143554 : SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4728 : bool DemandHighBits) {
4729 143554 : if (!LegalOperations)
4730 56652 : return SDValue();
4731 :
4732 173804 : EVT VT = N->getValueType(0);
4733 : if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4734 21392 : return SDValue();
4735 65510 : if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4736 14759 : return SDValue();
4737 :
4738 : // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4739 : bool LookPassAnd0 = false;
4740 : bool LookPassAnd1 = false;
4741 50751 : if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4742 : std::swap(N0, N1);
4743 50751 : if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4744 : std::swap(N0, N1);
4745 50751 : if (N0.getOpcode() == ISD::AND) {
4746 : if (!N0.getNode()->hasOneUse())
4747 205 : return SDValue();
4748 : ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4749 : // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4750 : // This is needed for X86.
4751 12808 : if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4752 : N01C->getZExtValue() != 0xFFFF))
4753 5599 : return SDValue();
4754 999 : N0 = N0.getOperand(0);
4755 : LookPassAnd0 = true;
4756 : }
4757 :
4758 44947 : if (N1.getOpcode() == ISD::AND) {
4759 : if (!N1.getNode()->hasOneUse())
4760 24 : return SDValue();
4761 : ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4762 2288 : if (!N11C || N11C->getZExtValue() != 0xFF)
4763 1052 : return SDValue();
4764 107 : N1 = N1.getOperand(0);
4765 : LookPassAnd1 = true;
4766 : }
4767 :
4768 43871 : if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4769 : std::swap(N0, N1);
4770 43871 : if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4771 42824 : return SDValue();
4772 : if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4773 14 : return SDValue();
4774 :
4775 : ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4776 : ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4777 1033 : if (!N01C || !N11C)
4778 239 : return SDValue();
4779 1625 : if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4780 774 : return SDValue();
4781 :
4782 : // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4783 20 : SDValue N00 = N0->getOperand(0);
4784 20 : if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4785 : if (!N00.getNode()->hasOneUse())
4786 0 : return SDValue();
4787 : ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4788 12 : if (!N001C || N001C->getZExtValue() != 0xFF)
4789 2 : return SDValue();
4790 4 : N00 = N00.getOperand(0);
4791 : LookPassAnd0 = true;
4792 : }
4793 :
4794 18 : SDValue N10 = N1->getOperand(0);
4795 18 : if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4796 : if (!N10.getNode()->hasOneUse())
4797 0 : return SDValue();
4798 : ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4799 : // Also allow 0xFFFF since the bits will be shifted out. This is needed
4800 : // for X86.
4801 10 : if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4802 : N101C->getZExtValue() != 0xFFFF))
4803 0 : return SDValue();
4804 5 : N10 = N10.getOperand(0);
4805 : LookPassAnd1 = true;
4806 : }
4807 :
4808 : if (N00 != N10)
4809 0 : return SDValue();
4810 :
4811 : // Make sure everything beyond the low halfword gets set to zero since the SRL
4812 : // 16 will clear the top bits.
4813 18 : unsigned OpSizeInBits = VT.getSizeInBits();
4814 18 : if (DemandHighBits && OpSizeInBits > 16) {
4815 : // If the left-shift isn't masked out then the only way this is a bswap is
4816 : // if all bits beyond the low 8 are 0. In that case the entire pattern
4817 : // reduces to a left shift anyway: leave it for other parts of the combiner.
4818 9 : if (!LookPassAnd0)
4819 2 : return SDValue();
4820 :
4821 : // However, if the right shift isn't masked out then it might be because
4822 : // it's not needed. See if we can spot that too.
4823 9 : if (!LookPassAnd1 &&
4824 2 : !DAG.MaskedValueIsZero(
4825 11 : N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4826 0 : return SDValue();
4827 : }
4828 :
4829 16 : SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4830 16 : if (OpSizeInBits > 16) {
4831 : SDLoc DL(N);
4832 16 : Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4833 16 : DAG.getConstant(OpSizeInBits - 16, DL,
4834 16 : getShiftAmountTy(VT)));
4835 : }
4836 16 : return Res;
4837 : }
4838 :
4839 : /// Return true if the specified node is an element that makes up a 32-bit
4840 : /// packed halfword byteswap.
4841 : /// ((x & 0x000000ff) << 8) |
4842 : /// ((x & 0x0000ff00) >> 8) |
4843 : /// ((x & 0x00ff0000) << 8) |
4844 : /// ((x & 0xff000000) >> 8)
4845 0 : static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4846 : if (!N.getNode()->hasOneUse())
4847 0 : return false;
4848 :
4849 : unsigned Opc = N.getOpcode();
4850 0 : if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4851 0 : return false;
4852 :
4853 0 : SDValue N0 = N.getOperand(0);
4854 : unsigned Opc0 = N0.getOpcode();
4855 0 : if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4856 0 : return false;
4857 :
4858 : ConstantSDNode *N1C = nullptr;
4859 : // SHL or SRL: look upstream for AND mask operand
4860 0 : if (Opc == ISD::AND)
4861 : N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4862 0 : else if (Opc0 == ISD::AND)
4863 : N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4864 0 : if (!N1C)
4865 0 : return false;
4866 :
4867 : unsigned MaskByteOffset;
4868 0 : switch (N1C->getZExtValue()) {
4869 : default:
4870 : return false;
4871 : case 0xFF: MaskByteOffset = 0; break;
4872 0 : case 0xFF00: MaskByteOffset = 1; break;
4873 0 : case 0xFFFF:
4874 : // In case demanded bits didn't clear the bits that will be shifted out.
4875 : // This is needed for X86.
4876 0 : if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4877 : MaskByteOffset = 1;
4878 : break;
4879 : }
4880 : return false;
4881 0 : case 0xFF0000: MaskByteOffset = 2; break;
4882 0 : case 0xFF000000: MaskByteOffset = 3; break;
4883 : }
4884 :
4885 : // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4886 0 : if (Opc == ISD::AND) {
4887 0 : if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4888 : // (x >> 8) & 0xff
4889 : // (x >> 8) & 0xff0000
4890 0 : if (Opc0 != ISD::SRL)
4891 0 : return false;
4892 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4893 0 : if (!C || C->getZExtValue() != 8)
4894 0 : return false;
4895 : } else {
4896 : // (x << 8) & 0xff00
4897 : // (x << 8) & 0xff000000
4898 0 : if (Opc0 != ISD::SHL)
4899 0 : return false;
4900 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4901 0 : if (!C || C->getZExtValue() != 8)
4902 0 : return false;
4903 : }
4904 0 : } else if (Opc == ISD::SHL) {
4905 : // (x & 0xff) << 8
4906 : // (x & 0xff0000) << 8
4907 0 : if (MaskByteOffset != 0 && MaskByteOffset != 2)
4908 0 : return false;
4909 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4910 0 : if (!C || C->getZExtValue() != 8)
4911 0 : return false;
4912 : } else { // Opc == ISD::SRL
4913 : // (x & 0xff00) >> 8
4914 : // (x & 0xff000000) >> 8
4915 0 : if (MaskByteOffset != 1 && MaskByteOffset != 3)
4916 0 : return false;
4917 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4918 0 : if (!C || C->getZExtValue() != 8)
4919 0 : return false;
4920 : }
4921 :
4922 0 : if (Parts[MaskByteOffset])
4923 0 : return false;
4924 :
4925 0 : Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4926 0 : return true;
4927 : }
4928 :
4929 : /// Match a 32-bit packed halfword bswap. That is
4930 : /// ((x & 0x000000ff) << 8) |
4931 : /// ((x & 0x0000ff00) >> 8) |
4932 : /// ((x & 0x00ff0000) << 8) |
4933 : /// ((x & 0xff000000) >> 8)
4934 : /// => (rotl (bswap x), 16)
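 : /// E.g. (worked instance): for x = 0xaabbccdd the four masked terms
 : /// OR together to 0xbbaaddcc, and rotl(bswap(x) = 0xddccbbaa, 16)
 : /// yields the same 0xbbaaddcc.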
4935 0 : SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4936 0 : if (!LegalOperations)
4937 0 : return SDValue();
4938 :
4939 0 : EVT VT = N->getValueType(0);
4940 0 : if (VT != MVT::i32)
4941 0 : return SDValue();
4942 0 : if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4943 0 : return SDValue();
4944 :
4945 : // Look for either
4946 : // (or (or (and), (and)), (or (and), (and)))
4947 : // (or (or (or (and), (and)), (and)), (and))
4948 0 : if (N0.getOpcode() != ISD::OR)
4949 0 : return SDValue();
4950 0 : SDValue N00 = N0.getOperand(0);
4951 0 : SDValue N01 = N0.getOperand(1);
4952 0 : SDNode *Parts[4] = {};
4953 :
4954 0 : if (N1.getOpcode() == ISD::OR &&
4955 0 : N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4956 : // (or (or (and), (and)), (or (and), (and)))
4957 0 : if (!isBSwapHWordElement(N00, Parts))
4958 0 : return SDValue();
4959 :
4960 0 : if (!isBSwapHWordElement(N01, Parts))
4961 0 : return SDValue();
4962 0 : SDValue N10 = N1.getOperand(0);
4963 0 : if (!isBSwapHWordElement(N10, Parts))
4964 0 : return SDValue();
4965 0 : SDValue N11 = N1.getOperand(1);
4966 0 : if (!isBSwapHWordElement(N11, Parts))
4967 0 : return SDValue();
4968 : } else {
4969 : // (or (or (or (and), (and)), (and)), (and))
4970 0 : if (!isBSwapHWordElement(N1, Parts))
4971 0 : return SDValue();
4972 0 : if (!isBSwapHWordElement(N01, Parts))
4973 0 : return SDValue();
4974 0 : if (N00.getOpcode() != ISD::OR)
4975 0 : return SDValue();
4976 0 : SDValue N000 = N00.getOperand(0);
4977 0 : if (!isBSwapHWordElement(N000, Parts))
4978 0 : return SDValue();
4979 0 : SDValue N001 = N00.getOperand(1);
4980 0 : if (!isBSwapHWordElement(N001, Parts))
4981 0 : return SDValue();
4982 : }
4983 :
4984 : // Make sure the parts are all coming from the same node.
4985 0 : if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4986 0 : return SDValue();
4987 :
4988 : SDLoc DL(N);
4989 0 : SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4990 0 : SDValue(Parts[0], 0));
4991 :
4992 : // Result of the bswap should be rotated by 16. If it's not legal, then
4993 : // do (x << 16) | (x >> 16).
4994 0 : SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4995 0 : if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4996 0 : return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4997 0 : if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4998 0 : return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4999 0 : return DAG.getNode(ISD::OR, DL, VT,
5000 0 : DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5001 0 : DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5002 : }
5003 :
5004 : /// This contains all DAGCombine rules which reduce two values combined by
5005 : /// an Or operation to a single value \see visitANDLike().
5006 143660 : SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5007 287320 : EVT VT = N1.getValueType();
5008 : SDLoc DL(N);
5009 :
5010 : // fold (or x, undef) -> -1
5011 143660 : if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5012 15 : return DAG.getAllOnesConstant(DL, VT);
5013 :
5014 143645 : if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5015 112 : return V;
5016 :
5017 : // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
5018 143533 : if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5019 : // Don't increase # computations.
5020 : (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5021 : // We can only do this xform if we know that bits from X that are set in C2
5022 : // but not in C1 are already zero. Likewise for Y.
5023 : if (const ConstantSDNode *N0O1C =
5024 8167 : getAsNonOpaqueConstant(N0.getOperand(1))) {
5025 : if (const ConstantSDNode *N1O1C =
5026 2980 : getAsNonOpaqueConstant(N1.getOperand(1))) {
5027 : // We can only do this xform if we know that bits from X that are set in
5028 : // C2 but not in C1 are already zero. Likewise for Y.
5029 2905 : const APInt &LHSMask = N0O1C->getAPIntValue();
5030 2905 : const APInt &RHSMask = N1O1C->getAPIntValue();
5031 :
5032 8954 : if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5033 4100 : DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5034 21 : SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5035 21 : N0.getOperand(0), N1.getOperand(0));
5036 21 : return DAG.getNode(ISD::AND, DL, VT, X,
5037 42 : DAG.getConstant(LHSMask | RHSMask, DL, VT));
5038 : }
5039 : }
5040 : }
5041 : }
5042 :
5043 : // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5044 25558 : if (N0.getOpcode() == ISD::AND &&
5045 : N1.getOpcode() == ISD::AND &&
5046 151670 : N0.getOperand(0) == N1.getOperand(0) &&
5047 : // Don't increase # computations.
5048 : (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5049 188 : SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5050 188 : N0.getOperand(1), N1.getOperand(1));
5051 376 : return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5052 : }
5053 :
5054 143324 : return SDValue();
5055 : }
5056 :
5057 146434 : SDValue DAGCombiner::visitOR(SDNode *N) {
5058 146434 : SDValue N0 = N->getOperand(0);
5059 146434 : SDValue N1 = N->getOperand(1);
5060 146434 : EVT VT = N1.getValueType();
5061 :
5062 : // x | x --> x
5063 : if (N0 == N1)
5064 17 : return N0;
5065 :
5066 : // fold vector ops
5067 146417 : if (VT.isVector()) {
5068 24910 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
5069 4 : return FoldedVOp;
5070 :
5071 : // fold (or x, 0) -> x, vector edition
5072 24906 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
5073 22 : return N1;
5074 24884 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
5075 127 : return N0;
5076 :
5077 : // fold (or x, -1) -> -1, vector edition
5078 24757 : if (ISD::isBuildVectorAllOnes(N0.getNode()))
5079 : // do not return N0, because an undef node may exist in N0
5080 10 : return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5081 24752 : if (ISD::isBuildVectorAllOnes(N1.getNode()))
5082 : // do not return N1, because an undef node may exist in N1
5083 2 : return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5084 :
5085 : // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5086 : // Do this only if the resulting shuffle is legal.
5087 170 : if (isa<ShuffleVectorSDNode>(N0) &&
5088 24751 : isa<ShuffleVectorSDNode>(N1) &&
5089 : // Avoid folding a node with illegal type.
5090 143 : TLI.isTypeLegal(VT)) {
5091 142 : bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5092 142 : bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5093 142 : bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5094 142 : bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5095 : // Ensure both shuffles have a zero input.
5096 142 : if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5097 : assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5098 : assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5099 : const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5100 : const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5101 : bool CanFold = true;
5102 66 : int NumElts = VT.getVectorNumElements();
5103 66 : SmallVector<int, 4> Mask(NumElts);
5104 :
5105 300 : for (int i = 0; i != NumElts; ++i) {
5106 239 : int M0 = SV0->getMaskElt(i);
5107 239 : int M1 = SV1->getMaskElt(i);
5108 :
5109 : // Determine if either index is pointing to a zero vector.
5110 239 : bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5111 239 : bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5112 :
5113 : // If one element is zero and the otherside is undef, keep undef.
5114 : // This also handles the case that both are undef.
5115 239 : if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5116 1 : Mask[i] = -1;
5117 1 : continue;
5118 : }
5119 :
5120 : // Make sure only one of the elements is zero.
5121 238 : if (M0Zero == M1Zero) {
5122 : CanFold = false;
5123 : break;
5124 : }
5125 :
5126 : assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5127 :
5128 : // We have a zero and non-zero element. If the non-zero came from
5129 : // SV0 make the index a LHS index. If it came from SV1, make it
5130 : // a RHS index. We need to mod by NumElts because we don't care
5131 : // which operand it came from in the original shuffles.
5132 233 : Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5133 : }
5134 :
5135 66 : if (CanFold) {
5136 61 : SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5137 61 : SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5138 :
5139 122 : bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5140 61 : if (!LegalMask) {
5141 : std::swap(NewLHS, NewRHS);
5142 : ShuffleVectorSDNode::commuteMask(Mask);
5143 0 : LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5144 : }
5145 :
5146 61 : if (LegalMask)
5147 122 : return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5148 : }
5149 : }
5150 : }
5151 : }
5152 :
5153 : // fold (or c1, c2) -> c1|c2
5154 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5155 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5156 146197 : if (N0C && N1C && !N1C->isOpaque())
5157 182 : return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5158 : // canonicalize constant to RHS
5159 147686 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5160 1580 : !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5161 3149 : return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5162 : // fold (or x, 0) -> x
5163 144532 : if (isNullConstant(N1))
5164 831 : return N0;
5165 : // fold (or x, -1) -> -1
5166 143701 : if (isAllOnesConstant(N1))
5167 85 : return N1;
5168 :
5169 143616 : if (SDValue NewSel = foldBinOpIntoSelect(N))
5170 54 : return NewSel;
5171 :
5172 : // fold (or x, c) -> c iff (x & ~c) == 0
5173 402740 : if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5174 2 : return N1;
5175 :
5176 143560 : if (SDValue Combined = visitORLike(N0, N1, N))
5177 330 : return Combined;
5178 :
5179 : // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5180 143230 : if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5181 8 : return BSwap;
5182 143222 : if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5183 7 : return BSwap;
5184 :
5185 : // reassociate or
5186 181815 : if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5187 67 : return ROR;
5188 :
5189 : // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5190 : // iff (c1 & c2) != 0.
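 : // E.g. c1 == 0x0f, c2 == 0x03 (the masks intersect):
 : // ((X & 0x0f) | 0x03) == ((X | 0x03) & 0x0f), and c1|c2 == 0x0f.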
5191 : auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5192 : return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
5193 : };
5194 168198 : if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5195 193223 : ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
5196 25 : if (SDValue COR = DAG.FoldConstantArithmetic(
5197 50 : ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5198 25 : SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5199 25 : AddToWorklist(IOR.getNode());
5200 50 : return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5201 : }
5202 : }
5203 :
5204 : // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
5205 143123 : if (N0.getOpcode() == N1.getOpcode())
5206 24813 : if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5207 424 : return Tmp;
5208 :
5209 : // See if this is some rotate idiom.
5210 285398 : if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5211 1112 : return SDValue(Rot, 0);
5212 :
5213 141587 : if (SDValue Load = MatchLoadCombine(N))
5214 184 : return Load;
5215 :
5216 : // Simplify the operands using demanded-bits information.
5217 141403 : if (SimplifyDemandedBits(SDValue(N, 0)))
5218 6353 : return SDValue(N, 0);
5219 :
5220 135050 : return SDValue();
5221 : }
5222 :
5223 265514 : static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5224 297777 : if (Op.getOpcode() == ISD::AND &&
5225 32263 : DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5226 27712 : Mask = Op.getOperand(1);
5227 27712 : return Op.getOperand(0);
5228 : }
5229 237802 : return Op;
5230 : }
5231 :
5232 : /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5233 226104 : static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5234 : SDValue &Mask) {
5235 226104 : Op = stripConstantMask(DAG, Op, Mask);
5236 226104 : if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5237 39402 : Shift = Op;
5238 39402 : return true;
5239 : }
5240 : return false;
5241 : }
5242 :
5243 : /// Helper function for visitOR to extract the needed side of a rotate idiom
5244 : /// from a shl/srl/mul/udiv. This is meant to handle cases where
5245 : /// InstCombine merged some outside op with one of the shifts from
5246 : /// the rotate pattern.
5247 : /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5248 : /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5249 : /// patterns:
5250 : ///
5251 : /// (or (mul v c0) (srl (mul v c1) c2)):
5252 : /// expands (mul v c0) -> (shl (mul v c1) c3)
5253 : ///
5254 : /// (or (udiv v c0) (shl (udiv v c1) c2)):
5255 : /// expands (udiv v c0) -> (srl (udiv v c1) c3)
5256 : ///
5257 : /// (or (shl v c0) (srl (shl v c1) c2)):
5258 : /// expands (shl v c0) -> (shl (shl v c1) c3)
5259 : ///
5260 : /// (or (srl v c0) (shl (srl v c1) c2)):
5261 : /// expands (srl v c0) -> (srl (srl v c1) c3)
5262 : ///
5263 : /// Such that in all cases, c3+c2==bitwidth(op v c1).
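 : ///
 : /// E.g. (illustrative, i32): in (or (mul v 96), (srl (mul v 3) 27)),
 : /// (mul v 96) == (shl (mul v 3) 5) and 5 + 27 == 32, so rewriting the
 : /// mul this way exposes a rotate of (mul v 3) by 5.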
5264 0 : static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5265 : SDValue ExtractFrom, SDValue &Mask,
5266 : const SDLoc &DL) {
5267 : assert(OppShift && ExtractFrom && "Empty SDValue");
5268 : assert(
5269 : (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5270 : "Existing shift must be valid as a rotate half");
5271 :
5272 0 : ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5273 : // Preconditions:
5274 : // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5275 : //
5276 : // Find opcode of the needed shift to be extracted from (op0 v c0).
5277 0 : unsigned Opcode = ISD::DELETED_NODE;
5278 0 : bool IsMulOrDiv = false;
5279 : // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5280 : // opcode or its arithmetic (mul or udiv) variant.
5281 : auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5282 0 : IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5283 0 : if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5284 : return false;
5285 0 : Opcode = NeededShift;
5286 : return true;
5287 : };
5288 : // op0 must be either the needed shift opcode or the mul/udiv equivalent
5289 : // that the needed shift can be extracted from.
5290 0 : if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5291 : (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5292 0 : return SDValue();
5293 :
5294 : // op0 must be the same opcode on both sides, have the same LHS argument,
5295 : // and produce the same value type.
5296 0 : SDValue OppShiftLHS = OppShift.getOperand(0);
5297 0 : EVT ShiftedVT = OppShiftLHS.getValueType();
5298 0 : if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5299 0 : OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5300 0 : ShiftedVT != ExtractFrom.getValueType())
5301 0 : return SDValue();
5302 :
5303 : // Amount of the existing shift.
5304 0 : ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5305 : // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5306 0 : ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5307 : // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5308 : ConstantSDNode *ExtractFromCst =
5309 0 : isConstOrConstSplat(ExtractFrom.getOperand(1));
5310 : // TODO: We should be able to handle non-uniform constant vectors for these values
5311 : // Check that we have constant values.
5312 0 : if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5313 0 : !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5314 0 : !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5315 0 : return SDValue();
5316 :
5317 : // Compute the shift amount we need to extract to complete the rotate.
5318 : const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5319 0 : if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5320 0 : return SDValue();
5321 0 : APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5322 : // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5323 0 : APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5324 0 : APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5325 0 : zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5326 :
5327 : // Now try extract the needed shift from the ExtractFrom op and see if the
5328 : // result matches up with the existing shift's LHS op.
5329 0 : if (IsMulOrDiv) {
5330 : // Op to extract from is a mul or udiv by a constant.
5331 : // Check:
5332 : // c0 / (1 << (bitwidth(op0 v c1) - c2)) == c1
5333 : // c0 % (1 << (bitwidth(op0 v c1) - c2)) == 0
5334 : const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5335 0 : NeededShiftAmt.getZExtValue());
5336 : APInt ResultAmt;
5337 : APInt Rem;
5338 0 : APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5339 0 : if (Rem != 0 || ResultAmt != OppLHSAmt)
5340 0 : return SDValue();
5341 : } else {
5342 : // Op to extract from is a shift by a constant.
5343 : // Check:
5344 : // c0 - (bitwidth(op0 v c1) - c2) == c1
5345 0 : if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5346 : ExtractFromAmt.getBitWidth()))
5347 0 : return SDValue();
5348 : }
5349 :
5350 : // Return the expanded shift op that should allow a rotate to be formed.
5351 0 : EVT ShiftVT = OppShift.getOperand(1).getValueType();
5352 0 : EVT ResVT = ExtractFrom.getValueType();
5353 0 : SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5354 0 : return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5355 : }
5356 :
5357 : // Return true if we can prove that, whenever Neg and Pos are both in the
5358 : // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
5359 : // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5360 : //
5361 : // (or (shift1 X, Neg), (shift2 X, Pos))
5362 : //
5363 : // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5364 : // in direction shift1 by Neg. The range [0, EltSize) means that we only need
5365 : // to consider shift amounts with defined behavior.
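 : //
 : // E.g. with EltSize == 32, Neg == (sub 32, Pos) satisfies this, so
 : // (or (shl X, (sub 32, Pos)), (srl X, Pos)) can form (rotr X, Pos).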
5366 546 : static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5367 : SelectionDAG &DAG) {
5368 : // If EltSize is a power of 2 then:
5369 : //
5370 : // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5371 : // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5372 : //
5373 : // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5374 : // for the stronger condition:
5375 : //
5376 : // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
5377 : //
5378 : // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5379 : // we can just replace Neg with Neg' for the rest of the function.
5380 : //
5381 : // In other cases we check for the even stronger condition:
5382 : //
5383 : // Neg == EltSize - Pos [B]
5384 : //
5385 : // for all Neg and Pos. Note that the (or ...) then invokes undefined
5386 : // behavior if Pos == 0 (and consequently Neg == EltSize).
5387 : //
5388 : // We could actually use [A] whenever EltSize is a power of 2, but the
5389 : // only extra cases that it would match are those uninteresting ones
5390 : // where Neg and Pos are never in range at the same time. E.g. for
5391 : // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5392 : // as well as (sub 32, Pos), but:
5393 : //
5394 : // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5395 : //
5396 : // always invokes undefined behavior for 32-bit X.
5397 : //
5398 : // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5399 : unsigned MaskLoBits = 0;
5400 546 : if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5401 83 : if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5402 83 : KnownBits Known;
5403 83 : DAG.computeKnownBits(Neg.getOperand(0), Known);
5404 : unsigned Bits = Log2_64(EltSize);
5405 246 : if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5406 243 : ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5407 78 : Neg = Neg.getOperand(0);
5408 : MaskLoBits = Bits;
5409 : }
5410 : }
5411 : }
5412 :
5413 : // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5414 546 : if (Neg.getOpcode() != ISD::SUB)
5415 : return false;
5416 400 : ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5417 400 : if (!NegC)
5418 : return false;
5419 400 : SDValue NegOp1 = Neg.getOperand(1);
5420 :
5421 : // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5422 : // Pos'. The truncation is redundant for the purpose of the equality.
5423 400 : if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5424 54 : if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5425 54 : KnownBits Known;
5426 54 : DAG.computeKnownBits(Pos.getOperand(0), Known);
5427 162 : if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5428 162 : ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5429 : MaskLoBits))
5430 42 : Pos = Pos.getOperand(0);
5431 : }
5432 : }
5433 :
5434 : // The condition we need is now:
5435 : //
5436 : // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5437 : //
5438 : // If NegOp1 == Pos then we need:
5439 : //
5440 : // EltSize & Mask == NegC & Mask
5441 : //
5442 : // (because "x & Mask" is a truncation and distributes through subtraction).
5443 : APInt Width;
5444 : if (Pos == NegOp1)
5445 780 : Width = NegC->getAPIntValue();
5446 :
5447 : // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5448 : // Then the condition we want to prove becomes:
5449 : //
5450 : // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5451 : //
5452 : // which, again because "x & Mask" is a truncation, becomes:
5453 : //
5454 : // NegC & Mask == (EltSize - PosC) & Mask
5455 : // EltSize & Mask == (NegC + PosC) & Mask
5456 10 : else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5457 3 : if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5458 9 : Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5459 : else
5460 : return false;
5461 : } else
5462 : return false;
5463 :
5464 : // Now we just need to check that EltSize & Mask == Width & Mask.
5465 393 : if (MaskLoBits)
5466 : // EltSize & Mask is 0 since Mask is EltSize - 1.
5467 60 : return Width.getLoBits(MaskLoBits) == 0;
5468 333 : return Width == EltSize;
5469 : }
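 : // Worked example for matchRotateSub (illustrative): with EltSize == 32 and
 : // the common pattern (or (shl x, y), (srl x, (sub 32, y))), Pos == y and
 : // Neg == (sub 32, y), so NegC == 32, NegOp1 == y == Pos, and Width == 32 ==
 : // EltSize: match. For the masked variant Neg == (and (sub 0, y), 31) the
 : // AND is stripped (MaskLoBits == 5), leaving NegC == 0, so Width == 0 and
 : // Width.getLoBits(5) == 0: also a match.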
5470 :
5471 : // A subroutine of MatchRotate used once we have found an OR of two opposite
5472 : // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
5473 : // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5474 : // former being preferred if supported. InnerPos and InnerNeg are Pos and
5475 : // Neg with outer conversions stripped away.
5476 0 : SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5477 : SDValue Neg, SDValue InnerPos,
5478 : SDValue InnerNeg, unsigned PosOpcode,
5479 : unsigned NegOpcode, const SDLoc &DL) {
5480 : // fold (or (shl x, (*ext y)),
5481 : // (srl x, (*ext (sub 32, y)))) ->
5482 : // (rotl x, y) or (rotr x, (sub 32, y))
5483 : //
5484 : // fold (or (shl x, (*ext (sub 32, y))),
5485 : // (srl x, (*ext y))) ->
5486 : // (rotr x, y) or (rotl x, (sub 32, y))
5487 0 : EVT VT = Shifted.getValueType();
5488 0 : if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5489 0 : bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5490 0 : return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5491 0 : HasPos ? Pos : Neg).getNode();
5492 : }
5493 :
5494 : return nullptr;
5495 : }
5496 :
5497 : // MatchRotate - Handle an 'or' of two operands. If this is one of the many
5498 : // idioms for rotate, and if the target supports rotation instructions, generate
5499 : // a rot[lr].
5500 142856 : SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5501 : // Must be a legal type. Expanded and promoted types won't work with rotates.
5502 142856 : EVT VT = LHS.getValueType();
5503 142856 : if (!TLI.isTypeLegal(VT)) return nullptr;
5504 :
5505 : // The target must have at least one rotate flavor.
5506 140206 : bool HasROTL = hasOperation(ISD::ROTL, VT);
5507 140206 : bool HasROTR = hasOperation(ISD::ROTR, VT);
5508 140206 : if (!HasROTL && !HasROTR) return nullptr;
5509 :
5510 : // Check for truncated rotate.
5511 113053 : if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5512 314 : LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5513 : assert(LHS.getValueType() == RHS.getValueType());
5514 157 : if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5515 1 : return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5516 1 : SDValue(Rot, 0)).getNode();
5517 : }
5518 : }
5519 :
5520 : // Match "(X shl/srl V1) & V2" where V2 may not be present.
5521 113052 : SDValue LHSShift; // The shift.
5522 113052 : SDValue LHSMask; // AND value if any.
5523 113052 : matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5524 :
5525 113052 : SDValue RHSShift; // The shift.
5526 113052 : SDValue RHSMask; // AND value if any.
5527 113052 : matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5528 :
5529 : // If neither side matched a rotate half, bail
5530 113052 : if (!LHSShift && !RHSShift)
5531 : return nullptr;
5532 :
5533 : // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5534 : // side of the rotate, so try to handle that here. In all cases we need to
5535 : // pass the matched shift from the opposite side to compute the opcode and
5536 : // needed shift amount to extract. We still want to do this if both sides
5537 : // matched a rotate half because one half may be a potential overshift that
5538 : // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
5539 : // single one).
5540 :
5541 : // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5542 33445 : if (LHSShift)
5543 10248 : if (SDValue NewRHSShift =
5544 10248 : extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5545 12 : RHSShift = NewRHSShift;
5546 : // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5547 33445 : if (RHSShift)
5548 29162 : if (SDValue NewLHSShift =
5549 29162 : extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5550 11 : LHSShift = NewLHSShift;
5551 :
5552 : // If a side is still missing, nothing else we can do.
5553 33445 : if (!RHSShift || !LHSShift)
5554 : return nullptr;
5555 :
5556 : // At this point we've matched or extracted a shift op on each side.
5557 :
5558 : if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5559 : return nullptr; // Not shifting the same value.
5560 :
5561 1365 : if (LHSShift.getOpcode() == RHSShift.getOpcode())
5562 : return nullptr; // Shifts must disagree.
5563 :
5564 : // Canonicalize shl to left side in a shl/srl pair.
5565 1186 : if (RHSShift.getOpcode() == ISD::SHL) {
5566 : std::swap(LHS, RHS);
5567 : std::swap(LHSShift, RHSShift);
5568 : std::swap(LHSMask, RHSMask);
5569 : }
5570 :
5571 : unsigned EltSizeInBits = VT.getScalarSizeInBits();
5572 1186 : SDValue LHSShiftArg = LHSShift.getOperand(0);
5573 1186 : SDValue LHSShiftAmt = LHSShift.getOperand(1);
5574 1186 : SDValue RHSShiftArg = RHSShift.getOperand(0);
5575 1186 : SDValue RHSShiftAmt = RHSShift.getOperand(1);
5576 :
5577 : // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5578 : // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
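 : // e.g. (illustrative, i32): (or (shl x, 8), (srl x, 24)) -> (rotl x, 8)
 : // or equivalently (rotr x, 24), since 8 + 24 == 32.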
5579 : auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5580 : ConstantSDNode *RHS) {
5581 : return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5582 : };
5583 2372 : if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5584 723 : SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5585 1033 : LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5586 :
5587 : // If there is an AND of either shifted operand, apply it to the result.
5588 723 : if (LHSMask.getNode() || RHSMask.getNode()) {
5589 104 : SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5590 104 : SDValue Mask = AllOnes;
5591 :
5592 104 : if (LHSMask.getNode()) {
5593 208 : SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5594 104 : Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5595 104 : DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5596 : }
5597 104 : if (RHSMask.getNode()) {
5598 174 : SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5599 87 : Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5600 87 : DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5601 : }
5602 :
5603 208 : Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5604 : }
5605 :
5606 : return Rot.getNode();
5607 : }
5608 :
5609 : // If there is a mask here, and we have a variable shift, we can't be sure
5610 : // that we're masking out the correct bits.
5611 463 : if (LHSMask.getNode() || RHSMask.getNode())
5612 : return nullptr;
5613 :
5614 : // If the shift amount is sign/zext/any-extended just peel it off.
5615 424 : SDValue LExtOp0 = LHSShiftAmt;
5616 424 : SDValue RExtOp0 = RHSShiftAmt;
5617 424 : if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5618 413 : LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5619 413 : LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5620 511 : LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5621 87 : (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5622 76 : RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5623 76 : RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5624 : RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5625 79 : LExtOp0 = LHSShiftAmt.getOperand(0);
5626 79 : RExtOp0 = RHSShiftAmt.getOperand(0);
5627 : }
5628 :
5629 424 : SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5630 : LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5631 424 : if (TryL)
5632 : return TryL;
5633 :
5634 122 : SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5635 : RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5636 122 : if (TryR)
5637 87 : return TryR;
5638 :
5639 : return nullptr;
5640 : }
5641 :
5642 : namespace {
5643 :
5644 : /// Represents the known origin of an individual byte in a load combine pattern. The
5645 : /// value of the byte is either constant zero or comes from memory.
5646 : struct ByteProvider {
5647 : // For constant zero providers Load is set to nullptr. For memory providers
5648 : // Load represents the node which loads the byte from memory.
5649 : // ByteOffset is the offset of the byte in the value produced by the load.
5650 : LoadSDNode *Load = nullptr;
5651 : unsigned ByteOffset = 0;
5652 :
5653 : ByteProvider() = default;
5654 :
5655 : static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5656 : return ByteProvider(Load, ByteOffset);
5657 : }
5658 :
5659 : static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5660 :
5661 0 : bool isConstantZero() const { return !Load; }
5662 0 : bool isMemory() const { return Load; }
5663 :
5664 : bool operator==(const ByteProvider &Other) const {
5665 : return Other.Load == Load && Other.ByteOffset == ByteOffset;
5666 : }
5667 :
5668 : private:
5669 : ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5670 : : Load(Load), ByteOffset(ByteOffset) {}
5671 : };
5672 :
5673 : } // end anonymous namespace
5674 :
5675 : /// Recursively traverses the expression calculating the origin of the requested
5676 : /// byte of the given value. Returns None if the provider can't be calculated.
5677 : ///
5678 : /// For every value except the root of the expression, verifies that the value
5679 : /// has exactly one use; if not, returns None. This way, if the origin
5680 : /// of the byte is returned it's guaranteed that the values which contribute to
5681 : /// the byte are not used outside of this expression.
5682 : ///
5683 : /// Because the parts of the expression are not allowed to have more than one
5684 : /// use this function iterates over trees, not DAGs. So it never visits the same
5685 : /// node more than once.
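 : /// For example (illustrative): for
 : /// (or (zext (load i8 p) to i32), (shl (zext (load i8 q) to i32), 8)),
 : /// byte 0 is provided by the load at p, byte 1 by the load at q, and
 : /// bytes 2 and 3 are constant zero coming from the zero-extensions.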
5686 : static const Optional<ByteProvider>
5687 293077 : calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5688 : bool Root = false) {
5689 : // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls
5690 293077 : if (Depth == 10)
5691 : return None;
5692 :
5693 289963 : if (!Root && !Op.hasOneUse())
5694 : return None;
5695 :
5696 : assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5697 243935 : unsigned BitWidth = Op.getValueSizeInBits();
5698 243935 : if (BitWidth % 8 != 0)
5699 : return None;
5700 243935 : unsigned ByteWidth = BitWidth / 8;
5701 : assert(Index < ByteWidth && "invalid index requested");
5702 : (void) ByteWidth;
5703 :
5704 487870 : switch (Op.getOpcode()) {
5705 154519 : case ISD::OR: {
5706 309038 : auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5707 154519 : if (!LHS)
5708 : return None;
5709 32604 : auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5710 16302 : if (!RHS)
5711 : return None;
5712 :
5713 12346 : if (LHS->isConstantZero())
5714 : return RHS;
5715 6219 : if (RHS->isConstantZero())
5716 : return LHS;
5717 : return None;
5718 : }
5719 15857 : case ISD::SHL: {
5720 15857 : auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5721 : if (!ShiftOp)
5722 : return None;
5723 :
5724 15360 : uint64_t BitShift = ShiftOp->getZExtValue();
5725 15360 : if (BitShift % 8 != 0)
5726 : return None;
5727 12520 : uint64_t ByteShift = BitShift / 8;
5728 :
5729 12520 : return Index < ByteShift
5730 : ? ByteProvider::getConstantZero()
5731 : : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5732 12520 : Depth + 1);
5733 : }
5734 12218 : case ISD::ANY_EXTEND:
5735 : case ISD::SIGN_EXTEND:
5736 : case ISD::ZERO_EXTEND: {
5737 12218 : SDValue NarrowOp = Op->getOperand(0);
5738 12218 : unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5739 12218 : if (NarrowBitWidth % 8 != 0)
5740 : return None;
5741 12148 : uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5742 :
5743 12148 : if (Index >= NarrowByteWidth)
5744 1565 : return Op.getOpcode() == ISD::ZERO_EXTEND
5745 : ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5746 1565 : : None;
5747 10583 : return calculateByteProvider(NarrowOp, Index, Depth + 1);
5748 : }
5749 42 : case ISD::BSWAP:
5750 84 : return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5751 84 : Depth + 1);
5752 16760 : case ISD::LOAD: {
5753 : auto L = cast<LoadSDNode>(Op.getNode());
5754 16760 : if (L->isVolatile() || L->isIndexed())
5755 : return None;
5756 :
5757 16674 : unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5758 16674 : if (NarrowBitWidth % 8 != 0)
5759 : return None;
5760 16674 : uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5761 :
5762 16674 : if (Index >= NarrowByteWidth)
5763 : return L->getExtensionType() == ISD::ZEXTLOAD
5764 : ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5765 4496 : : None;
5766 : return ByteProvider::getMemory(L, Index);
5767 : }
5768 : }
5769 :
5770 : return None;
5771 : }
5772 :
5773 : /// Match a pattern where a wide type scalar value is loaded by several narrow
5774 : /// loads and combined by shifts and ors. Fold it into a single load or a load
5775 : /// and a BSWAP if the targets supports it.
5776 : ///
5777 : /// Assuming little endian target:
5778 : /// i8 *a = ...
5779 : /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5780 : /// =>
5781 : /// i32 val = *((i32)a)
5782 : ///
5783 : /// i8 *a = ...
5784 : /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5785 : /// =>
5786 : /// i32 val = BSWAP(*((i32)a))
5787 : ///
5788 : /// TODO: This rule matches complex patterns with OR node roots and doesn't
5789 : /// interact well with the worklist mechanism. When a part of the pattern is
5790 : /// updated (e.g. one of the loads) its direct users are put into the worklist,
5791 : /// but the root node of the pattern which triggers the load combine is not
5792 : /// necessarily a direct user of the changed node. For example, once the address
5793 : /// of t28 load is reassociated load combine won't be triggered:
5794 : /// t25: i32 = add t4, Constant:i32<2>
5795 : /// t26: i64 = sign_extend t25
5796 : /// t27: i64 = add t2, t26
5797 : /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5798 : /// t29: i32 = zero_extend t28
5799 : /// t32: i32 = shl t29, Constant:i8<8>
5800 : /// t33: i32 = or t23, t32
5801 : /// As a possible fix visitLoad can check if the load can be a part of a load
5802 : /// combine pattern and add corresponding OR roots to the worklist.
5803 0 : SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5804 : assert(N->getOpcode() == ISD::OR &&
5805 : "Can only match load combining against OR nodes");
5806 :
5807 : // Handles simple types only
5808 0 : EVT VT = N->getValueType(0);
5809 0 : if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5810 0 : return SDValue();
5811 0 : unsigned ByteWidth = VT.getSizeInBits() / 8;
5812 :
5813 0 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5814 : // Before legalization we can introduce overly wide illegal loads which will
5815 : // later be split into legal-sized loads. This enables us to combine i64-by-i8
5816 : // load patterns into a couple of i32 loads on 32-bit targets.
5817 0 : if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5818 0 : return SDValue();
5819 :
5820 : std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5821 : unsigned BW, unsigned i) { return i; };
5822 : std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5823 4274 : unsigned BW, unsigned i) { return BW - i - 1; };
5824 :
5825 0 : bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5826 : auto MemoryByteOffset = [&] (ByteProvider P) {
5827 : assert(P.isMemory() && "Must be a memory byte provider");
5828 : unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5829 : assert(LoadBitWidth % 8 == 0 &&
5830 : "can only analyze providers for individual bytes not bit");
5831 : unsigned LoadByteWidth = LoadBitWidth / 8;
5832 : return IsBigEndianTarget
5833 : ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5834 : : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5835 0 : };
5836 :
5837 : Optional<BaseIndexOffset> Base;
5838 : SDValue Chain;
5839 :
5840 : SmallPtrSet<LoadSDNode *, 8> Loads;
5841 : Optional<ByteProvider> FirstByteProvider;
5842 : int64_t FirstOffset = INT64_MAX;
5843 :
5844 : // Check if all the bytes of the OR we are looking at are loaded from the same
5845 : // base address. Collect byte offsets from the Base address in ByteOffsets.
5846 0 : SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5847 0 : for (unsigned i = 0; i < ByteWidth; i++) {
5848 0 : auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5849 0 : if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5850 0 : return SDValue();
5851 :
5852 : LoadSDNode *L = P->Load;
5853 : assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5854 : "Must be enforced by calculateByteProvider");
5855 : assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5856 :
5857 : // All loads must share the same chain
5858 0 : SDValue LChain = L->getChain();
5859 0 : if (!Chain)
5860 0 : Chain = LChain;
5861 : else if (Chain != LChain)
5862 0 : return SDValue();
5863 :
5864 : // Loads must share the same base address
5865 0 : BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5866 0 : int64_t ByteOffsetFromBase = 0;
5867 0 : if (!Base)
5868 : Base = Ptr;
5869 0 : else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5870 0 : return SDValue();
5871 :
5872 : // Calculate the offset of the current byte from the base address
5873 0 : ByteOffsetFromBase += MemoryByteOffset(*P);
5874 0 : ByteOffsets[i] = ByteOffsetFromBase;
5875 :
5876 : // Remember the first byte load
5877 0 : if (ByteOffsetFromBase < FirstOffset) {
5878 : FirstByteProvider = P;
5879 : FirstOffset = ByteOffsetFromBase;
5880 : }
5881 :
5882 0 : Loads.insert(L);
5883 : }
5884 : assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5885 : "memory, so there must be at least one load which produces the value");
5886 : assert(Base && "Base address of the accessed memory location must be set");
5887 : assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5888 :
5889 : // Check if the bytes of the OR we are looking at match either a big- or
5890 : // little-endian value load.
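 : // e.g. (illustrative): for ByteWidth == 4, byte offsets {0, 1, 2, 3}
 : // relative to FirstOffset match a little-endian load, while {3, 2, 1, 0}
 : // match a big-endian one.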
5891 : bool BigEndian = true, LittleEndian = true;
5892 0 : for (unsigned i = 0; i < ByteWidth; i++) {
5893 0 : int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5894 0 : LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5895 0 : BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5896 0 : if (!BigEndian && !LittleEndian)
5897 0 : return SDValue();
5898 : }
5899 : assert((BigEndian != LittleEndian) && "should be either or");
5900 : assert(FirstByteProvider && "must be set");
5901 :
5902 : // Ensure that the first byte is loaded from offset zero of the first load,
5903 : // so the combined value can be loaded from the first load's address.
5904 0 : if (MemoryByteOffset(*FirstByteProvider) != 0)
5905 0 : return SDValue();
5906 : LoadSDNode *FirstLoad = FirstByteProvider->Load;
5907 :
5908 : // The node we are looking at matches with the pattern, check if we can
5909 : // replace it with a single load and bswap if needed.
5910 :
5911 : // If the load needs a byte swap, check if the target supports it
5912 0 : bool NeedsBswap = IsBigEndianTarget != BigEndian;
5913 :
5914 : // Before legalize we can introduce illegal bswaps which will be later
5915 : // converted to an explicit bswap sequence. This way we end up with a single
5916 : // load and byte shuffling instead of several loads and byte shuffling.
5917 0 : if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5918 0 : return SDValue();
5919 :
5920 : // Check that a load of the wide type is both allowed and fast on the target
5921 0 : bool Fast = false;
5922 0 : bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5923 : VT, FirstLoad->getAddressSpace(),
5924 : FirstLoad->getAlignment(), &Fast);
5925 0 : if (!Allowed || !Fast)
5926 0 : return SDValue();
5927 :
5928 : SDValue NewLoad =
5929 0 : DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5930 0 : FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5931 :
5932 : // Transfer chain users from old loads to the new load.
5933 0 : for (LoadSDNode *L : Loads)
5934 0 : DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5935 :
5936 0 : return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5937 : }
5938 :
5939 : // If the target has andn, bsl, or a similar bit-select instruction,
5940 : // we want to unfold masked merge, with canonical pattern of:
5941 : // | A | |B|
5942 : // ((x ^ y) & m) ^ y
5943 : // | D |
5944 : // Into:
5945 : // (x & m) | (y & ~m)
5946 : // If y is a constant, and the 'andn' does not work with immediates,
5947 : // we unfold into a different pattern:
5948 : // ~(~x & m) & (m | y)
5949 : // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
5950 : // the very least that breaks andnpd / andnps patterns, and because those
5951 : // patterns are simplified in IR and shouldn't be created in the DAG
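 : // Worked example (illustrative, 4-bit values): for x = 0b1100, y = 0b1010,
 : // m = 0b0110: ((x ^ y) & m) ^ y == (0b0110 & 0b0110) ^ 0b1010 == 0b1100,
 : // which equals (x & m) | (y & ~m) == 0b0100 | 0b1000 == 0b1100, i.e. the
 : // bits of x where m is set and the bits of y where it is clear.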
5952 0 : SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
5953 : assert(N->getOpcode() == ISD::XOR);
5954 :
5955 : // Don't touch 'not' (i.e. where y = -1).
5956 0 : if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
5957 0 : return SDValue();
5958 :
5959 0 : EVT VT = N->getValueType(0);
5960 :
5961 : // There are 3 commutable operators in the pattern,
5962 : // so we have to deal with 8 possible variants of the basic pattern.
5963 0 : SDValue X, Y, M;
5964 : auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
5965 : if (And.getOpcode() != ISD::AND || !And.hasOneUse())
5966 : return false;
5967 : SDValue Xor = And.getOperand(XorIdx);
5968 : if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
5969 : return false;
5970 : SDValue Xor0 = Xor.getOperand(0);
5971 : SDValue Xor1 = Xor.getOperand(1);
5972 : // Don't touch 'not' (i.e. where y = -1).
5973 : if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
5974 : return false;
5975 : if (Other == Xor0)
5976 : std::swap(Xor0, Xor1);
5977 : if (Other != Xor1)
5978 : return false;
5979 : X = Xor0;
5980 : Y = Xor1;
5981 : M = And.getOperand(XorIdx ? 0 : 1);
5982 : return true;
5983 0 : };
5984 :
5985 0 : SDValue N0 = N->getOperand(0);
5986 0 : SDValue N1 = N->getOperand(1);
5987 0 : if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
5988 0 : !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
5989 0 : return SDValue();
5990 :
5991 : // Don't do anything if the mask is constant. This should not be reachable.
5992 : // InstCombine should have already unfolded this pattern, and DAGCombiner
5993 : // probably shouldn't produce it, too.
5994 0 : if (isa<ConstantSDNode>(M.getNode()))
5995 0 : return SDValue();
5996 :
5997 : // We can transform if the target has AndNot
5998 0 : if (!TLI.hasAndNot(M))
5999 0 : return SDValue();
6000 :
6001 : SDLoc DL(N);
6002 :
6003 : // If Y is a constant, check that 'andn' works with immediates.
6004 0 : if (!TLI.hasAndNot(Y)) {
6005 : assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6006 : // If not, we need to do a bit more work to make sure andn is still used.
6007 0 : SDValue NotX = DAG.getNOT(DL, X, VT);
6008 0 : SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6009 0 : SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6010 0 : SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6011 0 : return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6012 : }
6013 :
6014 0 : SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6015 0 : SDValue NotM = DAG.getNOT(DL, M, VT);
6016 0 : SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6017 :
6018 0 : return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6019 : }
6020 :
6021 117833 : SDValue DAGCombiner::visitXOR(SDNode *N) {
6022 117833 : SDValue N0 = N->getOperand(0);
6023 117833 : SDValue N1 = N->getOperand(1);
6024 235666 : EVT VT = N0.getValueType();
6025 :
6026 : // fold vector ops
6027 117833 : if (VT.isVector()) {
6028 24850 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
6029 0 : return FoldedVOp;
6030 :
6031 : // fold (xor x, 0) -> x, vector edition
6032 24850 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
6033 0 : return N1;
6034 24850 : if (ISD::isBuildVectorAllZeros(N1.getNode()))
6035 110 : return N0;
6036 : }
6037 :
6038 : // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6039 117723 : if (N0.isUndef() && N1.isUndef())
6040 0 : return DAG.getConstant(0, SDLoc(N), VT);
6041 : // fold (xor x, undef) -> undef
6042 117723 : if (N0.isUndef())
6043 14 : return N0;
6044 235418 : if (N1.isUndef())
6045 0 : return N1;
6046 : // fold (xor c1, c2) -> c1^c2
6047 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6048 : ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6049 117709 : if (N0C && N1C)
6050 1808 : return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
6051 : // canonicalize constant to RHS
6052 117331 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6053 526 : !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6054 1046 : return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
6055 : // fold (xor x, 0) -> x
6056 116282 : if (isNullConstant(N1))
6057 0 : return N0;
6058 :
6059 116282 : if (SDValue NewSel = foldBinOpIntoSelect(N))
6060 6 : return NewSel;
6061 :
6062 : // reassociate xor
6063 165686 : if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1, N->getFlags()))
6064 288 : return RXOR;
6065 :
6066 : // fold !(x cc y) -> (x !cc y)
6067 115988 : SDValue LHS, RHS, CC;
6068 115988 : if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6069 22739 : bool isInt = LHS.getValueType().isInteger();
6070 45478 : ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6071 : isInt);
6072 :
6073 22739 : if (!LegalOperations ||
6074 121 : TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6075 22625 : switch (N0.getOpcode()) {
6076 0 : default:
6077 0 : llvm_unreachable("Unhandled SetCC Equivalent!");
6078 22625 : case ISD::SETCC:
6079 58896 : return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6080 0 : case ISD::SELECT_CC:
6081 0 : return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6082 0 : N0.getOperand(3), NotCC);
6083 : }
6084 : }
6085 : }
6086 :
6087 : // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6088 135505 : if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
6089 93370 : N0.getNode()->hasOneUse() &&
6090 7 : isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6091 3 : SDValue V = N0.getOperand(0);
6092 : SDLoc DL(N0);
6093 3 : V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
6094 3 : DAG.getConstant(1, DL, V.getValueType()));
6095 3 : AddToWorklist(V.getNode());
6096 6 : return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
6097 : }
6098 :
6099 : // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6100 112996 : if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6101 19310 : (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
6102 82 : SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6103 82 : if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6104 53 : unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
6105 55 : LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6106 55 : RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6107 53 : AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6108 108 : return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
6109 : }
6110 : }
6111 : // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6112 93307 : if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6113 46654 : (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
6114 612 : SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6115 : if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6116 64 : unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
6117 65 : LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6118 64 : RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6119 64 : AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6120 129 : return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
6121 : }
6122 : }
6123 : // fold (xor (and x, y), y) -> (and (not x), y)
6124 186486 : if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6125 1890 : N0->getOperand(1) == N1) {
6126 73 : SDValue X = N0->getOperand(0);
6127 88 : SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6128 73 : AddToWorklist(NotX.getNode());
6129 161 : return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
6130 : }
6131 :
6132 : // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
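 : // e.g. (illustrative, i32): for X == -5, Y == (sra X, 31) == -1, so
 : // (add X, Y) == -6 and (xor -6, -1) == 5 == abs(-5); for X == 5, Y == 0
 : // and both the add and the xor are no-ops.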
6133 93170 : if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6134 19117 : SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
6135 38134 : SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
6136 19117 : if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6137 119 : SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6138 119 : SDValue S0 = S.getOperand(0);
6139 : if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6140 : unsigned OpSizeInBits = VT.getScalarSizeInBits();
6141 113 : if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6142 222 : if (C->getAPIntValue() == (OpSizeInBits - 1))
6143 225 : return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
6144 : }
6145 : }
6146 : }
6147 :
6148 : // fold (xor x, x) -> 0
6149 93059 : if (N0 == N1)
6150 38 : return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
6151 :
6152 : // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6153 : // Here is a concrete example of this equivalence:
6154 : // i16 x == 14
6155 : // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
6156 : // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6157 : //
6158 : // =>
6159 : //
6160 : // i16 ~1 == 0b1111111111111110
6161 : // i16 rol(~1, 14) == 0b1011111111111111
6162 : //
6163 : // Some additional tips to help conceptualize this transform:
6164 : // - Try to see the operation as placing a single zero in a value of all ones.
6165 : // - There exists no value for x which would allow the result to contain zero.
6166 : // - Values of x larger than the bitwidth are undefined and do not require a
6167 : // consistent result.
6168 : // - Pushing the zero left requires shifting one bits in from the right.
6169 : // A rotate left of ~1 is a nice way of achieving the desired result.
6170 143541 : if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
6171 632 : && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6172 : SDLoc DL(N);
6173 79 : return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6174 79 : N0.getOperand(1));
6175 : }
6176 :
6177 : // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
6178 92961 : if (N0.getOpcode() == N1.getOpcode())
6179 14346 : if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
6180 438 : return Tmp;
6181 :
6182 : // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
6183 92523 : if (SDValue MM = unfoldMaskedMerge(N))
6184 114 : return MM;
6185 :
6186 : // Simplify the expression using non-local knowledge.
6187 92409 : if (SimplifyDemandedBits(SDValue(N, 0)))
6188 228 : return SDValue(N, 0);
6189 :
6190 92181 : return SDValue();
6191 : }
6192 :
6193 : /// Handle transforms common to the three shifts, when the shift amount is a
6194 : /// constant.
6195 0 : SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6196 0 : SDNode *LHS = N->getOperand(0).getNode();
6197 0 : if (!LHS->hasOneUse()) return SDValue();
6198 :
6199 : // We want to pull some binops through shifts, so that we have (and (shift))
6200 : // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
6201 : // thing happens with address calculations, so it's important to canonicalize
6202 : // it.
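 : // e.g. (illustrative): (shl (add x, 3), 2) becomes (add (shl x, 2), 12),
 : // i.e. (x + 3) * 4 == x * 4 + 12, which maps directly onto a scaled
 : // base-plus-offset addressing mode.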
6203 : bool HighBitSet = false; // Can we transform this if the high bit is set?
6204 :
6205 0 : switch (LHS->getOpcode()) {
6206 0 : default: return SDValue();
6207 : case ISD::OR:
6208 : case ISD::XOR:
6209 : HighBitSet = false; // We can only transform sra if the high bit is clear.
6210 : break;
6211 0 : case ISD::AND:
6212 : HighBitSet = true; // We can only transform sra if the high bit is set.
6213 0 : break;
6214 0 : case ISD::ADD:
6215 0 : if (N->getOpcode() != ISD::SHL)
6216 0 : return SDValue(); // only shl(add) not sr[al](add).
6217 : HighBitSet = false; // We can only transform sra if the high bit is clear.
6218 : break;
6219 : }
6220 :
6221 : // We require the RHS of the binop to be a constant and not opaque as well.
6222 0 : ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6223 0 : if (!BinOpCst) return SDValue();
6224 :
6225 : // FIXME: disable this unless the input to the binop is a shift by a constant
6226 : // or is copy/select. Enable this in other cases when we figure out it's profitable.
6227 0 : SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6228 0 : bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6229 0 : BinOpLHSVal->getOpcode() == ISD::SRA ||
6230 : BinOpLHSVal->getOpcode() == ISD::SRL;
6231 0 : bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6232 : BinOpLHSVal->getOpcode() == ISD::SELECT;
6233 :
6234 0 : if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6235 : !isCopyOrSelect)
6236 0 : return SDValue();
6237 :
6238 0 : if (isCopyOrSelect && N->hasOneUse())
6239 0 : return SDValue();
6240 :
6241 0 : EVT VT = N->getValueType(0);
6242 :
6243 : // If this is a signed shift right, and the high bit is modified by the
6244 : // logical operation, do not perform the transformation. The highBitSet
6245 : // boolean indicates the value of the high bit of the constant which would
6246 : // cause it to be modified for this operation.
6247 0 : if (N->getOpcode() == ISD::SRA) {
6248 0 : bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6249 0 : if (BinOpRHSSignSet != HighBitSet)
6250 0 : return SDValue();
6251 : }
6252 :
6253 0 : if (!TLI.isDesirableToCommuteWithShift(N, Level))
6254 0 : return SDValue();
6255 :
6256 : // Fold the constants, shifting the binop RHS by the shift amount.
6257 0 : SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6258 : N->getValueType(0),
6259 0 : LHS->getOperand(1), N->getOperand(1));
6260 : assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6261 :
6262 : // Create the new shift.
6263 0 : SDValue NewShift = DAG.getNode(N->getOpcode(),
6264 0 : SDLoc(LHS->getOperand(0)),
6265 0 : VT, LHS->getOperand(0), N->getOperand(1));
6266 :
6267 : // Create the new binop.
6268 0 : return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6269 : }
6270 :
6271 844 : SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6272 : assert(N->getOpcode() == ISD::TRUNCATE);
6273 : assert(N->getOperand(0).getOpcode() == ISD::AND);
6274 :
6275 : // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
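 : // e.g. (illustrative): (truncate:i8 (and x:i32, 31)) ->
 : // (and (truncate:i8 x), 31), so the mask is applied on the narrow type.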
6276 827 : if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6277 587 : SDValue N01 = N->getOperand(0).getOperand(1);
6278 587 : if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6279 : SDLoc DL(N);
6280 584 : EVT TruncVT = N->getValueType(0);
6281 584 : SDValue N00 = N->getOperand(0).getOperand(0);
6282 1168 : SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6283 1168 : SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6284 584 : AddToWorklist(Trunc00.getNode());
6285 584 : AddToWorklist(Trunc01.getNode());
6286 1168 : return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6287 : }
6288 : }
6289 :
6290 260 : return SDValue();
6291 : }
6292 :
6293 2760 : SDValue DAGCombiner::visitRotate(SDNode *N) {
6294 : SDLoc dl(N);
6295 2760 : SDValue N0 = N->getOperand(0);
6296 2760 : SDValue N1 = N->getOperand(1);
6297 5520 : EVT VT = N->getValueType(0);
6298 : unsigned Bitsize = VT.getScalarSizeInBits();
6299 :
6300 : // fold (rot x, 0) -> x
6301 2760 : if (isNullConstantOrNullSplatConstant(N1))
6302 4 : return N0;
6303 :
6304 : // fold (rot x, c) -> (rot x, c % BitSize)
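 : // e.g. (illustrative, i32): (rotl x, 37) -> (rotl x, 5), since rotating by
 : // the full bit width is a no-op and 37 % 32 == 5.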
6305 2756 : if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6306 3303 : if (Cst->getAPIntValue().uge(Bitsize)) {
6307 10 : uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6308 10 : return DAG.getNode(N->getOpcode(), dl, VT, N0,
6309 10 : DAG.getConstant(RotAmt, dl, N1.getValueType()));
6310 : }
6311 : }
6312 :
6313 : // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6314 2746 : if (N1.getOpcode() == ISD::TRUNCATE &&
6315 257 : N1.getOperand(0).getOpcode() == ISD::AND) {
6316 53 : if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6317 159 : return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6318 : }
6319 :
6320 : unsigned NextOp = N0.getOpcode();
6321 : // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
6322 2693 : if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6323 17 : SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6324 34 : SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6325 17 : if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6326 17 : EVT ShiftVT = C1->getValueType(0);
6327 17 : bool SameSide = (N->getOpcode() == NextOp);
6328 17 : unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6329 17 : if (SDValue CombinedShift =
6330 17 : DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6331 17 : SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6332 17 : SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6333 : ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6334 17 : BitsizeC.getNode());
6335 34 : return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6336 34 : CombinedShiftNorm);
6337 : }
6338 : }
6339 : }
6340 2676 : return SDValue();
6341 : }
6342 :
6343 123493 : SDValue DAGCombiner::visitSHL(SDNode *N) {
6344 123493 : SDValue N0 = N->getOperand(0);
6345 123493 : SDValue N1 = N->getOperand(1);
6346 123493 : EVT VT = N0.getValueType();
6347 : unsigned OpSizeInBits = VT.getScalarSizeInBits();
6348 :
6349 : // fold vector ops
6350 123493 : if (VT.isVector()) {
6351 3968 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
6352 6 : return FoldedVOp;
6353 :
6354 : BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6355 : // If setcc produces all-one true value then:
6356 : // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6357 2476 : if (N1CV && N1CV->isConstant()) {
6358 2350 : if (N0.getOpcode() == ISD::AND) {
6359 20 : SDValue N00 = N0->getOperand(0);
6360 20 : SDValue N01 = N0->getOperand(1);
6361 : BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6362 :
6363 19 : if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6364 12 : TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6365 : TargetLowering::ZeroOrNegativeOneBooleanContent) {
6366 4 : if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6367 8 : N01CV, N1CV))
6368 8 : return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6369 : }
6370 : }
6371 : }
6372 : }
6373 :
6374 123483 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
6375 :
6376 : // fold (shl c1, c2) -> c1<<c2
6377 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6378 123483 : if (N0C && N1C && !N1C->isOpaque())
6379 7530 : return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6380 : // fold (shl 0, x) -> 0
6381 119718 : if (isNullConstantOrNullSplatConstant(N0))
6382 82 : return N0;
6383 : // fold (shl x, c >= size(x)) -> undef
6384 : // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6385 : auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6386 205922 : return Val->getAPIntValue().uge(OpSizeInBits);
6387 : };
6388 239272 : if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6389 49 : return DAG.getUNDEF(VT);
6390 : // fold (shl x, 0) -> x
6391 221801 : if (N1C && N1C->isNullValue())
6392 443 : return N0;
6393 : // fold (shl undef, x) -> 0
6394 119144 : if (N0.isUndef())
6395 22 : return DAG.getConstant(0, SDLoc(N), VT);
6396 :
6397 119133 : if (SDValue NewSel = foldBinOpIntoSelect(N))
6398 12 : return NewSel;
6399 :
6400 : // if (shl x, c) is known to be zero, return 0
6401 119121 : if (DAG.MaskedValueIsZero(SDValue(N, 0),
6402 238242 : APInt::getAllOnesValue(OpSizeInBits)))
6403 4238 : return DAG.getConstant(0, SDLoc(N), VT);
6404 : // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6405 117002 : if (N1.getOpcode() == ISD::TRUNCATE &&
6406 5738 : N1.getOperand(0).getOpcode() == ISD::AND) {
6407 725 : if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6408 1022 : return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6409 : }
6410 :
6411 116518 : if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6412 6762 : return SDValue(N, 0);
6413 :
6414 : // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
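 : // e.g. (illustrative, i32): (shl (shl x, 8), 8) -> (shl x, 16), while
 : // (shl (shl x, 24), 16) -> 0 because 24 + 16 >= 32 shifts every bit out.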
6415 109756 : if (N0.getOpcode() == ISD::SHL) {
6416 : auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6417 : ConstantSDNode *RHS) {
6418 : APInt c1 = LHS->getAPIntValue();
6419 : APInt c2 = RHS->getAPIntValue();
6420 : zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6421 : return (c1 + c2).uge(OpSizeInBits);
6422 : };
6423 746 : if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6424 12 : return DAG.getConstant(0, SDLoc(N), VT);
6425 :
6426 : auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6427 : ConstantSDNode *RHS) {
6428 : APInt c1 = LHS->getAPIntValue();
6429 : APInt c2 = RHS->getAPIntValue();
6430 : zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6431 : return (c1 + c2).ult(OpSizeInBits);
6432 : };
6433 734 : if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6434 : SDLoc DL(N);
6435 90 : EVT ShiftVT = N1.getValueType();
6436 180 : SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6437 180 : return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6438 : }
6439 : }
6440 :
6441 : // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6442 : // For this to be valid, the second form must not preserve any of the bits
6443 : // that are shifted out by the inner shift in the first form. This means
6444 : // the outer shift size must be >= the number of bits added by the ext.
6445 : // As a corollary, we don't care what kind of ext it is.
6446 92780 : if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6447 78960 : N0.getOpcode() == ISD::ANY_EXTEND ||
6448 126183 : N0.getOpcode() == ISD::SIGN_EXTEND) &&
6449 16523 : N0.getOperand(0).getOpcode() == ISD::SHL) {
6450 47 : SDValue N0Op0 = N0.getOperand(0);
6451 47 : if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6452 39 : APInt c1 = N0Op0C1->getAPIntValue();
6453 39 : APInt c2 = N1C->getAPIntValue();
6454 39 : zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6455 :
6456 39 : EVT InnerShiftVT = N0Op0.getValueType();
6457 39 : uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6458 78 : if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6459 : SDLoc DL(N0);
6460 15 : APInt Sum = c1 + c2;
6461 15 : if (Sum.uge(OpSizeInBits))
6462 4 : return DAG.getConstant(0, DL, VT);
6463 :
6464 11 : return DAG.getNode(
6465 : ISD::SHL, DL, VT,
6466 22 : DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6467 11 : DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6468 : }
6469 : }
6470 : }
6471 :
6472 : // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6473 : // Only fold this if the inner zext has no other uses to avoid increasing
6474 : // the total number of instructions.
6475 119083 : if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6476 8486 : N0.getOperand(0).getOpcode() == ISD::SRL) {
6477 : SDValue N0Op0 = N0.getOperand(0);
6478 103 : if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6479 103 : if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6480 103 : uint64_t c1 = N0Op0C1->getZExtValue();
6481 103 : uint64_t c2 = N1C->getZExtValue();
6482 103 : if (c1 == c2) {
6483 14 : SDValue NewOp0 = N0.getOperand(0);
6484 28 : EVT CountVT = NewOp0.getOperand(1).getValueType();
6485 : SDLoc DL(N);
6486 14 : SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6487 : NewOp0,
6488 14 : DAG.getConstant(c2, DL, CountVT));
6489 14 : AddToWorklist(NewSHL.getNode());
6490 28 : return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6491 : }
6492 : }
6493 : }
6494 : }
6495 :
6496 : // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
6497 : // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
6498 109631 : if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6499 5236 : N0->getFlags().hasExact()) {
6500 1864 : if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6501 1864 : uint64_t C1 = N0C1->getZExtValue();
6502 1864 : uint64_t C2 = N1C->getZExtValue();
6503 : SDLoc DL(N);
6504 1864 : if (C1 <= C2)
6505 1834 : return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6506 1834 : DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6507 30 : return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6508 30 : DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6509 : }
6510 : }
6511 :
6512 : // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
6513 : // (and (srl x, (sub c1, c2), MASK)
6514 : // Only fold this if the inner shift has no other uses -- if it does, folding
6515 : // this will increase the total number of instructions.
6516 110328 : if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
6517 1820 : if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6518 1654 : uint64_t c1 = N0C1->getZExtValue();
6519 1654 : if (c1 < OpSizeInBits) {
6520 1654 : uint64_t c2 = N1C->getZExtValue();
6521 1654 : APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6522 1654 : SDValue Shift;
6523 1654 : if (c2 > c1) {
6524 124 : Mask <<= c2 - c1;
6525 : SDLoc DL(N);
6526 124 : Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6527 124 : DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6528 : } else {
6529 1530 : Mask.lshrInPlace(c1 - c2);
6530 : SDLoc DL(N);
6531 1530 : Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6532 1530 : DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6533 : }
6534 : SDLoc DL(N0);
6535 1654 : return DAG.getNode(ISD::AND, DL, VT, Shift,
6536 1654 : DAG.getConstant(Mask, DL, VT));
6537 : }
6538 : }
6539 : }
6540 :
6541 : // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6542 106184 : if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6543 71 : isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6544 : SDLoc DL(N);
6545 71 : SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6546 142 : SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6547 142 : return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6548 : }
6549 :
6550 : // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6551 : // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6552 : // Variant of the version done on multiply, except that a mul by a power of 2
6553 : // is turned into a shift.
6554 102902 : if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6555 3085 : N0.getNode()->hasOneUse() &&
6556 6148 : isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6557 110241 : isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6558 1136 : TLI.isDesirableToCommuteWithShift(N, Level)) {
6559 1231 : SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6560 1062 : SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6561 1062 : AddToWorklist(Shl0.getNode());
6562 1062 : AddToWorklist(Shl1.getNode());
6563 2293 : return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6564 : }
6565 :
6566 : // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6567 1419 : if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6568 107814 : isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6569 1415 : isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6570 1287 : SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6571 1287 : if (isConstantOrConstantVector(Shl))
6572 2576 : return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6573 : }
6574 :
6575 103693 : if (N1C && !N1C->isOpaque())
6576 86829 : if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6577 636 : return NewSHL;
6578 :
6579 103057 : return SDValue();
6580 : }
6581 :
6582 30600 : SDValue DAGCombiner::visitSRA(SDNode *N) {
6583 30600 : SDValue N0 = N->getOperand(0);
6584 30600 : SDValue N1 = N->getOperand(1);
6585 30600 : EVT VT = N0.getValueType();
6586 30600 : unsigned OpSizeInBits = VT.getScalarSizeInBits();
6587 :
6588 : // Arithmetic shifting an all-sign-bit value is a no-op.
6589 : // fold (sra 0, x) -> 0
6590 : // fold (sra -1, x) -> -1
6591 30600 : if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6592 455 : return N0;
6593 :
6594 : // fold vector ops
6595 30145 : if (VT.isVector())
6596 3183 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
6597 3 : return FoldedVOp;
6598 :
6599 30142 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
6600 :
6601 : // fold (sra c1, c2) -> c1 >>s c2
6602 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6603 30142 : if (N0C && N1C && !N1C->isOpaque())
6604 0 : return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
6605 : // fold (sra x, c >= size(x)) -> undef
6606 : // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6607 : auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6608 55274 : return Val->getAPIntValue().uge(OpSizeInBits);
6609 30142 : };
6610 60284 : if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6611 23 : return DAG.getUNDEF(VT);
6612 : // fold (sra x, 0) -> x
6613 57255 : if (N1C && N1C->isNullValue())
6614 15 : return N0;
6615 :
6616 30104 : if (SDValue NewSel = foldBinOpIntoSelect(N))
6617 3 : return NewSel;
6618 :
6619 : // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
6620 : // sext_inreg.
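 : // e.g. (illustrative, i32): (sra (shl x, 24), 24) -> sign_extend_inreg of
 : // the low 8 bits of x (ExtVT == i8).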
6621 30101 : if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
6622 3133 : unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
6623 3133 : EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
6624 3133 : if (VT.isVector())
6625 183 : ExtVT = EVT::getVectorVT(*DAG.getContext(),
6626 183 : ExtVT, VT.getVectorNumElements());
6627 3133 : if ((!LegalOperations ||
6628 2433 : TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
6629 733 : return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6630 2199 : N0.getOperand(0), DAG.getValueType(ExtVT));
6631 : }
6632 :
6633 : // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
6634 : // clamp (add c1, c2) to max shift.
6635 29368 : if (N0.getOpcode() == ISD::SRA) {
6636 : SDLoc DL(N);
6637 332 : EVT ShiftVT = N1.getValueType();
6638 332 : EVT ShiftSVT = ShiftVT.getScalarType();
6639 : SmallVector<SDValue, 16> ShiftValues;
6640 :
6641 : auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6642 : APInt c1 = LHS->getAPIntValue();
6643 : APInt c2 = RHS->getAPIntValue();
6644 : zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6645 : APInt Sum = c1 + c2;
6646 : unsigned ShiftSum =
6647 : Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
6648 : ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
6649 : return true;
6650 : };
6651 664 : if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
6652 256 : SDValue ShiftValue;
6653 256 : if (VT.isVector())
6654 30 : ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
6655 : else
6656 241 : ShiftValue = ShiftValues[0];
6657 512 : return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
6658 : }
6659 : }
6660 :
6661 : // fold (sra (shl X, m), (sub result_size, n))
6662 : // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
6663 : // result_size - n != m.
6664 : // If truncate is free for the target, sext(shl) is likely to result in better
6665 : // code.
6666 29112 : if (N0.getOpcode() == ISD::SHL && N1C) {
6667 : // Get the two constants of the shifts, CN0 = m, CN = n.
6668 4332 : const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
6669 4332 : if (N01C) {
6670 4332 : LLVMContext &Ctx = *DAG.getContext();
6671 : // Determine what the truncate's result bitsize and type would be.
6672 8664 : EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
6673 :
6674 4332 : if (VT.isVector())
6675 87 : TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
6676 :
6677 : // Determine the residual right-shift amount.
6678 8664 : int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
6679 :
6680 : // If the shift is not a no-op (in which case this should be just a sign
6681 : // extend already), the type truncated to is legal, sign_extend is legal
6682 : // on that type, and the truncate to that type is both legal and free,
6683 : // perform the transform.
6684 : if ((ShiftAmt > 0) &&
6685 1886 : TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
6686 4433 : TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
6687 101 : TLI.isTruncateFree(VT, TruncVT)) {
6688 : SDLoc DL(N);
6689 13 : SDValue Amt = DAG.getConstant(ShiftAmt, DL,
6690 26 : getShiftAmountTy(N0.getOperand(0).getValueType()));
6691 13 : SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
6692 13 : N0.getOperand(0), Amt);
6693 13 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
6694 13 : Shift);
6695 13 : return DAG.getNode(ISD::SIGN_EXTEND, DL,
6696 26 : N->getValueType(0), Trunc);
6697 : }
6698 : }
6699 : }
6700 :
6701 : // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
6702 29099 : if (N1.getOpcode() == ISD::TRUNCATE &&
6703 320 : N1.getOperand(0).getOpcode() == ISD::AND) {
6704 18 : if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6705 30 : return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
6706 : }
6707 :
6708 : // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
6709 : // if c1 is equal to the number of bits the trunc removes
6710 4783 : if (N0.getOpcode() == ISD::TRUNCATE &&
6711 4783 : (N0.getOperand(0).getOpcode() == ISD::SRL ||
6712 2304 : N0.getOperand(0).getOpcode() == ISD::SRA) &&
6713 4074 : N0.getOperand(0).hasOneUse() &&
6714 32736 : N0.getOperand(0).getOperand(1).hasOneUse() &&
6715 : N1C) {
6716 106 : SDValue N0Op0 = N0.getOperand(0);
6717 106 : if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
6718 212 : unsigned LargeShiftVal = LargeShift->getZExtValue();
6719 106 : EVT LargeVT = N0Op0.getValueType();
6720 :
6721 106 : if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
6722 : SDLoc DL(N);
6723 : SDValue Amt =
6724 100 : DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
6725 300 : getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
6726 100 : SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
6727 100 : N0Op0.getOperand(0), Amt);
6728 200 : return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
6729 : }
6730 : }
6731 : }
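  // Worked example (illustrative): with x : i64, truncating to i32 removes
  // 32 bits, so the c1 == 32 case folds as
  //   (sra (trunc:i32 (srl x, 32)), 5) -> (trunc:i32 (sra x, 37))
  // The wide sra shifts in sign bits that the trunc then discards, matching
  // the narrow sra bit for bit.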
6732 :
6733 : // Simplify, based on bits shifted out of the LHS.
6734 28984 : if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6735 1502 : return SDValue(N, 0);
6736 :
6737 : // If the sign bit is known to be zero, switch this to a SRL.
6738 27482 : if (DAG.SignBitIsZero(N0))
6739 238 : return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6740 :
6741 27363 : if (N1C && !N1C->isOpaque())
6742 24524 : if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6743 0 : return NewSRA;
6744 :
6745 27363 : return SDValue();
6746 : }
6747 :
6748 164810 : SDValue DAGCombiner::visitSRL(SDNode *N) {
6749 164810 : SDValue N0 = N->getOperand(0);
6750 164810 : SDValue N1 = N->getOperand(1);
6751 164810 : EVT VT = N0.getValueType();
6752 : unsigned OpSizeInBits = VT.getScalarSizeInBits();
6753 :
6754 : // fold vector ops
6755 164810 : if (VT.isVector())
6756 4830 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
6757 4 : return FoldedVOp;
6758 :
6759 164806 : ConstantSDNode *N1C = isConstOrConstSplat(N1);
6760 :
6761 : // fold (srl c1, c2) -> c1 >>u c2
6762 : ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6763 164806 : if (N0C && N1C && !N1C->isOpaque())
6764 5270 : return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6765 : // fold (srl 0, x) -> 0
6766 162171 : if (isNullConstantOrNullSplatConstant(N0))
6767 59 : return N0;
6768 : // fold (srl x, c >= size(x)) -> undef
6769 : // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6770 : auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6771 299900 : return Val->getAPIntValue().uge(OpSizeInBits);
6772 : };
6773 324224 : if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6774 27 : return DAG.getUNDEF(VT);
6775 : // fold (srl x, 0) -> x
6776 311245 : if (N1C && N1C->isNullValue())
6777 2218 : return N0;
6778 :
6779 159867 : if (SDValue NewSel = foldBinOpIntoSelect(N))
6780 4 : return NewSel;
6781 :
6782 : // if (srl x, c) is known to be zero, return 0
6783 306802 : if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6784 453695 : APInt::getAllOnesValue(OpSizeInBits)))
6785 92 : return DAG.getConstant(0, SDLoc(N), VT);
6786 :
6787 : // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6788 159817 : if (N0.getOpcode() == ISD::SRL) {
6789 : auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6790 : ConstantSDNode *RHS) {
6791 : APInt c1 = LHS->getAPIntValue();
6792 : APInt c2 = RHS->getAPIntValue();
6793 : zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6794 : return (c1 + c2).uge(OpSizeInBits);
6795 : };
6796 10598 : if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6797 10 : return DAG.getConstant(0, SDLoc(N), VT);
6798 :
6799 : auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6800 : ConstantSDNode *RHS) {
6801 : APInt c1 = LHS->getAPIntValue();
6802 : APInt c2 = RHS->getAPIntValue();
6803 : zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6804 : return (c1 + c2).ult(OpSizeInBits);
6805 : };
6806 10588 : if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6807 : SDLoc DL(N);
6808 4426 : EVT ShiftVT = N1.getValueType();
6809 8852 : SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6810 8852 : return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
6811 : }
6812 : }
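  // Worked example (illustrative): for i8 values,
  //   (srl (srl x, 3), 2) -> (srl x, 5)   since 3 + 2 < 8
  //   (srl (srl x, 5), 4) -> 0            since 5 + 4 >= 8 shifts out all bits
  // The zero-extension of c1/c2 in the predicates above guards against the
  // addition itself wrapping before the range check.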
6813 :
6814 : // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
6815 155386 : if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
6816 14494 : N0.getOperand(0).getOpcode() == ISD::SRL) {
6817 4673 : if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
6818 4673 : uint64_t c1 = N001C->getZExtValue();
6819 4673 : uint64_t c2 = N1C->getZExtValue();
6820 9346 : EVT InnerShiftVT = N0.getOperand(0).getValueType();
6821 9346 : EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
6822 4673 : uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6823 : // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
6824 4673 : if (c1 + OpSizeInBits == InnerShiftSize) {
6825 : SDLoc DL(N0);
6826 3217 : if (c1 + c2 >= InnerShiftSize)
6827 0 : return DAG.getConstant(0, DL, VT);
6828 3217 : return DAG.getNode(ISD::TRUNCATE, DL, VT,
6829 : DAG.getNode(ISD::SRL, DL, InnerShiftVT,
6830 3217 : N0.getOperand(0).getOperand(0),
6831 : DAG.getConstant(c1 + c2, DL,
6832 3217 : ShiftCountVT)));
6833 : }
6834 : }
6835 : }
6836 :
6837 : // fold (srl (shl x, c), c) -> (and x, cst2)
6838 153501 : if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
6839 1332 : isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
6840 : SDLoc DL(N);
6841 : SDValue Mask =
6842 410 : DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
6843 205 : AddToWorklist(Mask.getNode());
6844 410 : return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
6845 : }
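  // Worked example (illustrative): for an i8 value with c == 3,
  //   (srl (shl x, 3), 3) -> (and x, 0x1f)
  // Shifting left then right by the same amount only clears the high c bits,
  // and the mask is computed as (srl all-ones, c).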
6846 :
6847 : // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
6848 151964 : if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6849 : // Shifting in all undef bits?
6850 770 : EVT SmallVT = N0.getOperand(0).getValueType();
6851 : unsigned BitSize = SmallVT.getScalarSizeInBits();
6852 770 : if (N1C->getZExtValue() >= BitSize)
6853 171 : return DAG.getUNDEF(VT);
6854 :
6855 385 : if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
6856 171 : uint64_t ShiftAmt = N1C->getZExtValue();
6857 : SDLoc DL0(N0);
6858 171 : SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
6859 : N0.getOperand(0),
6860 : DAG.getConstant(ShiftAmt, DL0,
6861 171 : getShiftAmountTy(SmallVT)));
6862 171 : AddToWorklist(SmallShift.getNode());
6863 171 : APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
6864 : SDLoc DL(N);
6865 171 : return DAG.getNode(ISD::AND, DL, VT,
6866 171 : DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
6867 171 : DAG.getConstant(Mask, DL, VT));
6868 : }
6869 : }
6870 :
6871 : // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
6872 : // bit, which is unmodified by sra.
6873 290672 : if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
6874 4372 : if (N0.getOpcode() == ISD::SRA)
6875 826 : return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
6876 : }
6877 :
6878 : // fold (srl (ctlz x), log2(bitwidth)) -> x iff x has one bit set (the low bit).
6879 151401 : if (N1C && N0.getOpcode() == ISD::CTLZ &&
6880 348 : N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
6881 171 : KnownBits Known;
6882 348 : DAG.computeKnownBits(N0.getOperand(0), Known);
6883 :
6884 : // If any of the input bits are KnownOne, then the input couldn't be all
6885 : // zeros, thus the result of the srl will always be zero.
6886 174 : if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
6887 :
6888 : // If all of the bits input to the ctlz node are known to be zero, then
6889 : // the result of the ctlz is the bit width and the result of the shift is one.
6890 174 : APInt UnknownBits = ~Known.Zero;
6891 174 : if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
6892 :
6893 : // Otherwise, check to see if there is exactly one bit input to the ctlz.
6894 : // Okay, we know that only the single bit specified by UnknownBits could
6895 : // be set on input to the CTLZ node. If this bit is set, the SRL will
6896 : // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to an
6897 : // SRL/XOR pair, which is likely to simplify further.
6898 : // to an SRL/XOR pair, which is likely to simplify more.
6899 3 : unsigned ShAmt = UnknownBits.countTrailingZeros();
6900 3 : SDValue Op = N0.getOperand(0);
6901 :
6902 3 : if (ShAmt) {
6903 : SDLoc DL(N0);
6904 3 : Op = DAG.getNode(ISD::SRL, DL, VT, Op,
6905 : DAG.getConstant(ShAmt, DL,
6906 3 : getShiftAmountTy(Op.getValueType())));
6907 3 : AddToWorklist(Op.getNode());
6908 : }
6909 :
6910 : SDLoc DL(N);
6911 3 : return DAG.getNode(ISD::XOR, DL, VT,
6912 3 : Op, DAG.getConstant(1, DL, VT));
6913 : }
6914 : }
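  // Worked example (illustrative): suppose x : i32 and only bit 4 of x can
  // be nonzero. Then ctlz(x) is 27 when the bit is set and 32 when x == 0,
  // so (srl (ctlz x), 5) is 0 or 1 respectively and can be rewritten as
  //   (xor (srl x, 4), 1)
  // which tests bit 4 directly and avoids the ctlz entirely.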
6915 :
6916 : // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
6917 151398 : if (N1.getOpcode() == ISD::TRUNCATE &&
6918 3190 : N1.getOperand(0).getOpcode() == ISD::AND) {
6919 48 : if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6920 65 : return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
6921 : }
6922 :
6923 : // fold operands of srl based on knowledge that the low bits are not
6924 : // demanded.
6925 151366 : if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6926 7862 : return SDValue(N, 0);
6927 :
6928 143504 : if (N1C && !N1C->isOpaque())
6929 130607 : if (SDValue NewSRL = visitShiftByConstant(N, N1C))
6930 600 : return NewSRL;
6931 :
6932 : // Attempt to convert a srl of a load into a narrower zero-extending load.
6933 142904 : if (SDValue NarrowLoad = ReduceLoadWidth(N))
6934 120 : return NarrowLoad;
6935 :
6936 : // Here is a common situation. We want to optimize:
6937 : //
6938 : // %a = ...
6939 : // %b = and i32 %a, 2
6940 : // %c = srl i32 %b, 1
6941 : // brcond i32 %c ...
6942 : //
6943 : // into
6944 : //
6945 : // %a = ...
6946 : // %b = and %a, 2
6947 : // %c = setcc eq %b, 0
6948 : // brcond %c ...
6949 : //
6950 : // However, after the source operand of the SRL is optimized into an AND,
6951 : // the SRL itself may not be optimized further. Look for it and add the
6952 : // BRCOND into the worklist.
6953 : if (N->hasOneUse()) {
6954 : SDNode *Use = *N->use_begin();
6955 271004 : if (Use->getOpcode() == ISD::BRCOND)
6956 4 : AddToWorklist(Use);
6957 135498 : else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
6958 : // Also look past the truncate.
6959 : Use = *Use->use_begin();
6960 30099 : if (Use->getOpcode() == ISD::BRCOND)
6961 2 : AddToWorklist(Use);
6962 : }
6963 : }
6964 :
6965 142784 : return SDValue();
6966 : }
6967 :
6968 0 : SDValue DAGCombiner::visitABS(SDNode *N) {
6969 0 : SDValue N0 = N->getOperand(0);
6970 0 : EVT VT = N->getValueType(0);
6971 :
6972 : // fold (abs c1) -> c2
6973 0 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6974 0 : return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6975 : // fold (abs (abs x)) -> (abs x)
6976 0 : if (N0.getOpcode() == ISD::ABS)
6977 0 : return N0;
6978 : // fold (abs x) -> x iff not-negative
6979 0 : if (DAG.SignBitIsZero(N0))
6980 0 : return N0;
6981 0 : return SDValue();
6982 : }
6983 :
6984 0 : SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6985 0 : SDValue N0 = N->getOperand(0);
6986 0 : EVT VT = N->getValueType(0);
6987 :
6988 : // fold (bswap c1) -> c2
6989 0 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6990 0 : return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6991 : // fold (bswap (bswap x)) -> x
6992 0 : if (N0.getOpcode() == ISD::BSWAP)
6993 0 : return N0->getOperand(0);
6994 0 : return SDValue();
6995 : }
6996 :
6997 0 : SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6998 0 : SDValue N0 = N->getOperand(0);
6999 0 : EVT VT = N->getValueType(0);
7000 :
7001 : // fold (bitreverse c1) -> c2
7002 0 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7003 0 : return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7004 : // fold (bitreverse (bitreverse x)) -> x
7005 0 : if (N0.getOpcode() == ISD::BITREVERSE)
7006 0 : return N0.getOperand(0);
7007 0 : return SDValue();
7008 : }
7009 :
7010 1221 : SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7011 1221 : SDValue N0 = N->getOperand(0);
7012 1221 : EVT VT = N->getValueType(0);
7013 :
7014 : // fold (ctlz c1) -> c2
7015 1221 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7016 4 : return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7017 :
7018 : // If the value is known never to be zero, switch to the undef version.
7019 1219 : if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7020 400 : if (DAG.isKnownNeverZero(N0))
7021 14 : return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7022 : }
7023 :
7024 1212 : return SDValue();
7025 : }
7026 :
7027 0 : SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7028 0 : SDValue N0 = N->getOperand(0);
7029 0 : EVT VT = N->getValueType(0);
7030 :
7031 : // fold (ctlz_zero_undef c1) -> c2
7032 0 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7033 0 : return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7034 0 : return SDValue();
7035 : }
7036 :
7037 500 : SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7038 500 : SDValue N0 = N->getOperand(0);
7039 500 : EVT VT = N->getValueType(0);
7040 :
7041 : // fold (cttz c1) -> c2
7042 500 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7043 0 : return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7044 :
7045 : // If the value is known never to be zero, switch to the undef version.
7046 500 : if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7047 322 : if (DAG.isKnownNeverZero(N0))
7048 34 : return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7049 : }
7050 :
7051 483 : return SDValue();
7052 : }
7053 :
7054 0 : SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7055 0 : SDValue N0 = N->getOperand(0);
7056 0 : EVT VT = N->getValueType(0);
7057 :
7058 : // fold (cttz_zero_undef c1) -> c2
7059 0 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7060 0 : return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7061 0 : return SDValue();
7062 : }
7063 :
7064 0 : SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7065 0 : SDValue N0 = N->getOperand(0);
7066 0 : EVT VT = N->getValueType(0);
7067 :
7068 : // fold (ctpop c1) -> c2
7069 0 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7070 0 : return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7071 0 : return SDValue();
7072 : }
7073 :
7074 : // FIXME: This should be checking for no signed zeros on individual operands,
7075 : // as well as no NaNs.
7076 28878 : static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
7077 28878 : const TargetOptions &Options = DAG.getTarget().Options;
7078 28878 : EVT VT = LHS.getValueType();
7079 :
7080 808 : return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7081 29477 : DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7082 : }
7083 :
7084 : /// Generate Min/Max node
7085 0 : static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7086 : SDValue RHS, SDValue True, SDValue False,
7087 : ISD::CondCode CC, const TargetLowering &TLI,
7088 : SelectionDAG &DAG) {
7089 : if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7090 0 : return SDValue();
7091 :
7092 0 : EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7093 0 : switch (CC) {
7094 0 : case ISD::SETOLT:
7095 : case ISD::SETOLE:
7096 : case ISD::SETLT:
7097 : case ISD::SETLE:
7098 : case ISD::SETULT:
7099 : case ISD::SETULE: {
7100 : unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7101 0 : if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7102 0 : return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7103 0 : return SDValue();
7104 : }
7105 0 : case ISD::SETOGT:
7106 : case ISD::SETOGE:
7107 : case ISD::SETGT:
7108 : case ISD::SETGE:
7109 : case ISD::SETUGT:
7110 : case ISD::SETUGE: {
7111 : unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7112 0 : if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7113 0 : return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7114 0 : return SDValue();
7115 : }
7116 0 : default:
7117 0 : return SDValue();
7118 : }
7119 : }
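// Illustrative mapping (not exhaustive): with LHS == True, a less-than style
// predicate selects the smaller operand, e.g.
//   (select (setcc olt x, y), x, y) -> (fminnum x, y)
// With the select operands swapped (LHS == False), the same predicate picks
// the larger operand instead, yielding fmaxnum.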
7120 :
7121 31935 : SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7122 31935 : SDValue Cond = N->getOperand(0);
7123 31935 : SDValue N1 = N->getOperand(1);
7124 31935 : SDValue N2 = N->getOperand(2);
7125 63870 : EVT VT = N->getValueType(0);
7126 31935 : EVT CondVT = Cond.getValueType();
7127 : SDLoc DL(N);
7128 :
7129 31935 : if (!VT.isInteger())
7130 7164 : return SDValue();
7131 :
7132 : auto *C1 = dyn_cast<ConstantSDNode>(N1);
7133 : auto *C2 = dyn_cast<ConstantSDNode>(N2);
7134 24771 : if (!C1 || !C2)
7135 20499 : return SDValue();
7136 :
7137 : // Only do this before legalization to avoid conflicting with target-specific
7138 : // transforms in the other direction (create a select from a zext/sext). There
7139 : // is also a target-independent combine here in DAGCombiner in the other
7140 : // direction for (select Cond, -1, 0) when the condition is not i1.
7141 2865 : if (CondVT == MVT::i1 && !LegalOperations) {
7142 2849 : if (C1->isNullValue() && C2->isOne()) {
7143 : // select Cond, 0, 1 --> zext (!Cond)
7144 66 : SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7145 : if (VT != MVT::i1)
7146 66 : NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7147 33 : return NotCond;
7148 : }
7149 1438 : if (C1->isNullValue() && C2->isAllOnesValue()) {
7150 : // select Cond, 0, -1 --> sext (!Cond)
7151 52 : SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7152 : if (VT != MVT::i1)
7153 52 : NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7154 26 : return NotCond;
7155 : }
7156 1446 : if (C1->isOne() && C2->isNullValue()) {
7157 : // select Cond, 1, 0 --> zext (Cond)
7158 : if (VT != MVT::i1)
7159 156 : Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7160 78 : return Cond;
7161 : }
7162 1329 : if (C1->isAllOnesValue() && C2->isNullValue()) {
7163 : // select Cond, -1, 0 --> sext (Cond)
7164 : if (VT != MVT::i1)
7165 114 : Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7166 57 : return Cond;
7167 : }
7168 :
7169 : // For any constants that differ by 1, we can transform the select into an
7170 : // extend and add. Use a target hook because some targets may prefer to
7171 : // transform in the other direction.
7172 1151 : if (TLI.convertSelectOfConstantsToMath(VT)) {
7173 1800 : if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7174 : // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7175 : if (VT != MVT::i1)
7176 66 : Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7177 66 : return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7178 : }
7179 1701 : if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7180 : // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7181 : if (VT != MVT::i1)
7182 170 : Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7183 170 : return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7184 : }
7185 : }
7186 :
7187 1033 : return SDValue();
7188 : }
7189 :
7190 : // fold (select Cond, 0, 1) -> (xor Cond, 1)
7191 : // We can't do this reliably if integer-based booleans have different contents
7192 : // from floating-point-based booleans. This is because we can't tell whether we
7193 : // have an integer-based boolean or a floating-point-based boolean unless we
7194 : // can find the SETCC that produced it and inspect its operands. This is
7195 : // fairly easy if C is the SETCC node, but it can potentially be
7196 : // undiscoverable (or not reasonably discoverable). For example, it could be
7197 : // in another basic block or it could require searching a complicated
7198 : // expression.
7199 2927 : if (CondVT.isInteger() &&
7200 2927 : TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7201 1273 : TargetLowering::ZeroOrOneBooleanContent &&
7202 : TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7203 1273 : TargetLowering::ZeroOrOneBooleanContent &&
7204 4942 : C1->isNullValue() && C2->isOne()) {
7205 : SDValue NotCond =
7206 0 : DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7207 0 : if (VT.bitsEq(CondVT))
7208 0 : return NotCond;
7209 0 : return DAG.getZExtOrTrunc(NotCond, DL, VT);
7210 : }
7211 :
7212 2927 : return SDValue();
7213 : }
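// Worked example (illustrative) of the adjacent-constants case above, with
// an i1 condition %c and i32 result:
//   select %c, 6, 5 -> add (zext %c), 5    (zext %c is 0 or 1)
//   select %c, 5, 6 -> add (sext %c), 6    (sext %c is 0 or -1)
// Either form trades a select for an extend plus an add, assuming the target
// opts in via convertSelectOfConstantsToMath.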
7214 :
7215 32115 : SDValue DAGCombiner::visitSELECT(SDNode *N) {
7216 32115 : SDValue N0 = N->getOperand(0);
7217 32115 : SDValue N1 = N->getOperand(1);
7218 32115 : SDValue N2 = N->getOperand(2);
7219 64230 : EVT VT = N->getValueType(0);
7220 32115 : EVT VT0 = N0.getValueType();
7221 : SDLoc DL(N);
7222 :
7223 : // fold (select C, X, X) -> X
7224 32115 : if (N1 == N2)
7225 70 : return N1;
7226 :
7227 : if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
7228 : // fold (select true, X, Y) -> X
7229 : // fold (select false, X, Y) -> Y
7230 230 : return !N0C->isNullValue() ? N1 : N2;
7231 : }
7232 :
7233 : // fold (select X, X, Y) -> (or X, Y)
7234 : // fold (select X, 1, Y) -> (or C, Y)
7235 : // fold (select X, 1, Y) -> (or X, Y)
7236 54 : return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7237 :
7238 31935 : if (SDValue V = foldSelectOfConstants(N))
7239 312 : return V;
7240 :
7241 : // fold (select C, 0, X) -> (and (not C), X)
7242 81 : if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7243 7 : SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7244 7 : AddToWorklist(NOTNode.getNode());
7245 14 : return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7246 : }
7247 : // fold (select C, X, 1) -> (or (not C), X)
7248 74 : if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7249 2 : SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7250 2 : AddToWorklist(NOTNode.getNode());
7251 4 : return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7252 : }
7253 : // fold (select X, Y, X) -> (and X, Y)
7254 : // fold (select X, Y, 0) -> (and X, Y)
7255 70 : if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7256 30 : return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7257 :
7258 : // If we can fold this based on the true/false value, do so.
7259 31599 : if (SimplifySelectOps(N, N1, N2))
7260 108 : return SDValue(N, 0); // Don't revisit N.
7261 :
7262 : if (VT0 == MVT::i1) {
7263 : // The code in this block deals with the following 2 equivalences:
7264 : // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7265 : // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7266 : // The target can specify its preferred form with the
7267 : // shouldNormalizeToSelectSequence() callback. However, we always transform
7268 : // to the right if the inner select already exists in the DAG, and we
7269 : // always transform to the left if we know that we can further optimize
7270 : // the combination of the conditions.
7271 : bool normalizeToSequence =
7272 21913 : TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7273 : // select (and Cond0, Cond1), X, Y
7274 : // -> select Cond0, (select Cond1, X, Y), Y
7275 21913 : if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7276 688 : SDValue Cond0 = N0->getOperand(0);
7277 688 : SDValue Cond1 = N0->getOperand(1);
7278 : SDValue InnerSelect =
7279 2064 : DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7280 688 : if (normalizeToSequence || !InnerSelect.use_empty())
7281 38 : return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7282 38 : InnerSelect, N2);
7283 : }
7284 : // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7285 21875 : if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7286 177 : SDValue Cond0 = N0->getOperand(0);
7287 177 : SDValue Cond1 = N0->getOperand(1);
7288 : SDValue InnerSelect =
7289 531 : DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7290 177 : if (normalizeToSequence || !InnerSelect.use_empty())
7291 54 : return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7292 54 : InnerSelect);
7293 : }
7294 :
7295 : // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7296 21821 : if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7297 1425 : SDValue N1_0 = N1->getOperand(0);
7298 1425 : SDValue N1_1 = N1->getOperand(1);
7299 1425 : SDValue N1_2 = N1->getOperand(2);
7300 1425 : if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7301 : // Create the actual and node if we can generate good code for it.
7302 214 : if (!normalizeToSequence) {
7303 292 : SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7304 438 : return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7305 : }
7306 : // Otherwise see if we can optimize the "and" to a better pattern.
7307 68 : if (SDValue Combined = visitANDLike(N0, N1_0, N))
7308 6 : return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7309 12 : N2);
7310 : }
7311 : }
7312 : // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7313 21669 : if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7314 833 : SDValue N2_0 = N2->getOperand(0);
7315 833 : SDValue N2_1 = N2->getOperand(1);
7316 833 : SDValue N2_2 = N2->getOperand(2);
7317 833 : if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7318 : // Create the actual or node if we can generate good code for it.
7319 117 : if (!normalizeToSequence) {
7320 34 : SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7321 34 : return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7322 : }
7323 : // Otherwise see if we can optimize to a better pattern.
7324 100 : if (SDValue Combined = visitORLike(N0, N2_0, N))
7325 6 : return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7326 6 : N2_2);
7327 : }
7328 : }
7329 : }
7330 :
7331 : if (VT0 == MVT::i1) {
7332 : // select (not Cond), N1, N2 -> select Cond, N2, N1
7333 21646 : if (isBitwiseNot(N0))
7334 564 : return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
7335 : }
7336 :
7337 : // Fold selects based on a setcc into other things, such as min/max/abs.
7338 31036 : if (N0.getOpcode() == ISD::SETCC) {
7339 25213 : SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7340 25213 : ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7341 :
7342 : // select (fcmp lt x, y), x, y -> fminnum x, y
7343 : // select (fcmp gt x, y), x, y -> fmaxnum x, y
7344 : //
7345 : // This is OK if we don't care what happens if either operand is a NaN.
7346 25213 : if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
7347 136 : if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7348 136 : CC, TLI, DAG))
7349 103 : return FMinMax;
7350 :
7351 : // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7352 : // This is conservatively limited to pre-legal-operations to give targets
7353 : // a chance to reverse the transform if they want to do that. Also, it is
7354 : // unlikely that the pattern would be formed late, so it's probably not
7355 : // worth going through the other checks.
7356 25036 : if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7357 703 : CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7358 25147 : N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7359 : auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7360 : auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7361 68 : if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7362 : // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7363 : // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7364 : //
7365 : // The IR equivalent of this transform would have this form:
7366 : // %a = add %x, C
7367 : // %c = icmp ugt %x, ~C
7368 : // %r = select %c, -1, %a
7369 : // =>
7370 : // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7371 : // %u0 = extractvalue %u, 0
7372 : // %u1 = extractvalue %u, 1
7373 : // %r = select %u1, -1, %u0
7374 10 : SDVTList VTs = DAG.getVTList(VT, VT0);
7375 20 : SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7376 10 : return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7377 : }
7378 : }
7379 :
7380 25100 : if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7381 24739 : (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7382 3598 : return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7383 3598 : N0.getOperand(2));
7384 :
7385 21502 : return SimplifySelect(DL, N0, N1, N2);
7386 : }
7387 :
7388 5823 : return SDValue();
7389 : }
7390 :
7391 : static
7392 19 : std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7393 : SDLoc DL(N);
7394 : EVT LoVT, HiVT;
7395 38 : std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7396 :
7397 : // Split the inputs.
7398 : SDValue Lo, Hi, LL, LH, RL, RH;
7399 19 : std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7400 19 : std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7401 :
7402 57 : Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7403 57 : Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7404 :
7405 19 : return std::make_pair(Lo, Hi);
7406 : }
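// Illustrative sketch: splitting a v8i32 setcc this way yields two
// independent v4i32 setcc nodes over the low and high halves of both
// operands, each carrying the original condition code from operand 2.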
7407 :
7408 : // This function assumes all the vselect's arguments are CONCAT_VECTORS
7409 : // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7410 43 : static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7411 : SDLoc DL(N);
7412 43 : SDValue Cond = N->getOperand(0);
7413 43 : SDValue LHS = N->getOperand(1);
7414 43 : SDValue RHS = N->getOperand(2);
7415 86 : EVT VT = N->getValueType(0);
7416 43 : int NumElems = VT.getVectorNumElements();
7417 : assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7418 : RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7419 : Cond.getOpcode() == ISD::BUILD_VECTOR);
7420 :
7421 : // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
7422 : // about the binary (two-operand) case here.
7423 43 : if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7424 11 : return SDValue();
7425 :
7426 : // We're sure we have an even number of elements due to the
7427 : // concat_vectors we have as arguments to vselect.
7428 : // Skip BV elements until we find one that's not an UNDEF. After we find a
7429 : // non-UNDEF element, keep looping until we reach half the length of the BV
7430 : // and check that all the non-undef elements are the same.
7431 : ConstantSDNode *BottomHalf = nullptr;
7432 98 : for (int i = 0; i < NumElems / 2; ++i) {
7433 170 : if (Cond->getOperand(i)->isUndef())
7434 : continue;
7435 :
7436 85 : if (BottomHalf == nullptr)
7437 : BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7438 53 : else if (Cond->getOperand(i).getNode() != BottomHalf)
7439 19 : return SDValue();
7440 : }
7441 :
7442 : // Do the same for the second half of the BuildVector
7443 : ConstantSDNode *TopHalf = nullptr;
7444 48 : for (int i = NumElems / 2; i < NumElems; ++i) {
7445 76 : if (Cond->getOperand(i)->isUndef())
7446 : continue;
7447 :
7448 38 : if (TopHalf == nullptr)
7449 : TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7450 25 : else if (Cond->getOperand(i).getNode() != TopHalf)
7451 3 : return SDValue();
7452 : }
7453 :
7454 : assert(TopHalf && BottomHalf &&
7455 : "One half of the selector was all UNDEFs and the other was all the "
7456 : "same value. This should have been addressed before this function.");
7457 : return DAG.getNode(
7458 : ISD::CONCAT_VECTORS, DL, VT,
7459 20 : BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7460 30 : TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7461 : }
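// Worked example (illustrative): if the condition build_vector is
// <0, 0, undef, undef, -1, -1, undef, undef> and both values are two-part
// concat_vectors, the bottom half uniformly picks RHS and the top half
// uniformly picks LHS, so the whole vselect collapses to
//   (concat_vectors RHS.op0, LHS.op1)
// with no per-element selection left.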
7462 :
7463 287 : SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7464 287 : if (Level >= AfterLegalizeTypes)
7465 166 : return SDValue();
7466 :
7467 : MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7468 121 : SDValue Mask = MSC->getMask();
7469 121 : SDValue Data = MSC->getValue();
7470 : SDLoc DL(N);
7471 :
7472 : // If the MSCATTER data type requires splitting and the mask is provided by a
7473 : // SETCC, then split both nodes and its operands before legalization. This
7474 : // prevents the type legalizer from unrolling SETCC into scalar comparisons
7475 : // and enables future optimizations (e.g. min/max pattern matching on X86).
7476 121 : if (Mask.getOpcode() != ISD::SETCC)
7477 96 : return SDValue();
7478 :
7479 : // Check if any splitting is required.
7480 50 : if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7481 : TargetLowering::TypeSplitVector)
7482 20 : return SDValue();
7483 : SDValue MaskLo, MaskHi;
7484 5 : std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7485 :
7486 : EVT LoVT, HiVT;
7487 10 : std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7488 :
7489 5 : SDValue Chain = MSC->getChain();
7490 :
7491 5 : EVT MemoryVT = MSC->getMemoryVT();
7492 5 : unsigned Alignment = MSC->getOriginalAlignment();
7493 :
7494 5 : EVT LoMemVT, HiMemVT;
7495 5 : std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7496 :
7497 : SDValue DataLo, DataHi;
7498 5 : std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7499 :
7500 5 : SDValue Scale = MSC->getScale();
7501 5 : SDValue BasePtr = MSC->getBasePtr();
7502 : SDValue IndexLo, IndexHi;
7503 10 : std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7504 :
7505 5 : MachineMemOperand *MMO = DAG.getMachineFunction().
7506 15 : getMachineMemOperand(MSC->getPointerInfo(),
7507 : MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7508 5 : Alignment, MSC->getAAInfo(), MSC->getRanges());
7509 :
7510 5 : SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7511 5 : SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7512 10 : DataLo.getValueType(), DL, OpsLo, MMO);
7513 :
7514 : // The order of the Scatter operations after splitting is well defined: the
7515 : // "Hi" part comes after the "Lo", so the two operations are chained one
7516 : // after another.
7517 5 : SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7518 5 : return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7519 10 : DL, OpsHi, MMO);
7520 : }
7521 :
7522 761 : SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7523 761 : if (Level >= AfterLegalizeTypes)
7524 448 : return SDValue();
7525 :
7526 : MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7527 313 : SDValue Mask = MST->getMask();
7528 313 : SDValue Data = MST->getValue();
7529 626 : EVT VT = Data.getValueType();
7530 : SDLoc DL(N);
7531 :
7532 : // If the MSTORE data type requires splitting and the mask is provided by a
7533 : // SETCC, then split both nodes and its operands before legalization. This
7534 : // prevents the type legalizer from unrolling SETCC into scalar comparisons
7535 : // and enables future optimizations (e.g. min/max pattern matching on X86).
7536 313 : if (Mask.getOpcode() == ISD::SETCC) {
7537 : // Check if any splitting is required.
7538 36 : if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7539 : TargetLowering::TypeSplitVector)
7540 34 : return SDValue();
7541 :
7542 : SDValue MaskLo, MaskHi, Lo, Hi;
7543 2 : std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7544 :
7545 2 : SDValue Chain = MST->getChain();
7546 2 : SDValue Ptr = MST->getBasePtr();
7547 :
7548 2 : EVT MemoryVT = MST->getMemoryVT();
7549 2 : unsigned Alignment = MST->getOriginalAlignment();
7550 :
7551 : // if Alignment is equal to the vector size,
7552 : // If the alignment is equal to the vector size, take half of it for the
7553 : // second part.
7554 2 : (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7555 :
7556 2 : EVT LoMemVT, HiMemVT;
7557 2 : std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7558 :
7559 : SDValue DataLo, DataHi;
7560 2 : std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7561 :
7562 2 : MachineMemOperand *MMO = DAG.getMachineFunction().
7563 6 : getMachineMemOperand(MST->getPointerInfo(),
7564 : MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7565 2 : Alignment, MST->getAAInfo(), MST->getRanges());
7566 :
7567 2 : Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7568 2 : MST->isTruncatingStore(),
7569 4 : MST->isCompressingStore());
7570 :
7571 2 : Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7572 2 : MST->isCompressingStore());
7573 : unsigned HiOffset = LoMemVT.getStoreSize();
7574 :
7575 6 : MMO = DAG.getMachineFunction().getMachineMemOperand(
7576 : MST->getPointerInfo().getWithOffset(HiOffset),
7577 : MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7578 2 : MST->getAAInfo(), MST->getRanges());
7579 :
7580 2 : Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7581 2 : MST->isTruncatingStore(),
7582 4 : MST->isCompressingStore());
7583 :
7584 2 : AddToWorklist(Lo.getNode());
7585 2 : AddToWorklist(Hi.getNode());
7586 :
7587 4 : return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7588 : }
7589 277 : return SDValue();
7590 : }
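// Illustrative sketch: a masked store of v16i32 whose mask comes from a
// setcc, on a target that splits v16i32, becomes two v8i32 masked stores.
// The low half keeps the original alignment; the high half's address is
// advanced past the low half's store size and, when the original alignment
// equaled the full vector size, its alignment is halved to stay correct.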
7591 :
7592 994 : SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7593 994 : if (Level >= AfterLegalizeTypes)
7594 508 : return SDValue();
7595 :
7596 : MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7597 486 : SDValue Mask = MGT->getMask();
7598 : SDLoc DL(N);
7599 :
7600 : // If the MGATHER result requires splitting and the mask is provided by a
7601 : // SETCC, then split both nodes and its operands before legalization. This
7602 : // prevents the type legalizer from unrolling SETCC into scalar comparisons
7603 : // and enables future optimizations (e.g. min/max pattern matching on X86).
7604 :
7605 486 : if (Mask.getOpcode() != ISD::SETCC)
7606 456 : return SDValue();
7607 :
7608 30 : EVT VT = N->getValueType(0);
7609 :
7610 : // Check if any splitting is required.
7611 30 : if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7612 : TargetLowering::TypeSplitVector)
7613 24 : return SDValue();
7614 :
7615 : SDValue MaskLo, MaskHi, Lo, Hi;
7616 6 : std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7617 :
7618 6 : SDValue PassThru = MGT->getPassThru();
7619 : SDValue PassThruLo, PassThruHi;
7620 6 : std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7621 :
7622 : EVT LoVT, HiVT;
7623 6 : std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7624 :
7625 6 : SDValue Chain = MGT->getChain();
7626 6 : EVT MemoryVT = MGT->getMemoryVT();
7627 6 : unsigned Alignment = MGT->getOriginalAlignment();
7628 :
7629 6 : EVT LoMemVT, HiMemVT;
7630 6 : std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7631 :
7632 6 : SDValue Scale = MGT->getScale();
7633 6 : SDValue BasePtr = MGT->getBasePtr();
7634 6 : SDValue Index = MGT->getIndex();
7635 : SDValue IndexLo, IndexHi;
7636 6 : std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7637 :
7638 6 : MachineMemOperand *MMO = DAG.getMachineFunction().
7639 18 : getMachineMemOperand(MGT->getPointerInfo(),
7640 : MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
7641 6 : Alignment, MGT->getAAInfo(), MGT->getRanges());
7642 :
7643 6 : SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
7644 6 : Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7645 6 : MMO);
7646 :
7647 6 : SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
7648 6 : Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7649 6 : MMO);
7650 :
7651 6 : AddToWorklist(Lo.getNode());
7652 6 : AddToWorklist(Hi.getNode());
7653 :
7654 : // Build a factor node to remember that this load is independent of the
7655 : // other one.
7656 6 : Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7657 6 : Hi.getValue(1));
7658 :
7659 : // We legalized the chain result; switch anything that used the old chain
7660 : // to use the new one.
7661 12 : DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7662 :
7663 12 : SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7664 :
7665 6 : SDValue RetOps[] = { GatherRes, Chain };
7666 12 : return DAG.getMergeValues(RetOps, DL);
7667 : }
7668 :
7669 1291 : SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7670 1291 : if (Level >= AfterLegalizeTypes)
7671 722 : return SDValue();
7672 :
7673 : MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7674 569 : SDValue Mask = MLD->getMask();
7675 : SDLoc DL(N);
7676 :
7677 : // If the MLOAD result requires splitting and the mask is provided by a
7678 : // SETCC, then split both nodes and its operands before legalization. This
7679 : // prevents the type legalizer from unrolling SETCC into scalar comparisons
7680 : // and enables future optimizations (e.g. min/max pattern matching on X86).
7681 569 : if (Mask.getOpcode() == ISD::SETCC) {
7682 76 : EVT VT = N->getValueType(0);
7683 :
7684 : // Check if any splitting is required.
7685 76 : if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7686 : TargetLowering::TypeSplitVector)
7687 70 : return SDValue();
7688 :
7689 : SDValue MaskLo, MaskHi, Lo, Hi;
7690 6 : std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7691 :
7692 6 : SDValue PassThru = MLD->getPassThru();
7693 : SDValue PassThruLo, PassThruHi;
7694 6 : std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7695 :
7696 : EVT LoVT, HiVT;
7697 12 : std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7698 :
7699 6 : SDValue Chain = MLD->getChain();
7700 6 : SDValue Ptr = MLD->getBasePtr();
7701 6 : EVT MemoryVT = MLD->getMemoryVT();
7702 6 : unsigned Alignment = MLD->getOriginalAlignment();
7703 :
7704 : // if Alignment is equal to the vector size,
7705 : // If the alignment is equal to the vector size, take half of it for the
7706 : // second part.
7707 12 : (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7708 : Alignment/2 : Alignment;
7709 :
7710 6 : EVT LoMemVT, HiMemVT;
7711 6 : std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7712 :
7713 6 : MachineMemOperand *MMO = DAG.getMachineFunction().
7714 18 : getMachineMemOperand(MLD->getPointerInfo(),
7715 : MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
7716 6 : Alignment, MLD->getAAInfo(), MLD->getRanges());
7717 :
7718 6 : Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7719 6 : MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7720 :
7721 6 : Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7722 6 : MLD->isExpandingLoad());
7723 : unsigned HiOffset = LoMemVT.getStoreSize();
7724 :
7725 18 : MMO = DAG.getMachineFunction().getMachineMemOperand(
7726 : MLD->getPointerInfo().getWithOffset(HiOffset),
7727 : MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7728 6 : MLD->getAAInfo(), MLD->getRanges());
7729 :
7730 6 : Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7731 6 : MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7732 :
7733 6 : AddToWorklist(Lo.getNode());
7734 6 : AddToWorklist(Hi.getNode());
7735 :
7736 : // Build a factor node to remember that this load is independent of the
7737 : // other one.
7738 6 : Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7739 6 : Hi.getValue(1));
7740 :
7741 : // We legalized the chain result; switch anything that used the old chain
7742 : // to use the new one.
7743 12 : DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7744 :
7745 12 : SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7746 :
7747 6 : SDValue RetOps[] = { LoadRes, Chain };
7748 12 : return DAG.getMergeValues(RetOps, DL);
7749 : }
7750 493 : return SDValue();
7751 : }
7752 :
7753 : /// A vector select of 2 constant vectors can be simplified to math/logic to
7754 : /// avoid a variable select instruction and possibly avoid constant loads.
7755 0 : SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7756 0 : SDValue Cond = N->getOperand(0);
7757 0 : SDValue N1 = N->getOperand(1);
7758 0 : SDValue N2 = N->getOperand(2);
7759 0 : EVT VT = N->getValueType(0);
7760 0 : if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7761 0 : !TLI.convertSelectOfConstantsToMath(VT) ||
7762 0 : !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7763 0 : !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7764 0 : return SDValue();
7765 :
7766 : // Check if we can use the condition value to increment/decrement a single
7767 : // constant value. This simplifies a select to an add and removes a constant
7768 : // load/materialization from the general case.
7769 : bool AllAddOne = true;
7770 : bool AllSubOne = true;
7771 : unsigned Elts = VT.getVectorNumElements();
7772 0 : for (unsigned i = 0; i != Elts; ++i) {
7773 0 : SDValue N1Elt = N1.getOperand(i);
7774 0 : SDValue N2Elt = N2.getOperand(i);
7775 0 : if (N1Elt.isUndef() || N2Elt.isUndef())
7776 0 : continue;
7777 :
7778 0 : const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7779 0 : const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7780 0 : if (C1 != C2 + 1)
7781 : AllAddOne = false;
7782 0 : if (C1 != C2 - 1)
7783 : AllSubOne = false;
7784 : }
7785 :
7786 : // Further simplifications for the extra-special cases where the constants are
7787 : // all 0 or all -1 should be implemented as folds of these patterns.
7788 : SDLoc DL(N);
7789 0 : if (AllAddOne || AllSubOne) {
7790 : // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7791 : // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7792 0 : auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7793 0 : SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7794 0 : return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7795 : }
7796 :
7797 : // The general case for select-of-constants:
7798 : // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7799 : // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7800 : // leave that to a machine-specific pass.
7801 0 : return SDValue();
7802 : }
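// Worked example (illustrative):
//   vselect <4 x i1> %c, <2, 2, 2, 2>, <1, 1, 1, 1>
//     -> add (zext %c to <4 x i32>), <1, 1, 1, 1>
// Every true lane must yield exactly one more than the corresponding false
// lane; undef lanes are skipped when checking the all-add-one property.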
7803 :
7804 39265 : SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7805 39265 : SDValue N0 = N->getOperand(0);
7806 39265 : SDValue N1 = N->getOperand(1);
7807 39265 : SDValue N2 = N->getOperand(2);
7808 : SDLoc DL(N);
7809 :
7810 : // fold (vselect C, X, X) -> X
7811 : if (N1 == N2)
7812 4 : return N1;
7813 :
7814 : // Canonicalize integer abs.
7815 : // vselect (setg[te] X, 0), X, -X ->
7816 : // vselect (setgt X, -1), X, -X ->
7817 : // vselect (setl[te] X, 0), -X, X ->
7818 : // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7819 39261 : if (N0.getOpcode() == ISD::SETCC) {
7820 12694 : SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7821 12694 : ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7822 : bool isAbs = false;
7823 12694 : bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7824 :
7825 20386 : if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7826 12564 : (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7827 13062 : N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7828 356 : isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7829 7722 : else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7830 12425 : N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7831 87 : isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7832 :
7833 443 : if (isAbs) {
7834 443 : EVT VT = LHS.getValueType();
7835 443 : if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7836 712 : return DAG.getNode(ISD::ABS, DL, VT, LHS);
7837 :
7838 87 : SDValue Shift = DAG.getNode(
7839 : ISD::SRA, DL, VT, LHS,
7840 87 : DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7841 174 : SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7842 87 : AddToWorklist(Shift.getNode());
7843 87 : AddToWorklist(Add.getNode());
7844 174 : return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7845 : }
7846 :
7847 : // vselect (fcmp lt x, y), x, y -> fminnum x, y
7848 : // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
7849 : //
7850 : // This is OK if we don't care about what happens if either operand is a
7851 : // NaN.
7852 : //
7853 24502 : EVT VT = N->getValueType(0);
7854 12251 : if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
7855 35 : ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7856 35 : if (SDValue FMinMax = combineMinNumMaxNum(
7857 70 : DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
7858 10 : return FMinMax;
7859 : }
7860 :
7861 : // If this select has a condition (setcc) with narrower operands than the
7862 : // select, try to widen the compare to match the select width.
7863 : // TODO: This should be extended to handle any constant.
7864 : // TODO: This could be extended to handle non-loading patterns, but that
7865 : // requires thorough testing to avoid regressions.
7866 12241 : if (isNullConstantOrNullSplatConstant(RHS)) {
7867 : EVT NarrowVT = LHS.getValueType();
7868 4772 : EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
7869 4772 : EVT SetCCVT = getSetCCResultType(LHS.getValueType());
7870 : unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
7871 : unsigned WideWidth = WideVT.getScalarSizeInBits();
7872 : bool IsSigned = isSignedIntSetCC(CC);
7873 4772 : auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7874 53 : if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
7875 51 : SetCCWidth != 1 && SetCCWidth < WideWidth &&
7876 4790 : TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
7877 : TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
7878 : // Both compare operands can be widened for free. The LHS can use an
7879 : // extended load, and the RHS is a constant:
7880 : // vselect (ext (setcc load(X), C)), N1, N2 -->
7881 : // vselect (setcc extload(X), C'), N1, N2
7882 10 : auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7883 20 : SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
7884 20 : SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
7885 10 : EVT WideSetCCVT = getSetCCResultType(WideVT);
7886 10 : SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
7887 20 : return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
7888 : }
7889 : }
7890 : }
7891 :
7892 38798 : if (SimplifySelectOps(N, N1, N2))
7893 6 : return SDValue(N, 0); // Don't revisit N.
7894 :
7895 : // Fold (vselect (build_vector all_ones), N1, N2) -> N1
7896 38792 : if (ISD::isBuildVectorAllOnes(N0.getNode()))
7897 69 : return N1;
7898 : // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
7899 38723 : if (ISD::isBuildVectorAllZeros(N0.getNode()))
7900 365 : return N2;
7901 :
7902 : // ConvertSelectToConcatVector assumes both the above checks for
7903 : // (vselect (build_vector all{ones,zeros}) ...) have been made and
7904 : // addressed.
7905 639 : if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
7906 38544 : N2.getOpcode() == ISD::CONCAT_VECTORS &&
7907 186 : ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
7908 43 : if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
7909 10 : return CV;
7910 : }
7911 :
7912 38348 : if (SDValue V = foldVSelectOfConstants(N))
7913 40 : return V;
7914 :
7915 38308 : return SDValue();
7916 : }
7917 :
7918 16226 : SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7919 16226 : SDValue N0 = N->getOperand(0);
7920 16226 : SDValue N1 = N->getOperand(1);
7921 16226 : SDValue N2 = N->getOperand(2);
7922 16226 : SDValue N3 = N->getOperand(3);
7923 16226 : SDValue N4 = N->getOperand(4);
7924 16226 : ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7925 :
7926 : // fold select_cc lhs, rhs, x, x, cc -> x
7927 : if (N2 == N3)
7928 27 : return N2;
7929 :
7930 : // Determine if the condition we're dealing with is constant
7931 16199 : if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7932 32406 : CC, SDLoc(N), false)) {
7933 325 : AddToWorklist(SCC.getNode());
7934 :
7935 : if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7936 54 : if (!SCCC->isNullValue())
7937 17 : return N2; // cond always true -> true val
7938 : else
7939 10 : return N3; // cond always false -> false val
7940 298 : } else if (SCC->isUndef()) {
7941 : // When the condition is UNDEF, just return the first operand. This is
7942 : // consistent with DAG creation; no setcc node is created in this case.
7943 0 : return N2;
7944 298 : } else if (SCC.getOpcode() == ISD::SETCC) {
7945 : // Fold to a simpler select_cc
7946 556 : return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7947 : SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7948 278 : SCC.getOperand(2));
7949 : }
7950 : }
7951 :
7952 : // If we can fold this based on the true/false value, do so.
7953 15894 : if (SimplifySelectOps(N, N2, N3))
7954 0 : return SDValue(N, 0); // Don't revisit N.
7955 :
7956 : // fold select_cc into other things, such as min/max/abs
7957 31788 : return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7958 : }
7959 :
7960 298267 : SDValue DAGCombiner::visitSETCC(SDNode *N) {
7961 : // setcc is very commonly used as an argument to brcond. This pattern
7962 : // also lends itself to numerous combines and, as a result, it is desirable
7963 : // to keep the argument to a brcond as a setcc as much as possible.
7964 : bool PreferSetCC =
7965 292820 : N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7966 :
7967 : SDValue Combined = SimplifySetCC(
7968 : N->getValueType(0), N->getOperand(0), N->getOperand(1),
7969 573879 : cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7970 :
7971 298267 : if (!Combined)
7972 281078 : return SDValue();
7973 :
7974 : // If we prefer to have a setcc, and we don't, we'll try our best to
7975 : // recreate one using rebuildSetCC.
7976 17189 : if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7977 685 : SDValue NewSetCC = rebuildSetCC(Combined);
7978 :
7979 : // We don't have anything interesting to combine to.
7980 685 : if (NewSetCC.getNode() == N)
7981 115 : return SDValue();
7982 :
7983 570 : if (NewSetCC)
7984 0 : return NewSetCC;
7985 : }
7986 :
7987 17074 : return Combined;
7988 : }
7989 :
7990 0 : SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7991 0 : SDValue LHS = N->getOperand(0);
7992 0 : SDValue RHS = N->getOperand(1);
7993 0 : SDValue Carry = N->getOperand(2);
7994 0 : SDValue Cond = N->getOperand(3);
7995 :
7996 : // If Carry is false, fold to a regular SETCC.
7997 0 : if (isNullConstant(Carry))
7998 0 : return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7999 :
8000 0 : return SDValue();
8001 : }
8002 :
8003 : /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8004 : /// a build_vector of constants.
8005 : /// This function is called by the DAGCombiner when visiting sext/zext/aext
8006 : /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8007 : /// Vector extends are not folded if operations are legal; this is to
8008 : /// avoid introducing illegal build_vector dag nodes.
8009 290803 : static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8010 : SelectionDAG &DAG, bool LegalTypes,
8011 : bool LegalOperations) {
8012 290803 : unsigned Opcode = N->getOpcode();
8013 290803 : SDValue N0 = N->getOperand(0);
8014 581606 : EVT VT = N->getValueType(0);
8015 :
8016 : assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8017 : Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8018 : Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8019 : && "Expected EXTEND dag node in input!");
8020 :
8021 : // fold (sext c1) -> c1
8022 : // fold (zext c1) -> c1
8023 : // fold (aext c1) -> c1
8024 : if (isa<ConstantSDNode>(N0))
8025 567 : return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
8026 :
8027 : // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8028 : // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8029 : // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8030 290236 : EVT SVT = VT.getScalarType();
8031 311556 : if (!(VT.isVector() &&
8032 9658 : (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
8033 21320 : ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8034 290158 : return nullptr;
8035 :
8036 : // We can fold this node into a build_vector.
8037 78 : unsigned VTBits = SVT.getSizeInBits();
8038 156 : unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8039 : SmallVector<SDValue, 8> Elts;
8040 : unsigned NumElts = VT.getVectorNumElements();
8041 : SDLoc DL(N);
8042 :
8043 486 : for (unsigned i=0; i != NumElts; ++i) {
8044 408 : SDValue Op = N0->getOperand(i);
8045 408 : if (Op->isUndef()) {
8046 82 : Elts.push_back(DAG.getUNDEF(SVT));
8047 82 : continue;
8048 : }
8049 :
8050 : SDLoc DL(Op);
8051 : // Get the constant value and, if needed, truncate it to the size of the type.
8052 : // Nodes like build_vector might have constants wider than the scalar type.
8053 652 : APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8054 326 : if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8055 112 : Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8056 : else
8057 214 : Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8058 : }
8059 :
8060 78 : return DAG.getBuildVector(VT, DL, Elts).getNode();
8061 : }
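// As an illustration of the build_vector path above (a sketch; the vector
// types are assumed, not taken from any particular target):
//   (v4i32 (zext (v4i16 (build_vector 1, 2, undef, 4))))
// would become
//   (v4i32 (build_vector 1, 2, undef, 4))
// with each constant zero-extended (sign-extended for sext) to the new
// scalar type and undef lanes kept as undef of that scalar type.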
8062 :
8063 : // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
8064 : // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8065 : // transformation. Returns true if extending the uses is possible and the
8066 : // above-mentioned transformation is profitable.
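// The walk below is, roughly, a profitability heuristic: setcc users can be
// extended themselves and are collected in ExtendNodes; any other user
// requires a free truncate back to the original type; and if both the
// narrow and the extended values would be live-out through CopyToReg, the
// transformation is only kept when at least one setcc user was collected.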
8067 0 : static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8068 : unsigned ExtOpc,
8069 : SmallVectorImpl<SDNode *> &ExtendNodes,
8070 : const TargetLowering &TLI) {
8071 : bool HasCopyToRegUses = false;
8072 0 : bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8073 0 : for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8074 : UE = N0.getNode()->use_end();
8075 0 : UI != UE; ++UI) {
8076 0 : SDNode *User = *UI;
8077 0 : if (User == N)
8078 0 : continue;
8079 0 : if (UI.getUse().getResNo() != N0.getResNo())
8080 0 : continue;
8081 : // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8082 0 : if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8083 0 : ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8084 0 : if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8085 : // Sign bits will be lost after a zext.
8086 0 : return false;
8087 : bool Add = false;
8088 0 : for (unsigned i = 0; i != 2; ++i) {
8089 0 : SDValue UseOp = User->getOperand(i);
8090 0 : if (UseOp == N0)
8091 0 : continue;
8092 : if (!isa<ConstantSDNode>(UseOp))
8093 0 : return false;
8094 : Add = true;
8095 : }
8096 0 : if (Add)
8097 0 : ExtendNodes.push_back(User);
8098 0 : continue;
8099 : }
8100 : // If truncates aren't free and there are users we can't
8101 : // extend, it isn't worthwhile.
8102 0 : if (!isTruncFree)
8103 0 : return false;
8104 : // Remember if this value is live-out.
8105 0 : if (User->getOpcode() == ISD::CopyToReg)
8106 : HasCopyToRegUses = true;
8107 : }
8108 :
8109 0 : if (HasCopyToRegUses) {
8110 : bool BothLiveOut = false;
8111 0 : for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8112 0 : UI != UE; ++UI) {
8113 : SDUse &Use = UI.getUse();
8114 0 : if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8115 : BothLiveOut = true;
8116 : break;
8117 : }
8118 : }
8119 0 : if (BothLiveOut)
8120 : // Both unextended and extended values are live out. There had better be
8121 : // a good reason for the transformation.
8122 0 : return ExtendNodes.size();
8123 : }
8124 : return true;
8125 : }
8126 :
8127 0 : void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8128 : SDValue OrigLoad, SDValue ExtLoad,
8129 : ISD::NodeType ExtType) {
8130 : // Extend SetCC uses if necessary.
8131 0 : SDLoc DL(ExtLoad);
8132 0 : for (SDNode *SetCC : SetCCs) {
8133 : SmallVector<SDValue, 4> Ops;
8134 :
8135 0 : for (unsigned j = 0; j != 2; ++j) {
8136 0 : SDValue SOp = SetCC->getOperand(j);
8137 0 : if (SOp == OrigLoad)
8138 0 : Ops.push_back(ExtLoad);
8139 : else
8140 0 : Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8141 : }
8142 :
8143 0 : Ops.push_back(SetCC->getOperand(2));
8144 0 : CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8145 : }
8146 0 : }
8147 :
8148 : // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8149 131440 : SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8150 131440 : SDValue N0 = N->getOperand(0);
8151 262880 : EVT DstVT = N->getValueType(0);
8152 : EVT SrcVT = N0.getValueType();
8153 :
8154 : assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8155 : N->getOpcode() == ISD::ZERO_EXTEND) &&
8156 : "Unexpected node type (not an extend)!");
8157 :
8158 : // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8159 : // For example, on a target with legal v4i32, but illegal v8i32, turn:
8160 : // (v8i32 (sext (v8i16 (load x))))
8161 : // into:
8162 : // (v8i32 (concat_vectors (v4i32 (sextload x)),
8163 : // (v4i32 (sextload (x + 16)))))
8164 : // Where uses of the original load, i.e.:
8165 : // (v8i16 (load x))
8166 : // are replaced with:
8167 : // (v8i16 (truncate
8168 : // (v8i32 (concat_vectors (v4i32 (sextload x)),
8169 : // (v4i32 (sextload (x + 16)))))))
8170 : //
8171 : // This combine is only applicable to illegal, but splittable, vectors.
8172 : // All legal types, and illegal non-vector types, are handled elsewhere.
8173 : // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8174 : //
8175 131440 : if (N0->getOpcode() != ISD::LOAD)
8176 127356 : return SDValue();
8177 :
8178 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8179 :
8180 3827 : if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8181 6313 : !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8182 2943 : !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8183 3668 : return SDValue();
8184 :
8185 : SmallVector<SDNode *, 4> SetCCs;
8186 832 : if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8187 0 : return SDValue();
8188 :
8189 : ISD::LoadExtType ExtType =
8190 416 : N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8191 :
8192 : // Try to split the vector types to get down to legal types.
8193 416 : EVT SplitSrcVT = SrcVT;
8194 416 : EVT SplitDstVT = DstVT;
8195 1702 : while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8196 : SplitSrcVT.getVectorNumElements() > 1) {
8197 541 : SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8198 541 : SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8199 : }
8200 :
8201 : if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8202 204 : return SDValue();
8203 :
8204 : SDLoc DL(N);
8205 : const unsigned NumSplits =
8206 212 : DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8207 : const unsigned Stride = SplitSrcVT.getStoreSize();
8208 : SmallVector<SDValue, 4> Loads;
8209 : SmallVector<SDValue, 4> Chains;
8210 :
8211 212 : SDValue BasePtr = LN0->getBasePtr();
8212 645 : for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8213 433 : const unsigned Offset = Idx * Stride;
8214 433 : const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8215 :
8216 433 : SDValue SplitLoad = DAG.getExtLoad(
8217 433 : ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8218 : LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8219 1732 : LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8220 :
8221 433 : BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8222 433 : DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8223 :
8224 433 : Loads.push_back(SplitLoad.getValue(0));
8225 433 : Chains.push_back(SplitLoad.getValue(1));
8226 : }
8227 :
8228 424 : SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8229 424 : SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8230 :
8231 : // Simplify TF.
8232 212 : AddToWorklist(NewChain.getNode());
8233 :
8234 212 : CombineTo(N, NewValue);
8235 :
8236 : // Replace uses of the original load (before extension)
8237 : // with a truncate of the concatenated sextloaded vectors.
8238 : SDValue Trunc =
8239 218 : DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8240 424 : ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8241 : CombineTo(N0.getNode(), Trunc, NewChain);
8242 212 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
8243 : }
8244 :
8245 : // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8246 : // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8247 100653 : SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8248 : assert(N->getOpcode() == ISD::ZERO_EXTEND);
8249 100653 : EVT VT = N->getValueType(0);
8250 :
8251 : // and/or/xor
8252 100653 : SDValue N0 = N->getOperand(0);
8253 98358 : if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8254 15638 : N0.getOpcode() == ISD::XOR) ||
8255 113996 : N0.getOperand(1).getOpcode() != ISD::Constant ||
8256 13358 : (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8257 87295 : return SDValue();
8258 :
8259 : // shl/shr
8260 13358 : SDValue N1 = N0->getOperand(0);
8261 13358 : if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8262 14388 : N1.getOperand(1).getOpcode() != ISD::Constant ||
8263 332 : (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8264 12313 : return SDValue();
8265 :
8266 : // load
8267 1045 : if (!isa<LoadSDNode>(N1.getOperand(0)))
8268 991 : return SDValue();
8269 : LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8270 : EVT MemVT = Load->getMemoryVT();
8271 104 : if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8272 103 : Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8273 5 : return SDValue();
8274 :
8275 :
8277 : // will be wrong.
8278 49 : if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8279 2 : return SDValue();
8280 :
8281 82 : if (!N0.hasOneUse() || !N1.hasOneUse())
8282 12 : return SDValue();
8283 :
8284 : SmallVector<SDNode*, 4> SetCCs;
8285 70 : if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8286 : ISD::ZERO_EXTEND, SetCCs, TLI))
8287 9 : return SDValue();
8288 :
8289 : // Actually do the transformation.
8290 26 : SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8291 : Load->getChain(), Load->getBasePtr(),
8292 34 : Load->getMemoryVT(), Load->getMemOperand());
8293 :
8294 : SDLoc DL1(N1);
8295 26 : SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8296 26 : N1.getOperand(1));
8297 :
8298 26 : APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8299 52 : Mask = Mask.zext(VT.getSizeInBits());
8300 : SDLoc DL0(N0);
8301 26 : SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8302 26 : DAG.getConstant(Mask, DL0, VT));
8303 :
8304 26 : ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8305 26 : CombineTo(N, And);
8306 26 : if (SDValue(Load, 0).hasOneUse()) {
8307 50 : DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8308 : } else {
8309 1 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8310 2 : Load->getValueType(0), ExtLoad);
8311 : CombineTo(Load, Trunc, ExtLoad.getValue(1));
8312 : }
8313 26 : return SDValue(N,0); // Return N so it doesn't get rechecked!
8314 : }
8315 :
8316 : /// If we're narrowing or widening the result of a vector select and the final
8317 : /// size is the same size as a setcc (compare) feeding the select, then try to
8318 : /// apply the cast operation to the select's operands because matching vector
8319 : /// sizes for a select condition and other operands should be more efficient.
8320 949740 : SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8321 949740 : unsigned CastOpcode = Cast->getOpcode();
8322 : assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8323 : CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8324 : CastOpcode == ISD::FP_ROUND) &&
8325 : "Unexpected opcode for vector select narrowing/widening");
8326 :
8327 : // We only do this transform before legal ops because the pattern may be
8328 : // obfuscated by target-specific operations after legalization. Do not create
8329 : // an illegal select op, however, because that may be difficult to lower.
8330 949740 : EVT VT = Cast->getValueType(0);
8331 949740 : if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8332 477945 : return SDValue();
8333 :
8334 471795 : SDValue VSel = Cast->getOperand(0);
8335 471829 : if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8336 34 : VSel.getOperand(0).getOpcode() != ISD::SETCC)
8337 471761 : return SDValue();
8338 :
8339 : // Does the setcc have the same vector size as the casted select?
8340 34 : SDValue SetCC = VSel.getOperand(0);
8341 68 : EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8342 34 : if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8343 18 : return SDValue();
8344 :
8345 : // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8346 16 : SDValue A = VSel.getOperand(1);
8347 16 : SDValue B = VSel.getOperand(2);
8348 16 : SDValue CastA, CastB;
8349 : SDLoc DL(Cast);
8350 16 : if (CastOpcode == ISD::FP_ROUND) {
8351 : // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8352 12 : CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8353 12 : CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8354 : } else {
8355 24 : CastA = DAG.getNode(CastOpcode, DL, VT, A);
8356 24 : CastB = DAG.getNode(CastOpcode, DL, VT, B);
8357 : }
8358 32 : return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8359 : }
8360 :
8361 : // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8362 : // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8363 131172 : static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8364 : const TargetLowering &TLI, EVT VT,
8365 : bool LegalOperations, SDNode *N,
8366 : SDValue N0, ISD::LoadExtType ExtLoadType) {
8367 : SDNode *N0Node = N0.getNode();
8368 131172 : bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8369 : : ISD::isZEXTLoad(N0Node);
8370 : if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8371 131379 : !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8372 131083 : return {};
8373 :
8374 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8375 89 : EVT MemVT = LN0->getMemoryVT();
8376 170 : if ((LegalOperations || LN0->isVolatile()) &&
8377 81 : !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8378 68 : return {};
8379 :
8380 : SDValue ExtLoad =
8381 21 : DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8382 23 : LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8383 21 : Combiner.CombineTo(N, ExtLoad);
8384 42 : DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8385 21 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
8386 : }
8387 :
8388 : // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8389 : // Only generate vector extloads when 1) they're legal, and 2) they are
8390 : // deemed desirable by the target.
8391 151772 : static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8392 : const TargetLowering &TLI, EVT VT,
8393 : bool LegalOperations, SDNode *N, SDValue N0,
8394 : ISD::LoadExtType ExtLoadType,
8395 : ISD::NodeType ExtOpc) {
8396 : if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8397 24159 : !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8398 22628 : ((LegalOperations || VT.isVector() ||
8399 24121 : cast<LoadSDNode>(N0)->isVolatile()) &&
8400 5628 : !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8401 129514 : return {};
8402 :
8403 : bool DoXform = true;
8404 : SmallVector<SDNode *, 4> SetCCs;
8405 22258 : if (!N0.hasOneUse())
8406 2346 : DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8407 22258 : if (VT.isVector())
8408 4832 : DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8409 22258 : if (!DoXform)
8410 1926 : return {};
8411 :
8412 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8413 20332 : SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8414 : LN0->getBasePtr(), N0.getValueType(),
8415 22201 : LN0->getMemOperand());
8416 20332 : Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8417 : // If the load value is used only by N, replace it via CombineTo N.
8418 : bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8419 20332 : Combiner.CombineTo(N, ExtLoad);
8420 20332 : if (NoReplaceTrunc) {
8421 19570 : DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8422 : } else {
8423 : SDValue Trunc =
8424 1524 : DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8425 : Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8426 : }
8427 20332 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
8428 : }
8429 :
8430 131150 : static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8431 : bool LegalOperations) {
8432 : assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8433 : N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8434 :
8435 131150 : SDValue SetCC = N->getOperand(0);
8436 118172 : if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8437 131150 : !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8438 125939 : return SDValue();
8439 :
8440 5211 : SDValue X = SetCC.getOperand(0);
8441 5211 : SDValue Ones = SetCC.getOperand(1);
8442 5211 : ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8443 10422 : EVT VT = N->getValueType(0);
8444 : EVT XVT = X.getValueType();
8445 : // setge X, C is canonicalized to setgt, so we do not need to match that
8446 : // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8447 : // not require the 'not' op.
8448 5211 : if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8449 : // Invert and smear/shift the sign bit:
8450 : // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8451 : // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
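// E.g. for an assumed width of 32:
//   sext i1 (setgt i32 X, -1) --> sra (xor X, -1), 31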
8452 : SDLoc DL(N);
8453 32 : SDValue NotX = DAG.getNOT(DL, X, VT);
8454 32 : SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8455 32 : auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8456 32 : return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8457 : }
8458 5179 : return SDValue();
8459 : }
8460 :
8461 51003 : SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8462 51003 : SDValue N0 = N->getOperand(0);
8463 102006 : EVT VT = N->getValueType(0);
8464 : SDLoc DL(N);
8465 :
8466 51003 : if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8467 51003 : LegalOperations))
8468 73 : return SDValue(Res, 0);
8469 :
8470 : // fold (sext (sext x)) -> (sext x)
8471 : // fold (sext (aext x)) -> (sext x)
8472 101860 : if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8473 12 : return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8474 :
8475 50924 : if (N0.getOpcode() == ISD::TRUNCATE) {
8476 : // fold (sext (truncate (load x))) -> (sext (smaller load x))
8477 : // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8478 6778 : if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8479 41 : SDNode *oye = N0.getOperand(0).getNode();
8480 41 : if (NarrowLoad.getNode() != N0.getNode()) {
8481 41 : CombineTo(N0.getNode(), NarrowLoad);
8482 : // CombineTo deleted the truncate, if needed, but not what's under it.
8483 41 : AddToWorklist(oye);
8484 : }
8485 41 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
8486 : }
8487 :
8488 : // See if the value being truncated is already sign extended. If so, just
8489 : // eliminate the trunc/sext pair.
8490 6737 : SDValue Op = N0.getOperand(0);
8491 6737 : unsigned OpBits = Op.getScalarValueSizeInBits();
8492 6737 : unsigned MidBits = N0.getScalarValueSizeInBits();
8493 : unsigned DestBits = VT.getScalarSizeInBits();
8494 6737 : unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8495 :
8496 6737 : if (OpBits == DestBits) {
8497 : // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
8498 : // bits, the value is already sign extended and can be used directly.
8499 3880 : if (NumSignBits > DestBits-MidBits)
8500 900 : return Op;
8501 2857 : } else if (OpBits < DestBits) {
8502 : // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
8503 : // bits, just sext from i32.
8504 2542 : if (NumSignBits > OpBits-MidBits)
8505 50 : return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8506 : } else {
8507 : // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
8508 : // bits, just truncate to i32.
8509 315 : if (NumSignBits > OpBits-MidBits)
8510 44 : return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8511 : }
8512 :
8513 : // fold (sext (truncate x)) -> (sextinreg x).
8514 5790 : if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8515 : N0.getValueType())) {
8516 5790 : if (OpBits < DestBits)
8517 2539 : Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8518 3273 : else if (OpBits > DestBits)
8519 297 : Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8520 5790 : return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8521 11580 : DAG.getValueType(N0.getValueType()));
8522 : }
8523 : }
8524 :
8525 : // Try to simplify (sext (load x)).
8526 44146 : if (SDValue foldedExt =
8527 44146 : tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8528 44146 : ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8529 13459 : return foldedExt;
8530 :
8531 : // fold (sext (load x)) to multiple smaller sextloads.
8532 : // Only on illegal but splittable vectors.
8533 30687 : if (SDValue ExtLoad = CombineExtLoad(N))
8534 142 : return ExtLoad;
8535 :
8536 : // Try to simplify (sext (sextload x)).
8537 30545 : if (SDValue foldedExt = tryToFoldExtOfExtload(
8538 30545 : DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8539 4 : return foldedExt;
8540 :
8541 : // fold (sext (and/or/xor (load x), cst)) ->
8542 : // (and/or/xor (sextload x), (sext cst))
8543 30541 : if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8544 1247 : N0.getOpcode() == ISD::XOR) &&
8545 31 : isa<LoadSDNode>(N0.getOperand(0)) &&
8546 30572 : N0.getOperand(1).getOpcode() == ISD::Constant &&
8547 1 : (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8548 : LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8549 : EVT MemVT = LN00->getMemoryVT();
8550 1 : if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8551 2 : LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8552 : SmallVector<SDNode*, 4> SetCCs;
8553 1 : bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8554 : ISD::SIGN_EXTEND, SetCCs, TLI);
8555 1 : if (DoXform) {
8556 2 : SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8557 : LN00->getChain(), LN00->getBasePtr(),
8558 : LN00->getMemoryVT(),
8559 2 : LN00->getMemOperand());
8560 2 : APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8561 1 : Mask = Mask.sext(VT.getSizeInBits());
8562 1 : SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8563 1 : ExtLoad, DAG.getConstant(Mask, DL, VT));
8564 2 : ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8565 1 : bool NoReplaceTruncAnd = !N0.hasOneUse();
8566 : bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8567 1 : CombineTo(N, And);
8568 : // If N0 has multiple uses, change other uses as well.
8569 1 : if (NoReplaceTruncAnd) {
8570 : SDValue TruncAnd =
8571 0 : DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8572 0 : CombineTo(N0.getNode(), TruncAnd);
8573 : }
8574 1 : if (NoReplaceTrunc) {
8575 2 : DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8576 : } else {
8577 0 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8578 0 : LN00->getValueType(0), ExtLoad);
8579 : CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8580 : }
8581 1 : return SDValue(N,0); // Return N so it doesn't get rechecked!
8582 : }
8583 : }
8584 : }
8585 :
8586 30540 : if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8587 10 : return V;
8588 :
8589 61060 : if (N0.getOpcode() == ISD::SETCC) {
8590 5425 : SDValue N00 = N0.getOperand(0);
8591 5425 : SDValue N01 = N0.getOperand(1);
8592 5425 : ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8593 10850 : EVT N00VT = N0.getOperand(0).getValueType();
8594 :
8595 : // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8596 : // Only do this before legalize for now.
8597 5425 : if (VT.isVector() && !LegalOperations &&
8598 2921 : TLI.getBooleanContents(N00VT) ==
8599 : TargetLowering::ZeroOrNegativeOneBooleanContent) {
8600 : // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8601 : // of the same size as the compared operands. Only optimize sext(setcc())
8602 : // if this is the case.
8603 2892 : EVT SVT = getSetCCResultType(N00VT);
8604 :
8605 : // We know that the # elements of the result is the same as the
8606 : // # elements of the compare (and the # elements of the compare result
8607 : // for that matter). Check to see that they are the same size. If so,
8608 : // we know that the element size of the sext'd result matches the
8609 : // element size of the compare operands.
8610 2892 : if (VT.getSizeInBits() == SVT.getSizeInBits())
8611 2653 : return DAG.getSetCC(DL, VT, N00, N01, CC);
8612 :
8613 : // If the desired elements are smaller or larger than the source
8614 : // elements, we can use a matching integer vector type and then
8615 : // truncate/sign extend.
8616 520 : EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8617 523 : if (SVT == MatchingVecType) {
8618 281 : SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8619 281 : return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8620 : }
8621 : }
8622 :
8623 : // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8624 : // Here, T can be 1 or -1, depending on the type of the setcc and
8625 : // getBooleanContents().
8626 2772 : unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8627 :
8628 : // To determine the "true" side of the select, we need to know the high bit
8629 : // of the value returned by the setcc if it evaluates to true.
8630 : // If the type of the setcc is i1, then the true case of the select is just
8631 : // sext(i1 1), that is, -1.
8632 : // If the type of the setcc is larger (say, i8) then the value of the high
8633 : // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8634 : // of the appropriate width.
8635 : SDValue ExtTrueVal = (SetCCWidth == 1)
8636 2771 : ? DAG.getAllOnesConstant(DL, VT)
8637 2772 : : DAG.getBoolConstant(true, DL, VT, N00VT);
8638 2772 : SDValue Zero = DAG.getConstant(0, DL, VT);
8639 2772 : if (SDValue SCC =
8640 2772 : SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8641 22 : return SCC;
8642 :
8643 2750 : if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8644 871 : EVT SetCCVT = getSetCCResultType(N00VT);
8645 : // Don't do this transform for i1 because there's a select transform
8646 : // that would reverse it.
8647 : // TODO: We should not do this transform at all without a target hook
8648 : // because a sext is likely cheaper than a select?
8649 871 : if (SetCCVT.getScalarSizeInBits() != 1 &&
8650 107 : (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8651 107 : SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8652 107 : return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8653 : }
8654 : }
8655 : }
8656 :
8657 : // fold (sext x) -> (zext x) if the sign bit is known zero.
8658 54816 : if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8659 27068 : DAG.SignBitIsZero(N0))
8660 884 : return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8661 :
8662 27306 : if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8663 4 : return NewVSel;
8664 :
8665 27302 : return SDValue();
8666 : }
8667 :
8668 : // isTruncateOf - If N is a truncate of some other value, return true and
8669 : // record the value being truncated in Op and Op's known zero/one bits in Known.
8670 : // This function computes KnownBits to avoid a duplicated call to
8671 : // computeKnownBits in the caller.
8672 0 : static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8673 : KnownBits &Known) {
8674 0 : if (N->getOpcode() == ISD::TRUNCATE) {
8675 0 : Op = N->getOperand(0);
8676 0 : DAG.computeKnownBits(Op, Known);
8677 0 : return true;
8678 : }
8679 :
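// The second form handled below is (setcc ne X, 0) with an i1 result: when
// every bit of X above bit 0 is known zero, the setcc is equivalent to a
// truncate of X to i1, so X is reported as the truncated value.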
8680 0 : if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
8681 0 : cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
8682 0 : return false;
8683 :
8684 0 : SDValue Op0 = N->getOperand(0);
8685 0 : SDValue Op1 = N->getOperand(1);
8686 : assert(Op0.getValueType() == Op1.getValueType());
8687 :
8688 0 : if (isNullConstant(Op0))
8689 0 : Op = Op1;
8690 0 : else if (isNullConstant(Op1))
8691 0 : Op = Op0;
8692 : else
8693 0 : return false;
8694 :
8695 0 : DAG.computeKnownBits(Op, Known);
8696 :
8697 0 : if (!(Known.Zero | 1).isAllOnesValue())
8698 0 : return false;
8699 :
8700 : return true;
8701 : }
8702 :
8703 142725 : SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8704 142725 : SDValue N0 = N->getOperand(0);
8705 142725 : EVT VT = N->getValueType(0);
8706 :
8707 142725 : if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8708 142725 : LegalOperations))
8709 452 : return SDValue(Res, 0);
8710 :
8711 : // fold (zext (zext x)) -> (zext x)
8712 : // fold (zext (aext x)) -> (zext x)
8713 284546 : if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8714 279 : return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8715 558 : N0.getOperand(0));
8716 :
8717 : // fold (zext (truncate x)) -> (zext x) or
8718 : // (zext (truncate x)) -> (truncate x)
8719 : // This is valid when the truncated bits of x are already zero.
8720 : // FIXME: We should extend this to work for vectors too.
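// E.g. if x is i32 and bits 16..31 of x are known zero, then
// (i32 (zext (i16 (trunc x)))) can be replaced directly by x, or by a
// zext/trunc of x when the result type differs from x's type.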
8721 141994 : SDValue Op;
8722 141994 : KnownBits Known;
8723 141994 : if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
8724 : APInt TruncatedBits =
8725 33830 : (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
8726 : APInt(Op.getValueSizeInBits(), 0) :
8727 : APInt::getBitsSet(Op.getValueSizeInBits(),
8728 : N0.getValueSizeInBits(),
8729 33828 : std::min(Op.getValueSizeInBits(),
8730 67660 : VT.getSizeInBits()));
8731 33830 : if (TruncatedBits.isSubsetOf(Known.Zero))
8732 31937 : return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8733 : }
8734 :
8735 : // fold (zext (truncate x)) -> (and x, mask)
8736 253298 : if (N0.getOpcode() == ISD::TRUNCATE) {
8737 : // fold (zext (truncate (load x))) -> (zext (smaller load x))
8738 : // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
8739 18670 : if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8740 13 : SDNode *oye = N0.getOperand(0).getNode();
8741 13 : if (NarrowLoad.getNode() != N0.getNode()) {
8742 13 : CombineTo(N0.getNode(), NarrowLoad);
8743 : // CombineTo deleted the truncate, if needed, but not what's under it.
8744 13 : AddToWorklist(oye);
8745 : }
8746 13 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
8747 : }
8748 :
8749 37314 : EVT SrcVT = N0.getOperand(0).getValueType();
8750 18657 : EVT MinVT = N0.getValueType();
8751 :
8752 : // Try to mask before the extension to avoid having to generate a larger mask,
8753 : // possibly over several sub-vectors.
8754 30722 : if (SrcVT.bitsLT(VT) && VT.isVector()) {
8755 51 : if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
8756 : TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
8757 51 : SDValue Op = N0.getOperand(0);
8758 102 : Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8759 51 : AddToWorklist(Op.getNode());
8760 51 : SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8761 : // Transfer the debug info; the new node is equivalent to N0.
8762 51 : DAG.transferDbgValues(N0, ZExtOrTrunc);
8763 51 : return ZExtOrTrunc;
8764 : }
8765 : }
8766 :
8767 18606 : if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
8768 18720 : SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8769 18606 : AddToWorklist(Op.getNode());
8770 37212 : SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8771 : // We may safely transfer the debug info describing the truncate node over
8772 : // to the equivalent and operation.
8773 18606 : DAG.transferDbgValues(N0, And);
8774 18606 : return And;
8775 : }
8776 : }
8777 :
8778 : // Fold (zext (and (trunc x), cst)) -> (and x, cst),
8779 : // if either of the casts is not free.
8780 107979 : if (N0.getOpcode() == ISD::AND &&
8781 2667 : N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8782 108834 : N0.getOperand(1).getOpcode() == ISD::Constant &&
8783 712 : (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8784 411 : N0.getValueType()) ||
8785 110 : !TLI.isZExtFree(N0.getValueType(), VT))) {
8786 706 : SDValue X = N0.getOperand(0).getOperand(0);
8787 361 : X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
8788 706 : APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8789 706 : Mask = Mask.zext(VT.getSizeInBits());
8790 : SDLoc DL(N);
8791 353 : return DAG.getNode(ISD::AND, DL, VT,
8792 353 : X, DAG.getConstant(Mask, DL, VT));
8793 : }
8794 :
8795 : // Try to simplify (zext (load x)).
8796 107626 : if (SDValue foldedExt =
8797 107626 : tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8798 107626 : ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
8799 6873 : return foldedExt;
8800 :
8801 : // fold (zext (load x)) to multiple smaller zextloads.
8802 : // Only on illegal but splittable vectors.
8803 100753 : if (SDValue ExtLoad = CombineExtLoad(N))
8804 70 : return ExtLoad;
8805 :
8806 : // fold (zext (and/or/xor (load x), cst)) ->
8807 : // (and/or/xor (zextload x), (zext cst))
8808 : // Unless (and (load x) cst) will match as a zextload already and has
8809 : // additional users.
8810 100683 : if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8811 15668 : N0.getOpcode() == ISD::XOR) &&
8812 200 : isa<LoadSDNode>(N0.getOperand(0)) &&
8813 100883 : N0.getOperand(1).getOpcode() == ISD::Constant &&
8814 83 : (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8815 : LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8816 : EVT MemVT = LN00->getMemoryVT();
8817 30 : if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
8818 70 : LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
8819 : bool DoXform = true;
8820 : SmallVector<SDNode*, 4> SetCCs;
8821 30 : if (!N0.hasOneUse()) {
8822 18 : if (N0.getOpcode() == ISD::AND) {
8823 : auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
8824 0 : EVT LoadResultTy = AndC->getValueType(0);
8825 0 : EVT ExtVT;
8826 0 : if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
8827 : DoXform = false;
8828 : }
8829 : }
8830 : if (DoXform)
8831 60 : DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8832 : ISD::ZERO_EXTEND, SetCCs, TLI);
8833 30 : if (DoXform) {
8834 60 : SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
8835 : LN00->getChain(), LN00->getBasePtr(),
8836 : LN00->getMemoryVT(),
8837 30 : LN00->getMemOperand());
8838 60 : APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8839 60 : Mask = Mask.zext(VT.getSizeInBits());
8840 : SDLoc DL(N);
8841 30 : SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8842 30 : ExtLoad, DAG.getConstant(Mask, DL, VT));
8843 60 : ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8844 30 : bool NoReplaceTruncAnd = !N0.hasOneUse();
8845 : bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8846 30 : CombineTo(N, And);
8847 : // If N0 has multiple uses, change other uses as well.
8848 30 : if (NoReplaceTruncAnd) {
8849 : SDValue TruncAnd =
8850 27 : DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8851 9 : CombineTo(N0.getNode(), TruncAnd);
8852 : }
8853 30 : if (NoReplaceTrunc) {
8854 58 : DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8855 : } else {
8856 1 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8857 2 : LN00->getValueType(0), ExtLoad);
8858 : CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8859 : }
8860 30 : return SDValue(N,0); // Return N so it doesn't get rechecked!
8861 : }
8862 : }
8863 : }
8864 :
8865 : // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8866 : // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8867 100653 : if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
8868 26 : return ZExtLoad;
8869 :
8870 : // Try to simplify (zext (zextload x)).
8871 100627 : if (SDValue foldedExt = tryToFoldExtOfExtload(
8872 100627 : DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
8873 17 : return foldedExt;
8874 :
8875 100610 : if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8876 22 : return V;
8877 :
8878 201176 : if (N0.getOpcode() == ISD::SETCC) {
8879 : // Only do this before legalize for now.
8880 54757 : if (!LegalOperations && VT.isVector() &&
8881 28342 : N0.getValueType().getVectorElementType() == MVT::i1) {
8882 678 : EVT N00VT = N0.getOperand(0).getValueType();
8883 1017 : if (getSetCCResultType(N00VT) == N0.getValueType())
8884 142 : return SDValue();
8885 :
8886 : // We know that the # elements of the result is the same as the #
8887 : // elements of the compare (and the # elements of the compare result for
8888 : // that matter). Check to see that they are the same size. If so, we know
8889 : // that the element size of the zext'd result matches the element size of
8890 : // the compare operands.
8891 : SDLoc DL(N);
8892 197 : SDValue VecOnes = DAG.getConstant(1, DL, VT);
8893 197 : if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
8894 : // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
8895 178 : SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
8896 356 : N0.getOperand(1), N0.getOperand(2));
8897 356 : return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
8898 : }
8899 :
8900 : // If the desired elements are smaller or larger than the source
8901 : // elements we can use a matching integer vector type and then
8902 : // truncate/sign extend.
8903 19 : EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8904 : SDValue VsetCC =
8905 19 : DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
8906 38 : N0.getOperand(1), N0.getOperand(2));
8907 19 : return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
8908 19 : VecOnes);
8909 : }
8910 :
8911 : // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8912 : SDLoc DL(N);
8913 27662 : if (SDValue SCC = SimplifySelectCC(
8914 27662 : DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8915 27662 : DAG.getConstant(0, DL, VT),
8916 55324 : cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8917 137 : return SCC;
8918 : }
8919 :
8920 : // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
8921 100112 : if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
8922 7229 : isa<ConstantSDNode>(N0.getOperand(1)) &&
8923 107352 : N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
8924 11 : N0.hasOneUse()) {
8925 10 : SDValue ShAmt = N0.getOperand(1);
8926 20 : unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8927 10 : if (N0.getOpcode() == ISD::SHL) {
8928 9 : SDValue InnerZExt = N0.getOperand(0);
8929 : // If the original shl may be shifting out bits, do not perform this
8930 : // transformation.
8931 9 : unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
8932 18 : InnerZExt.getOperand(0).getValueSizeInBits();
8933 9 : if (ShAmtVal > KnownZeroBits)
8934 0 : return SDValue();
8935 : }
8936 :
8937 : SDLoc DL(N);
8938 :
8939 : // Ensure that the shift amount is wide enough for the shifted value.
8940 10 : if (VT.getSizeInBits() >= 256)
8941 0 : ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
8942 :
8943 10 : return DAG.getNode(N0.getOpcode(), DL, VT,
8944 10 : DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
8945 10 : ShAmt);
8946 : }
8947 :
8948 100102 : if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8949 2 : return NewVSel;
8950 :
8951 100100 : return SDValue();
8952 : }
8953 :
8954 88162 : SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
8955 88162 : SDValue N0 = N->getOperand(0);
8956 88162 : EVT VT = N->getValueType(0);
8957 :
8958 88162 : if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8959 88162 : LegalOperations))
8960 78 : return SDValue(Res, 0);
8961 :
8962 : // fold (aext (aext x)) -> (aext x)
8963 : // fold (aext (zext x)) -> (zext x)
8964 : // fold (aext (sext x)) -> (sext x)
8965 88084 : if (N0.getOpcode() == ISD::ANY_EXTEND ||
8966 176151 : N0.getOpcode() == ISD::ZERO_EXTEND ||
8967 : N0.getOpcode() == ISD::SIGN_EXTEND)
8968 44 : return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8969 :
8970 : // fold (aext (truncate (load x))) -> (aext (smaller load x))
8971 : // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
8972 88062 : if (N0.getOpcode() == ISD::TRUNCATE) {
8973 7527 : if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8974 13 : SDNode *oye = N0.getOperand(0).getNode();
8975 13 : if (NarrowLoad.getNode() != N0.getNode()) {
8976 13 : CombineTo(N0.getNode(), NarrowLoad);
8977 : // CombineTo deleted the truncate, if needed, but not what's under it.
8978 13 : AddToWorklist(oye);
8979 : }
8980 13 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
8981 : }
8982 : }
8983 :
8984 : // fold (aext (truncate x))
8985 88049 : if (N0.getOpcode() == ISD::TRUNCATE)
8986 15117 : return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8987 :
8988 : // Fold (aext (and (trunc x), cst)) -> (and x, cst)
8989 : // if the trunc is not free.
8990 4001 : if (N0.getOpcode() == ISD::AND &&
8991 4001 : N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8992 86633 : N0.getOperand(1).getOpcode() == ISD::Constant &&
8993 5988 : !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8994 2994 : N0.getValueType())) {
8995 : SDLoc DL(N);
8996 8 : SDValue X = N0.getOperand(0).getOperand(0);
8997 8 : X = DAG.getAnyExtOrTrunc(X, DL, VT);
8998 8 : APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8999 8 : Mask = Mask.zext(VT.getSizeInBits());
9000 8 : return DAG.getNode(ISD::AND, DL, VT,
9001 8 : X, DAG.getConstant(Mask, DL, VT));
9002 : }
9003 :
9004 : // fold (aext (load x)) -> (aext (truncate (extload x)))
9005 : // None of the supported targets knows how to perform load and any_ext
9006 : // on vectors in one instruction. We only perform this transformation on
9007 : // scalars.
9008 15059 : if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9009 14875 : ISD::isUNINDEXEDLoad(N0.getNode()) &&
9010 15011 : TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9011 : bool DoXform = true;
9012 : SmallVector<SDNode*, 4> SetCCs;
9013 14670 : if (!N0.hasOneUse())
9014 10994 : DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9015 : TLI);
9016 10994 : if (DoXform) {
9017 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9018 28980 : SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9019 : LN0->getChain(),
9020 : LN0->getBasePtr(), N0.getValueType(),
9021 14568 : LN0->getMemOperand());
9022 14490 : ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9023 : // If the load value is used only by N, replace it via CombineTo N.
9024 : bool NoReplaceTrunc = N0.hasOneUse();
9025 14490 : CombineTo(N, ExtLoad);
9026 14490 : if (NoReplaceTrunc) {
9027 7352 : DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9028 : } else {
9029 10814 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9030 21628 : N0.getValueType(), ExtLoad);
9031 : CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9032 : }
9033 14490 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
9034 : }
9035 : }
9036 :
9037 : // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9038 : // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9039 : // fold (aext ( extload x)) -> (aext (truncate (extload x)))
9040 : if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9041 66472 : ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9042 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9043 : ISD::LoadExtType ExtType = LN0->getExtensionType();
9044 263 : EVT MemVT = LN0->getMemoryVT();
9045 308 : if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9046 239 : SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9047 : VT, LN0->getChain(), LN0->getBasePtr(),
9048 239 : MemVT, LN0->getMemOperand());
9049 239 : CombineTo(N, ExtLoad);
9050 478 : DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9051 239 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
9052 : }
9053 : }
9054 :
9055 65798 : if (N0.getOpcode() == ISD::SETCC) {
9056 : // For vectors:
9057 : // aext(setcc) -> vsetcc
9058 : // aext(setcc) -> truncate(vsetcc)
9059 : // aext(setcc) -> aext(vsetcc)
9060 : // Only do this before legalize for now.
9061 3230 : if (VT.isVector() && !LegalOperations) {
9062 948 : EVT N00VT = N0.getOperand(0).getValueType();
9063 948 : if (getSetCCResultType(N00VT) == N0.getValueType())
9064 89 : return SDValue();
9065 :
9066 : // We know that the # elements of the results is the same as the
9067 : // # elements of the compare (and the # elements of the compare result
9068 : // for that matter). Check to see that they are the same size. If so,
9069 : // we know that the element size of the sext'd result matches the
9070 : // element size of the compare operands.
9071 385 : if (VT.getSizeInBits() == N00VT.getSizeInBits())
9072 558 : return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9073 : N0.getOperand(1),
9074 279 : cast<CondCodeSDNode>(N0.getOperand(2))->get());
9075 : // If the desired elements are smaller or larger than the source
9076 : // elements we can use a matching integer vector type and then
9077 : // truncate/any extend
9078 : else {
9079 106 : EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9080 : SDValue VsetCC =
9081 106 : DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9082 : N0.getOperand(1),
9083 106 : cast<CondCodeSDNode>(N0.getOperand(2))->get());
9084 212 : return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9085 : }
9086 : }
9087 :
9088 : // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9089 : SDLoc DL(N);
9090 2756 : if (SDValue SCC = SimplifySelectCC(
9091 2756 : DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9092 2756 : DAG.getConstant(0, DL, VT),
9093 2756 : cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9094 138 : return SCC;
9095 : }
9096 :
9097 65186 : return SDValue();
9098 : }
9099 :
9100 0 : SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9101 0 : unsigned Opcode = N->getOpcode();
9102 0 : SDValue N0 = N->getOperand(0);
9103 0 : SDValue N1 = N->getOperand(1);
9104 0 : EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9105 :
9106 : // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9107 0 : if (N0.getOpcode() == Opcode &&
9108 : AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9109 0 : return N0;
9110 :
9111 0 : if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9112 0 : N0.getOperand(0).getOpcode() == Opcode) {
9113 : // We have an assert, truncate, assert sandwich. Make one stronger assert
9114 : // by applying the smallest asserted type to the larger source value.
9115 : // This eliminates the later assert:
9116 : // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9117 : // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9118 0 : SDValue BigA = N0.getOperand(0);
9119 0 : EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9120 : assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9121 : "Asserting zero/sign-extended bits to a type larger than the "
9122 : "truncated destination does not provide information");
9123 :
9124 : SDLoc DL(N);
9125 0 : EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9126 0 : SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9127 0 : SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9128 0 : BigA.getOperand(0), MinAssertVTVal);
9129 0 : return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9130 : }
9131 :
9132 0 : return SDValue();
9133 : }
9134 :
9135 : /// If the result of a wider load is shifted right by N bits and then
9136 : /// truncated to a narrower type, and N is a multiple of the number of bits in
9137 : /// the narrower type, transform it to a narrower load from address + N / (num
9138 : /// bits of the new type). Also narrow the load if the result is masked with an
9139 : /// AND to effectively produce a smaller type. If the result is to be extended,
9140 : /// also fold the extension to form an extending load.
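/// For instance (a sketch, assuming a little-endian target):
///   (i8 (trunc (srl (i32 (load p)), 16))) --> (i8 (load p+2))
/// Because the shift amount (16) is a multiple of the narrow width (8), the
/// desired value is simply the byte at offset 16/8 = 2.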
9141 1115566 : SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9142 1115566 : unsigned Opc = N->getOpcode();
9143 :
9144 : ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9145 1115566 : SDValue N0 = N->getOperand(0);
9146 1115566 : EVT VT = N->getValueType(0);
9147 1115566 : EVT ExtVT = VT;
9148 :
9149 : // This transformation isn't valid for vector loads.
9150 1115566 : if (VT.isVector())
9151 16240 : return SDValue();
9152 :
9153 : unsigned ShAmt = 0;
9154 : bool HasShiftedOffset = false;
9155 : // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9156 : // extending back to VT.
9157 1099326 : if (Opc == ISD::SIGN_EXTEND_INREG) {
9158 : ExtType = ISD::SEXTLOAD;
9159 41766 : ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9160 1057560 : } else if (Opc == ISD::SRL) {
9161 : // Another special case: SRL is basically zero-extending a narrower value,
9162 : // or it may be shifting a higher subword, half or byte into the lowest
9163 : // bits.
9164 : ExtType = ISD::ZEXTLOAD;
9165 138786 : N0 = SDValue(N, 0);
9166 :
9167 : auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9168 : auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9169 138786 : if (!N01 || !LN0)
9170 116385 : return SDValue();
9171 :
9172 22401 : uint64_t ShiftAmt = N01->getZExtValue();
9173 22401 : uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9174 22401 : if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9175 22232 : ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9176 : else
9177 169 : ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9178 169 : VT.getSizeInBits() - ShiftAmt);
9179 918774 : } else if (Opc == ISD::AND) {
9180 : // An AND with a constant mask is the same as a truncate + zero-extend.
9181 : auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9182 : if (!AndC)
9183 0 : return SDValue();
9184 :
9185 80218 : const APInt &Mask = AndC->getAPIntValue();
9186 : unsigned ActiveBits = 0;
9187 80218 : if (Mask.isMask()) {
9188 : ActiveBits = Mask.countTrailingOnes();
9189 8824 : } else if (Mask.isShiftedMask()) {
9190 7785 : ShAmt = Mask.countTrailingZeros();
9191 7785 : APInt ShiftedMask = Mask.lshr(ShAmt);
9192 : ActiveBits = ShiftedMask.countTrailingOnes();
9193 : HasShiftedOffset = true;
9194 : } else
9195 1039 : return SDValue();
9196 :
9197 : ExtType = ISD::ZEXTLOAD;
9198 79179 : ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9199 : }
9200 :
9201 1963804 : if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9202 63686 : SDValue SRL = N0;
9203 : if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9204 62198 : ShAmt = ConstShift->getZExtValue();
9205 62198 : unsigned EVTBits = ExtVT.getSizeInBits();
9206 : // Is the shift amount a multiple of size of VT?
9207 62198 : if ((ShAmt & (EVTBits-1)) == 0) {
9208 53583 : N0 = N0.getOperand(0);
9209 : // Is the load width a multiple of size of VT?
9210 53583 : if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9211 21 : return SDValue();
9212 : }
9213 :
9214 : // At this point, we must have a load or else we can't do the transform.
9215 62177 : if (!isa<LoadSDNode>(N0)) return SDValue();
9216 :
9217 : auto *LN0 = cast<LoadSDNode>(N0);
9218 :
9219 : // Because a SRL must be assumed to *need* to zero-extend the high bits
9220 : // (as opposed to anyext the high bits), we can't combine the zextload
9221 : // lowering of SRL and an sextload.
9222 22980 : if (LN0->getExtensionType() == ISD::SEXTLOAD)
9223 188 : return SDValue();
9224 :
9225 : // If the shift amount is larger than the input type then we're not
9226 : // accessing any of the loaded bytes. If the load was a zextload/extload
9227 : // then the result of the shift+trunc is zero/undef (handled elsewhere).
9228 22792 : if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9229 4 : return SDValue();
9230 :
9231 : // If the SRL is only used by a masking AND, we may be able to adjust
9232 : // the ExtVT to make the AND redundant.
9233 22788 : SDNode *Mask = *(SRL->use_begin());
9234 22788 : if (Mask->getOpcode() == ISD::AND &&
9235 7819 : isa<ConstantSDNode>(Mask->getOperand(1))) {
9236 : const APInt &ShiftMask =
9237 7812 : cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9238 7812 : if (ShiftMask.isMask()) {
9239 6141 : EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9240 6141 : ShiftMask.countTrailingOnes());
9241 : // If the mask is smaller, recompute the type.
9242 9738 : if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9243 10254 : TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9244 773 : ExtVT = MaskedVT;
9245 : }
9246 : }
9247 : }
9248 : }
9249 :
9250 : // If the load is shifted left (and the result isn't shifted back right),
9251 : // we can fold the truncate through the shift.
9252 : unsigned ShLeftAmt = 0;
9253 1823842 : if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9254 947018 : ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9255 393 : if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9256 9 : ShLeftAmt = N01->getZExtValue();
9257 9 : N0 = N0.getOperand(0);
9258 : }
9259 : }
9260 :
9261 : // If we haven't found a load, we can't narrow it.
9262 942492 : if (!isa<LoadSDNode>(N0))
9263 792923 : return SDValue();
9264 :
9265 149569 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9266 149569 : if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9267 146449 : return SDValue();
9268 :
9269 : auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9270 : unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9271 : unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9272 : return LVTStoreBits - EVTStoreBits - ShAmt;
9273 : };
9274 :
9275 : // For big endian targets, we need to adjust the offset to the pointer to
9276 : // load the correct bytes.
9277 3120 : if (DAG.getDataLayout().isBigEndian())
9278 167 : ShAmt = AdjustBigEndianShift(ShAmt);
9279 :
9280 6240 : EVT PtrType = N0.getOperand(1).getValueType();
9281 3120 : uint64_t PtrOff = ShAmt / 8;
9282 3120 : unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9283 3120 : SDLoc DL(LN0);
9284 : // The original load itself didn't wrap, so an offset within it doesn't.
9285 : SDNodeFlags Flags;
9286 : Flags.setNoUnsignedWrap(true);
9287 3120 : SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9288 3120 : PtrType, LN0->getBasePtr(),
9289 : DAG.getConstant(PtrOff, DL, PtrType),
9290 3120 : Flags);
9291 3120 : AddToWorklist(NewPtr.getNode());
9292 :
9293 : SDValue Load;
9294 3120 : if (ExtType == ISD::NON_EXTLOAD)
9295 5022 : Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9296 : LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9297 12555 : LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9298 : else
9299 1218 : Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9300 : LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9301 609 : NewAlign, LN0->getMemOperand()->getFlags(),
9302 3045 : LN0->getAAInfo());
9303 :
9304 : // Replace the old load's chain with the new load's chain.
9305 : WorklistRemover DeadNodes(*this);
9306 3120 : DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9307 :
9308 : // Shift the result left, if we've swallowed a left shift.
9309 3120 : SDValue Result = Load;
9310 3120 : if (ShLeftAmt != 0) {
9311 9 : EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9312 9 : if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9313 0 : ShImmTy = VT;
9314 : // If the shift amount is as large as the result size (but, presumably,
9315 : // no larger than the source) then the useful bits of the result are
9316 : // zero; we can't simply return the shortened shift, because the result
9317 : // of that operation is undefined.
9318 : SDLoc DL(N0);
9319 9 : if (ShLeftAmt >= VT.getSizeInBits())
9320 7 : Result = DAG.getConstant(0, DL, VT);
9321 : else
9322 2 : Result = DAG.getNode(ISD::SHL, DL, VT,
9323 2 : Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9324 : }
9325 :
9326 3120 : if (HasShiftedOffset) {
9327 : // ShAmt was adjusted above (for big-endian) to calculate the pointer
9328 : // offset; applying the same adjustment again recovers the original shift.
9329 80 : if (DAG.getDataLayout().isBigEndian())
9330 16 : ShAmt = AdjustBigEndianShift(ShAmt);
9331 :
9332 : // We're using a shifted mask, so the load now has an offset. This means we
9333 : // now need to shift the mask right to match the new load, and then shift
9334 : // the result of the AND left to restore the original bit position.
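: // Annotation: e.g. for (and (i32 load p), 0xff00) on little-endian, the
: // narrowed i8 load reads byte p+1, the mask becomes 0xff, and the AND
: // result is shifted left by 8 to recreate the original value.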
9335 160 : const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
9336 80 : APInt ShiftedMask = Mask.lshr(ShAmt);
9337 80 : DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
9338 80 : SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9339 80 : SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
9340 80 : ShiftC);
9341 160 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
9342 160 : DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
9343 : }
9344 : // Return the new loaded value.
9345 3120 : return Result;
9346 : }
9347 :
9348 46974 : SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9349 46974 : SDValue N0 = N->getOperand(0);
9350 46974 : SDValue N1 = N->getOperand(1);
9351 46974 : EVT VT = N->getValueType(0);
9352 46974 : EVT EVT = cast<VTSDNode>(N1)->getVT();
9353 : unsigned VTBits = VT.getScalarSizeInBits();
9354 : unsigned EVTBits = EVT.getScalarSizeInBits();
9355 :
9356 46974 : if (N0.isUndef())
9357 1 : return DAG.getUNDEF(VT);
9358 :
9359 : // fold (sext_in_reg c1) -> c1
9360 46973 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9361 24 : return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9362 :
9363 : // If the input is already sign extended, just drop the extension.
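: // Annotation: e.g. (sext_in_reg (sra X, 24), i8) on i32 is a no-op, since
: // the operand already has at least VTBits - EVTBits + 1 = 25 sign bits.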
9364 46961 : if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9365 1473 : return N0;
9366 :
9367 : // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9368 45488 : if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9369 4 : EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9370 4 : return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9371 8 : N0.getOperand(0), N1);
9372 :
9373 : // fold (sext_in_reg (sext x)) -> (sext x)
9374 : // fold (sext_in_reg (aext x)) -> (sext x)
9375 : // if x is small enough.
9376 45484 : if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9377 9486 : SDValue N00 = N0.getOperand(0);
9378 9486 : if (N00.getScalarValueSizeInBits() <= EVTBits &&
9379 4 : (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9380 4 : return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9381 : }
9382 :
9383 : // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9384 45468 : if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9385 45468 : N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9386 45496 : N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9387 14 : N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9388 0 : if (!LegalOperations ||
9389 0 : TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9390 0 : return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
9391 : }
9392 :
9393 : // fold (sext_in_reg (zext x)) -> (sext x)
9394 : // iff we are extending the source sign bit.
9395 45482 : if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9396 6 : SDValue N00 = N0.getOperand(0);
9397 6 : if (N00.getScalarValueSizeInBits() == EVTBits &&
9398 4 : (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9399 8 : return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9400 : }
9401 :
9402 : // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9403 90956 : if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9404 6 : return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9405 :
9406 : // fold operands of sext_in_reg based on knowledge that the top bits are not
9407 : // demanded.
9408 45476 : if (SimplifyDemandedBits(SDValue(N, 0)))
9409 2118 : return SDValue(N, 0);
9410 :
9411 : // fold (sext_in_reg (load x)) -> (smaller sextload x)
9412 : // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9413 43358 : if (SDValue NarrowLoad = ReduceLoadWidth(N))
9414 206 : return NarrowLoad;
9415 :
9416 : // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9417 : // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9418 : // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
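: // Annotation: with VTBits = 32 and EVTBits = 8, ShAmt = 24 gives
: // 32 - (24 + 8) = 0 < InSignBits, which always holds; ShAmt = 23 needs at
: // least two sign bits in the SRL input.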
9419 43152 : if (N0.getOpcode() == ISD::SRL) {
9420 : if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9421 20196 : if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9422 : // We can turn this into an SRA iff the input to the SRL is already sign
9423 : // extended enough.
9424 20194 : unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9425 20194 : if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9426 2358 : return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9427 4716 : N0.getOperand(0), N0.getOperand(1));
9428 : }
9429 : }
9430 :
9431 : // fold (sext_inreg (extload x)) -> (sextload x)
9432 : // If sextload is not supported by the target, we can only do the combine when
9433 : // the load has one use. Doing otherwise can block folding the extload with other
9434 : // extends that the target does support.
9435 : if (ISD::isEXTLoad(N0.getNode()) &&
9436 40 : ISD::isUNINDEXEDLoad(N0.getNode()) &&
9437 1700 : EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9438 2412 : ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9439 1043 : N0.hasOneUse()) ||
9440 1061 : TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9441 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9442 727 : SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9443 : LN0->getChain(),
9444 : LN0->getBasePtr(), EVT,
9445 728 : LN0->getMemOperand());
9446 727 : CombineTo(N, ExtLoad);
9447 : CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9448 727 : AddToWorklist(ExtLoad.getNode());
9449 727 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
9450 : }
9451 : // fold (sext_inreg (zextload x)) -> (sextload x) iff the load has one use
9452 73 : if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9453 0 : N0.hasOneUse() &&
9454 0 : EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9455 0 : ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9456 0 : TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9457 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9458 0 : SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9459 : LN0->getChain(),
9460 : LN0->getBasePtr(), EVT,
9461 0 : LN0->getMemOperand());
9462 0 : CombineTo(N, ExtLoad);
9463 : CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9464 0 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
9465 : }
9466 :
9467 : // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9468 40067 : if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9469 58 : if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9470 58 : N0.getOperand(1), false))
9471 8 : return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9472 16 : BSwap, N1);
9473 : }
9474 :
9475 40059 : return SDValue();
9476 : }
9477 :
9478 3049 : SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9479 3049 : SDValue N0 = N->getOperand(0);
9480 6098 : EVT VT = N->getValueType(0);
9481 :
9482 3049 : if (N0.isUndef())
9483 0 : return DAG.getUNDEF(VT);
9484 :
9485 3049 : if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
9486 3049 : LegalOperations))
9487 20 : return SDValue(Res, 0);
9488 :
9489 3029 : return SDValue();
9490 : }
9491 :
9492 5864 : SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9493 5864 : SDValue N0 = N->getOperand(0);
9494 11728 : EVT VT = N->getValueType(0);
9495 :
9496 5864 : if (N0.isUndef())
9497 0 : return DAG.getUNDEF(VT);
9498 :
9499 5864 : if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
9500 5864 : LegalOperations))
9501 22 : return SDValue(Res, 0);
9502 :
9503 5842 : return SDValue();
9504 : }
9505 :
9506 858614 : SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9507 858614 : SDValue N0 = N->getOperand(0);
9508 858614 : EVT VT = N->getValueType(0);
9509 858614 : bool isLE = DAG.getDataLayout().isLittleEndian();
9510 :
9511 : // noop truncate
9512 1717674 : if (N0.getValueType() == N->getValueType(0))
9513 0 : return N0;
9514 :
9515 : // fold (truncate (truncate x)) -> (truncate x)
9516 858614 : if (N0.getOpcode() == ISD::TRUNCATE)
9517 4587 : return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9518 :
9519 : // fold (truncate c1) -> c1
9520 856331 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9521 3355 : SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9522 3355 : if (C.getNode() != N)
9523 3332 : return C;
9524 : }
9525 :
9526 : // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9527 852999 : if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9528 1703424 : N0.getOpcode() == ISD::SIGN_EXTEND ||
9529 : N0.getOpcode() == ISD::ANY_EXTEND) {
9530 : // if the source is smaller than the dest, we still need an extend.
9531 12544 : if (N0.getOperand(0).getValueType().bitsLT(VT))
9532 2422 : return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9533 : // if the source is larger than the dest, then we just need the truncate.
9534 5061 : if (N0.getOperand(0).getValueType().bitsGT(VT))
9535 2276 : return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9536 : // if the source and dest are the same type, we can drop both the extend
9537 : // and the truncate.
9538 3967 : return N0.getOperand(0);
9539 : }
9540 :
9541 : // If this is anyext(trunc), don't fold it; allow ourselves to be folded.
9542 831141 : if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9543 1931 : return SDValue();
9544 :
9545 : // Fold extract-and-trunc into a narrow extract. For example:
9546 : // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9547 : // i32 y = TRUNCATE(i64 x)
9548 : // -- becomes --
9549 : // v16i8 b = BITCAST (v2i64 val)
9550 : // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9551 : //
9552 : // Note: We only run this optimization after type legalization (which often
9553 : // creates this pattern) and before operation legalization, after which
9554 : // we need to be more careful about the vector instructions that we generate.
9555 17435 : if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9556 852112 : LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9557 7316 : EVT VecTy = N0.getOperand(0).getValueType();
9558 7316 : EVT ExTy = N0.getValueType();
9559 14632 : EVT TrTy = N->getValueType(0);
9560 :
9561 : unsigned NumElem = VecTy.getVectorNumElements();
9562 7316 : unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9563 :
9564 7316 : EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9565 : assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9566 :
9567 7316 : SDValue EltNo = N0->getOperand(1);
9568 7316 : if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9569 7102 : int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9570 7102 : EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9571 7102 : int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9572 :
9573 : SDLoc DL(N);
9574 7102 : return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9575 7102 : DAG.getBitcast(NVT, N0.getOperand(0)),
9576 7102 : DAG.getConstant(Index, DL, IndexTy));
9577 : }
9578 : }
9579 :
9580 : // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9581 1675388 : if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9582 124 : EVT SrcVT = N0.getValueType();
9583 248 : if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9584 124 : TLI.isTruncateFree(SrcVT, VT)) {
9585 : SDLoc SL(N0);
9586 62 : SDValue Cond = N0.getOperand(0);
9587 124 : SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9588 186 : SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9589 126 : return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9590 : }
9591 : }
9592 :
9593 : // trunc (shl x, K) -> shl (trunc x), K, provided K < VT.getScalarSizeInBits()
9594 837632 : if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9595 840182 : (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9596 1275 : TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9597 1888 : SDValue Amt = N0.getOperand(1);
9598 442 : KnownBits Known;
9599 944 : DAG.computeKnownBits(Amt, Known);
9600 : unsigned Size = VT.getScalarSizeInBits();
9601 1888 : if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9602 : SDLoc SL(N);
9603 502 : EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9604 :
9605 1506 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9606 502 : if (AmtVT != Amt.getValueType()) {
9607 7 : Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9608 7 : AddToWorklist(Amt.getNode());
9609 : }
9610 1004 : return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9611 : }
9612 : }
9613 :
9614 : // Fold a series of buildvector, bitcast, and truncate if possible.
9615 : // For example, fold
9616 : // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9617 : // (2xi32 (buildvector x, y)).
9618 13549 : if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9619 1181 : N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9620 837161 : N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9621 1 : N0.getOperand(0).hasOneUse()) {
9622 2 : SDValue BuildVect = N0.getOperand(0);
9623 1 : EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9624 1 : EVT TruncVecEltTy = VT.getVectorElementType();
9625 :
9626 : // Check that the element types match.
9627 1 : if (BuildVectEltTy == TruncVecEltTy) {
9628 : // Now we only need to compute the offset of the truncated elements.
9629 : unsigned BuildVecNumElts = BuildVect.getNumOperands();
9630 : unsigned TruncVecNumElts = VT.getVectorNumElements();
9631 1 : unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9632 :
9633 : assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9634 : "Invalid number of elements");
9635 :
9636 : SmallVector<SDValue, 8> Opnds;
9637 3 : for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9638 2 : Opnds.push_back(BuildVect.getOperand(i));
9639 :
9640 2 : return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9641 : }
9642 : }
9643 :
9644 : // See if we can simplify the input to this truncate through knowledge that
9645 : // only the low bits are being used.
9646 : // For example "trunc (or (shl x, 8), y)" // -> trunc y
9647 : // Currently we only perform this optimization on scalars because vectors
9648 : // may have different active low bits.
9649 837129 : if (!VT.isVector()) {
9650 : APInt Mask =
9651 826969 : APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9652 826969 : if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9653 3195 : return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9654 : }
9655 :
9656 : // fold (truncate (load x)) -> (smaller load x)
9657 : // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9658 835539 : if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9659 816111 : if (SDValue Reduced = ReduceLoadWidth(N))
9660 2444 : return Reduced;
9661 :
9662 : // Handle the case where the load remains an extending load even
9663 : // after truncation.
9664 813667 : if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9665 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9666 12528 : if (!LN0->isVolatile() &&
9667 24941 : LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9668 548 : SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9669 : VT, LN0->getChain(), LN0->getBasePtr(),
9670 : LN0->getMemoryVT(),
9671 274 : LN0->getMemOperand());
9672 274 : DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9673 274 : return NewLoad;
9674 : }
9675 : }
9676 : }
9677 :
9678 : // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
9679 : // where ... are all 'undef'.
9680 1665642 : if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9681 : SmallVector<EVT, 8> VTs;
9682 : SDValue V;
9683 : unsigned Idx = 0;
9684 : unsigned NumDefs = 0;
9685 :
9686 398 : for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9687 776 : SDValue X = N0.getOperand(i);
9688 388 : if (!X.isUndef()) {
9689 362 : V = X;
9690 : Idx = i;
9691 362 : NumDefs++;
9692 : }
9693 : // Stop if more than one member is non-undef.
9694 388 : if (NumDefs > 1)
9695 : break;
9696 212 : VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9697 : VT.getVectorElementType(),
9698 636 : X.getValueType().getVectorNumElements()));
9699 : }
9700 :
9701 186 : if (NumDefs == 0)
9702 0 : return DAG.getUNDEF(VT);
9703 :
9704 186 : if (NumDefs == 1) {
9705 : assert(V.getNode() && "The single defined operand is empty!");
9706 : SmallVector<SDValue, 8> Opnds;
9707 46 : for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9708 36 : if (i != Idx) {
9709 52 : Opnds.push_back(DAG.getUNDEF(VTs[i]));
9710 26 : continue;
9711 : }
9712 20 : SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9713 10 : AddToWorklist(NV.getNode());
9714 10 : Opnds.push_back(NV);
9715 : }
9716 20 : return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9717 : }
9718 : }
9719 :
9720 : // Fold truncate of a bitcast of a vector to an extract of the low vector
9721 : // element.
9722 : //
9723 : // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9724 1682853 : if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9725 17014 : SDValue VecSrc = N0.getOperand(0);
9726 17014 : EVT SrcVT = VecSrc.getValueType();
9727 28694 : if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9728 11680 : (!LegalOperations ||
9729 10767 : TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9730 : SDLoc SL(N);
9731 :
9732 10483 : EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
9733 10486 : unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9734 10483 : return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9735 10483 : VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9736 : }
9737 : }
9738 :
9739 : // Simplify the operands using demanded-bits information.
9740 1634507 : if (!VT.isVector() &&
9741 812179 : SimplifyDemandedBits(SDValue(N, 0)))
9742 12314 : return SDValue(N, 0);
9743 :
9744 : // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9745 : // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9746 : // When the adde's carry is not used.
9747 810037 : if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9748 810058 : N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9749 21 : (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9750 : SDLoc SL(N);
9751 60 : auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9752 60 : auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9753 40 : auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9754 40 : return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9755 : }
9756 :
9757 : // fold (truncate (extract_subvector(ext x))) ->
9758 : // (extract_subvector x)
9759 : // TODO: This can be generalized to cover cases where the truncate and extract
9760 : // do not fully cancel each other out.
9761 809994 : if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9762 153 : SDValue N00 = N0.getOperand(0);
9763 25 : if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9764 170 : N00.getOpcode() == ISD::ZERO_EXTEND ||
9765 : N00.getOpcode() == ISD::ANY_EXTEND) {
9766 272 : if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9767 : VT.getVectorElementType())
9768 136 : return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9769 272 : N00.getOperand(0), N0.getOperand(1));
9770 : }
9771 : }
9772 :
9773 809858 : if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9774 4 : return NewVSel;
9775 :
9776 809854 : return SDValue();
9777 : }
9778 :
9779 : static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9780 58415 : SDValue Elt = N->getOperand(i);
9781 116830 : if (Elt.getOpcode() != ISD::MERGE_VALUES)
9782 : return Elt.getNode();
9783 513 : return Elt.getOperand(Elt.getResNo()).getNode();
9784 : }
9785 :
9786 : /// build_pair (load, load) -> load
9787 : /// if load locations are consecutive.
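: /// e.g. on little-endian, (i64 build_pair (i32 load [p]), (i32 load [p+4]))
: /// can become (i64 load [p]) when the wider load is sufficiently aligned
: /// (illustrative annotation).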
9788 58415 : SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
9789 : assert(N->getOpcode() == ISD::BUILD_PAIR);
9790 :
9791 : LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
9792 : LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
9793 :
9794 : // A BUILD_PAIR always has the least significant part in elt 0 and the
9795 : // most significant part in elt 1. So when combining into one large load, we
9796 : // need to consider the endianness.
9797 58415 : if (DAG.getDataLayout().isBigEndian())
9798 : std::swap(LD1, LD2);
9799 :
9800 61036 : if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
9801 : LD1->getAddressSpace() != LD2->getAddressSpace())
9802 55794 : return SDValue();
9803 5242 : EVT LD1VT = LD1->getValueType(0);
9804 : unsigned LD1Bytes = LD1VT.getStoreSize();
9805 2621 : if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
9806 2621 : DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
9807 2483 : unsigned Align = LD1->getAlignment();
9808 4966 : unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
9809 2483 : VT.getTypeForEVT(*DAG.getContext()));
9810 :
9811 2483 : if (NewAlign <= Align &&
9812 2256 : (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
9813 2256 : return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
9814 4512 : LD1->getPointerInfo(), Align);
9815 : }
9816 :
9817 365 : return SDValue();
9818 : }
9819 :
9820 : static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
9821 : // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
9822 : // and Lo parts; on big-endian machines it doesn't.
9823 10 : return DAG.getDataLayout().isBigEndian() ? 1 : 0;
9824 : }
9825 :
9826 679701 : static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
9827 : const TargetLowering &TLI) {
9828 : // If this is not a bitcast to an FP type or if the target doesn't have
9829 : // IEEE754-compliant FP logic, we're done.
9830 679701 : EVT VT = N->getValueType(0);
9831 679701 : if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
9832 625359 : return SDValue();
9833 :
9834 : // TODO: Handle cases where the integer constant is a different scalar
9835 : // bitwidth from the FP.
9836 54342 : SDValue N0 = N->getOperand(0);
9837 54342 : EVT SourceVT = N0.getValueType();
9838 54342 : if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
9839 19441 : return SDValue();
9840 :
9841 : unsigned FPOpcode;
9842 : APInt SignMask;
9843 34901 : switch (N0.getOpcode()) {
9844 2099 : case ISD::AND:
9845 : FPOpcode = ISD::FABS;
9846 2099 : SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
9847 2099 : break;
9848 689 : case ISD::XOR:
9849 : FPOpcode = ISD::FNEG;
9850 689 : SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
9851 689 : break;
9852 971 : case ISD::OR:
9853 : FPOpcode = ISD::FABS;
9854 971 : SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
9855 971 : break;
9856 31142 : default:
9857 31142 : return SDValue();
9858 : }
9859 :
9860 : // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
9861 : // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
9862 : // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
9863 : // fneg (fabs X)
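: // Annotation: the OR case yields fneg (fabs X) because OR'ing in the sign
: // mask unconditionally sets the sign bit, producing -|X| for any X.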
9864 3759 : SDValue LogicOp0 = N0.getOperand(0);
9865 3759 : ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
9866 650 : if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
9867 3759 : LogicOp0.getOpcode() == ISD::BITCAST &&
9868 98 : LogicOp0.getOperand(0).getValueType() == VT) {
9869 98 : SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
9870 : NumFPLogicOpsConv++;
9871 49 : if (N0.getOpcode() == ISD::OR)
9872 20 : return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
9873 39 : return FPOp;
9874 : }
9875 :
9876 3710 : return SDValue();
9877 : }
9878 :
9879 821241 : SDValue DAGCombiner::visitBITCAST(SDNode *N) {
9880 821241 : SDValue N0 = N->getOperand(0);
9881 1642482 : EVT VT = N->getValueType(0);
9882 :
9883 821241 : if (N0.isUndef())
9884 126 : return DAG.getUNDEF(VT);
9885 :
9886 : // If the input is a BUILD_VECTOR with all constant elements, fold this now.
9887 : // Only do this before legalize types, since we might create an illegal
9888 : // scalar type. Even if we knew we wouldn't create an illegal scalar type
9889 : // we can only do this before legalize ops, since the target may be
9890 : // depending on the bitcast.
9891 : // First check to see if this is all constant.
9892 151854 : if (!LegalTypes &&
9893 90104 : N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
9894 910331 : VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
9895 : return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
9896 88248 : VT.getVectorElementType());
9897 :
9898 : // If the input is a constant, let getNode fold it.
9899 : if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
9900 : // If we can't allow illegal operations, we need to check that this is just
9901 : // an fp -> int or int -> fp conversion and that the resulting operation will
9902 : // be legal.
9903 941 : if (!LegalOperations ||
9904 740 : (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
9905 949 : TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
9906 100 : (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
9907 27 : TLI.isOperationLegal(ISD::Constant, VT))) {
9908 256 : SDValue C = DAG.getBitcast(VT, N0);
9909 256 : if (C.getNode() != N)
9910 98 : return C;
9911 : }
9912 : }
9913 :
9914 : // (conv (conv x, t1), t2) -> (conv x, t2)
9915 732769 : if (N0.getOpcode() == ISD::BITCAST)
9916 91980 : return DAG.getBitcast(VT, N0.getOperand(0));
9917 :
9918 : // fold (conv (load x)) -> (load (conv*)x)
9919 : // If the resultant load doesn't need a higher alignment than the original!
9920 196110 : if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9921 : // Do not remove the cast if the types differ in endian layout.
9922 96318 : TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
9923 96318 : TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
9924 : // If the load is volatile, we only want to change the load type if the
9925 : // resulting load is legal. Otherwise we might increase the number of
9926 : // memory accesses. We don't care if the original type was legal or not
9927 : // as we assume software couldn't rely on the number of accesses of an
9928 : // illegal type.
9929 96316 : ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9930 99152 : TLI.isOperationLegal(ISD::LOAD, VT)) &&
9931 31850 : TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
9932 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9933 7177 : unsigned OrigAlign = LN0->getAlignment();
9934 :
9935 7177 : bool Fast = false;
9936 7177 : if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9937 7177 : LN0->getAddressSpace(), OrigAlign, &Fast) &&
9938 : Fast) {
9939 : SDValue Load =
9940 14156 : DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
9941 : LN0->getPointerInfo(), OrigAlign,
9942 14168 : LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9943 7078 : DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9944 7078 : return Load;
9945 : }
9946 : }
9947 :
9948 679701 : if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
9949 49 : return V;
9950 :
9951 : // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9952 : // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9953 : //
9954 : // For ppc_fp128:
9955 : // fold (bitcast (fneg x)) ->
9956 : // flipbit = signbit
9957 : // (xor (bitcast x) (build_pair flipbit, flipbit))
9958 : //
9959 : // fold (bitcast (fabs x)) ->
9960 : // flipbit = (and (extract_element (bitcast x), 0), signbit)
9961 : // (xor (bitcast x) (build_pair flipbit, flipbit))
9962 : // This often reduces constant pool loads.
9963 680134 : if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
9964 336 : (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
9965 347 : N0.getNode()->hasOneUse() && VT.isInteger() &&
9966 1359404 : !VT.isVector() && !N0.getValueType().isVector()) {
9967 96 : SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
9968 48 : AddToWorklist(NewConv.getNode());
9969 :
9970 : SDLoc DL(N);
9971 10 : if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9972 : assert(VT.getSizeInBits() == 128);
9973 10 : SDValue SignBit = DAG.getConstant(
9974 20 : APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
9975 10 : SDValue FlipBit;
9976 10 : if (N0.getOpcode() == ISD::FNEG) {
9977 5 : FlipBit = SignBit;
9978 5 : AddToWorklist(FlipBit.getNode());
9979 : } else {
9980 : assert(N0.getOpcode() == ISD::FABS);
9981 : SDValue Hi =
9982 5 : DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
9983 5 : DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9984 15 : SDLoc(NewConv)));
9985 5 : AddToWorklist(Hi.getNode());
9986 5 : FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
9987 5 : AddToWorklist(FlipBit.getNode());
9988 : }
9989 : SDValue FlipBits =
9990 10 : DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9991 10 : AddToWorklist(FlipBits.getNode());
9992 20 : return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
9993 : }
9994 38 : APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9995 38 : if (N0.getOpcode() == ISD::FNEG)
9996 28 : return DAG.getNode(ISD::XOR, DL, VT,
9997 28 : NewConv, DAG.getConstant(SignBit, DL, VT));
9998 : assert(N0.getOpcode() == ISD::FABS);
9999 10 : return DAG.getNode(ISD::AND, DL, VT,
10000 20 : NewConv, DAG.getConstant(~SignBit, DL, VT));
10001 : }
10002 :
10003 : // fold (bitconvert (fcopysign cst, x)) ->
10004 : // (or (and (bitconvert x), sign), (and cst, (not sign)))
10005 : // Note that we don't handle (copysign x, cst) because this can always be
10006 : // folded to an fneg or fabs.
10007 : //
10008 : // For ppc_fp128:
10009 : // fold (bitcast (fcopysign cst, x)) ->
10010 : // flipbit = (and (extract_element
10011 : // (xor (bitcast cst), (bitcast x)), 0),
10012 : // signbit)
10013 : // (xor (bitcast cst) (build_pair flipbit, flipbit))
10014 : if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10015 6 : isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10016 679616 : VT.isInteger() && !VT.isVector()) {
10017 6 : unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10018 6 : EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10019 6 : if (isTypeLegal(IntXVT)) {
10020 12 : SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10021 6 : AddToWorklist(X.getNode());
10022 :
10023 : // If X has a different width than the result/lhs, sext it or truncate it.
10024 6 : unsigned VTWidth = VT.getSizeInBits();
10025 6 : if (OrigXWidth < VTWidth) {
10026 0 : X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10027 0 : AddToWorklist(X.getNode());
10028 6 : } else if (OrigXWidth > VTWidth) {
10029 : // To get the sign bit in the right place, we have to shift it right
10030 : // before truncating.
10031 : SDLoc DL(X);
10032 0 : X = DAG.getNode(ISD::SRL, DL,
10033 : X.getValueType(), X,
10034 0 : DAG.getConstant(OrigXWidth-VTWidth, DL,
10035 0 : X.getValueType()));
10036 0 : AddToWorklist(X.getNode());
10037 0 : X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10038 0 : AddToWorklist(X.getNode());
10039 : }
10040 :
10041 5 : if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10042 5 : APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10043 10 : SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10044 5 : AddToWorklist(Cst.getNode());
10045 10 : SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10046 5 : AddToWorklist(X.getNode());
10047 5 : SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10048 5 : AddToWorklist(XorResult.getNode());
10049 5 : SDValue XorResult64 = DAG.getNode(
10050 5 : ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10051 5 : DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10052 10 : SDLoc(XorResult)));
10053 5 : AddToWorklist(XorResult64.getNode());
10054 : SDValue FlipBit =
10055 5 : DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10056 15 : DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10057 5 : AddToWorklist(FlipBit.getNode());
10058 : SDValue FlipBits =
10059 5 : DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10060 5 : AddToWorklist(FlipBits.getNode());
10061 10 : return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10062 : }
10063 1 : APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10064 2 : X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10065 2 : X, DAG.getConstant(SignBit, SDLoc(X), VT));
10066 1 : AddToWorklist(X.getNode());
10067 :
10068 2 : SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10069 2 : Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10070 3 : Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10071 1 : AddToWorklist(Cst.getNode());
10072 :
10073 2 : return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10074 : }
10075 : }
10076 :
10077 : // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10078 679598 : if (N0.getOpcode() == ISD::BUILD_PAIR)
10079 7396 : if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10080 38 : return CombineLD;
10081 :
10082 : // Remove double bitcasts from shuffles - this is often a legacy of
10083 : // XformToShuffleWithZero being used to combine bitmaskings (of
10084 : // float vectors bitcast to integer vectors) into shuffles.
10085 : // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10086 333566 : if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10087 146810 : N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
10088 694106 : VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10089 680375 : !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10090 : ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10091 :
10092 : // If an operand is a bitcast, peek through it if its source has the original VT.
10093 : // If an operand is a constant, just bitcast it back to the original VT.
10094 : auto PeekThroughBitcast = [&](SDValue Op) {
10095 : if (Op.getOpcode() == ISD::BITCAST &&
10096 : Op.getOperand(0).getValueType() == VT)
10097 : return SDValue(Op.getOperand(0));
10098 : if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10099 : ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10100 : return DAG.getBitcast(VT, Op);
10101 : return SDValue();
10102 815 : };
10103 :
10104 : // FIXME: If either input vector is bitcast, try to convert the shuffle to
10105 : // the result type of this bitcast. This would eliminate at least one
10106 : // bitcast. See the transform in InstCombine.
10107 1630 : SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10108 1630 : SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10109 815 : if (!(SV0 && SV1))
10110 594 : return SDValue();
10111 :
10112 : int MaskScale =
10113 442 : VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10114 : SmallVector<int, 8> NewMask;
10115 1109 : for (int M : SVN->getMask())
10116 3506 : for (int i = 0; i != MaskScale; ++i)
10117 2618 : NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
10118 :
10119 442 : bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10120 221 : if (!LegalMask) {
10121 : std::swap(SV0, SV1);
10122 : ShuffleVectorSDNode::commuteMask(NewMask);
10123 10 : LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10124 : }
10125 :
10126 221 : if (LegalMask)
10127 432 : return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10128 : }
10129 :
10130 678750 : return SDValue();
10131 : }
10132 :
10133 : SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10134 51019 : EVT VT = N->getValueType(0);
10135 51019 : return CombineConsecutiveLoads(N, VT);
10136 : }
10137 :
10138 : /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10139 : /// operands. DstEltVT indicates the destination element value type.
10140 88394 : SDValue DAGCombiner::
10141 : ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10142 176788 : EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10143 :
10144 : // If this is already the right type, we're done.
10145 88394 : if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10146 :
10147 88394 : unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10148 88394 : unsigned DstBitSize = DstEltVT.getSizeInBits();
10149 :
10150 : // If this is a conversion of N elements of one type to N elements of another
10151 : // type, convert each element. This handles FP<->INT cases.
10152 88394 : if (SrcBitSize == DstBitSize) {
10153 : SmallVector<SDValue, 8> Ops;
10154 1045 : for (SDValue Op : BV->op_values()) {
10155 : // If the vector element type is not legal, the BUILD_VECTOR operands
10156 : // are promoted and implicitly truncated. Make that explicit here.
10157 797 : if (Op.getValueType() != SrcEltVT)
10158 0 : Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10159 797 : Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10160 797 : AddToWorklist(Ops.back().getNode());
10161 : }
10162 248 : EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10163 744 : BV->getValueType(0).getVectorNumElements());
10164 496 : return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10165 : }
10166 :
10167 : // Otherwise, we're growing or shrinking the elements. To avoid having to
10168 : // handle annoying details of growing/shrinking FP values, we convert them to
10169 : // int first.
10170 88146 : if (SrcEltVT.isFloatingPoint()) {
10171 : // Convert the input float vector to an int vector whose elements have the
10172 : // same size.
10173 64 : EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10174 64 : BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10175 64 : SrcEltVT = IntVT;
10176 : }
10177 :
10178 : // Now we know the input is an integer vector. If the output is a FP type,
10179 : // convert to integer first, then to FP of the right size.
10180 88146 : if (DstEltVT.isFloatingPoint()) {
10181 41 : EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10182 41 : SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10183 :
10184 : // Next, convert to FP elements of the same size.
10185 41 : return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10186 : }
10187 :
10188 : SDLoc DL(BV);
10189 :
10190 : // Okay, we know the src/dst types are both integers of differing sizes.
10191 : // Handle growing first.
10192 : assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10193 88105 : if (SrcBitSize < DstBitSize) {
10194 87691 : unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
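: // Annotation: e.g. bitcasting <4 x i16> <1, 2, 3, 4> to <2 x i32> on
: // little-endian produces <0x00020001, 0x00040003>; the more significant
: // source element is shifted in first.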
10195 :
10196 : SmallVector<SDValue, 8> Ops;
10197 263243 : for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10198 175552 : i += NumInputsPerOutput) {
10199 175552 : bool isLE = DAG.getDataLayout().isLittleEndian();
10200 : APInt NewBits = APInt(DstBitSize, 0);
10201 : bool EltIsUndef = true;
10202 527104 : for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10203 : // Shift the previously computed bits over.
10204 351552 : NewBits <<= SrcBitSize;
10205 351552 : SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10206 351552 : if (Op.isUndef()) continue;
10207 : EltIsUndef = false;
10208 :
10209 351398 : NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10210 702796 : zextOrTrunc(SrcBitSize).zext(DstBitSize);
10211 : }
10212 :
10213 175552 : if (EltIsUndef)
10214 16 : Ops.push_back(DAG.getUNDEF(DstEltVT));
10215 : else
10216 175536 : Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10217 : }
10218 :
10219 87691 : EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10220 175382 : return DAG.getBuildVector(VT, DL, Ops);
10221 : }
10222 :
10223 : // Finally, this must be the case where we are shrinking elements: each input
10224 : // turns into multiple outputs.
10225 414 : unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
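: // Annotation: e.g. i32 0x00020001 splits into i16 elements <1, 2> on
: // little-endian; pieces are emitted low-bits-first, then reversed per
: // element for big-endian.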
10226 414 : EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10227 828 : NumOutputsPerInput*BV->getNumOperands());
10228 : SmallVector<SDValue, 8> Ops;
10229 :
10230 2316 : for (const SDValue &Op : BV->op_values()) {
10231 3804 : if (Op.isUndef()) {
10232 13 : Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10233 13 : continue;
10234 : }
10235 :
10236 : APInt OpVal = cast<ConstantSDNode>(Op)->
10237 3778 : getAPIntValue().zextOrTrunc(SrcBitSize);
10238 :
10239 8221 : for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10240 6332 : APInt ThisVal = OpVal.trunc(DstBitSize);
10241 6332 : Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10242 : OpVal.lshrInPlace(DstBitSize);
10243 : }
10244 :
10245 : // For big endian targets, swap the order of the pieces of each element.
10246 1889 : if (DAG.getDataLayout().isBigEndian())
10247 76 : std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10248 : }
10249 :
10250 828 : return DAG.getBuildVector(VT, DL, Ops);
10251 : }
10252 :
10253 : static bool isContractable(SDNode *N) {
10254 37613 : SDNodeFlags F = N->getFlags();
10255 37613 : return F.hasAllowContract() || F.hasAllowReassociation();
10256 : }
10257 :
10258 : /// Try to perform FMA combining on a given FADD node.
10259 43058 : SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10260 43058 : SDValue N0 = N->getOperand(0);
10261 43058 : SDValue N1 = N->getOperand(1);
10262 86116 : EVT VT = N->getValueType(0);
10263 : SDLoc SL(N);
10264 :
10265 43058 : const TargetOptions &Options = DAG.getTarget().Options;
10266 :
10267 : // Floating-point multiply-add with intermediate rounding.
10268 43058 : bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10269 :
10270 : // Floating-point multiply-add without intermediate rounding.
10271 : bool HasFMA =
10272 43058 : TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10273 11596 : (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10274 :
10275 : // No valid opcode, do not combine.
10276 43058 : if (!HasFMAD && !HasFMA)
10277 25817 : return SDValue();
10278 :
10279 17241 : SDNodeFlags Flags = N->getFlags();
10280 17241 : bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10281 17241 : bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10282 17241 : CanFuse || HasFMAD);
10283 : // If the addition is not contractable, do not combine.
10284 : if (!AllowFusionGlobally && !isContractable(N))
10285 9443 : return SDValue();
10286 :
10287 15596 : const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10288 7798 : if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10289 56 : return SDValue();
10290 :
10291 : // Always prefer FMAD to FMA for precision.
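: // Annotation: FMAD rounds the intermediate product exactly as separate
: // fmul + fadd would, so it preserves the unfused result; FMA does not
: // round between the multiply and the add.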
10292 7742 : unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10293 7742 : bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10294 :
10295 : // Returns true if the node is an FMUL that is contractable, either due to
10296 : // global flags or its SDNodeFlags.
10297 : auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10298 9453 : if (N.getOpcode() != ISD::FMUL)
10299 : return false;
10300 4005 : return AllowFusionGlobally || isContractable(N.getNode());
10301 : };
10302 : // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10303 : // prefer to fold the multiply with fewer uses.
10304 7742 : if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10305 224 : if (N0.getNode()->use_size() > N1.getNode()->use_size())
10306 : std::swap(N0, N1);
10307 : }
10308 :
10309 : // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10310 2012 : if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10311 1993 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10312 1993 : N0.getOperand(0), N0.getOperand(1), N1, Flags);
10313 : }
10314 :
10315 : // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10316 : // Note: Commutes FADD operands.
10317 326 : if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10318 299 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10319 299 : N1.getOperand(0), N1.getOperand(1), N0, Flags);
10320 : }
10321 :
10322 : // Look through FP_EXTEND nodes to do more combining.
10323 :
10324 : // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10325 5450 : if (N0.getOpcode() == ISD::FP_EXTEND) {
10326 17 : SDValue N00 = N0.getOperand(0);
10327 4 : if (isContractableFMUL(N00) &&
10328 8 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10329 3 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10330 3 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10331 : N00.getOperand(0)),
10332 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10333 3 : N00.getOperand(1)), N1, Flags);
10334 : }
10335 : }
10336 :
10337 : // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10338 : // Note: Commutes FADD operands.
10339 5447 : if (N1.getOpcode() == ISD::FP_EXTEND) {
10340 16 : SDValue N10 = N1.getOperand(0);
10341 4 : if (isContractableFMUL(N10) &&
10342 8 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10343 3 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10344 3 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10345 : N10.getOperand(0)),
10346 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10347 3 : N10.getOperand(1)), N0, Flags);
10348 : }
10349 : }
10350 :
10351 : // More folding opportunities when target permits.
10352 5444 : if (Aggressive) {
10353 : // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
10354 322 : if (CanFuse &&
10355 322 : N0.getOpcode() == PreferredFusedOpcode &&
10356 20 : N0.getOperand(2).getOpcode() == ISD::FMUL &&
10357 2476 : N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10358 7 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10359 : N0.getOperand(0), N0.getOperand(1),
10360 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10361 : N0.getOperand(2).getOperand(0),
10362 : N0.getOperand(2).getOperand(1),
10363 28 : N1, Flags), Flags);
10364 : }
10365 :
10366 : // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
10367 315 : if (CanFuse &&
10368 630 : N1->getOpcode() == PreferredFusedOpcode &&
10369 20 : N1.getOperand(2).getOpcode() == ISD::FMUL &&
10370 2469 : N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10371 2 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10372 : N1.getOperand(0), N1.getOperand(1),
10373 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10374 : N1.getOperand(2).getOperand(0),
10375 : N1.getOperand(2).getOperand(1),
10376 2 : N0, Flags), Flags);
10377 : }
10378 :
10379 :
10380 : // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10381 : // -> (fma x, y, (fma (fpext u), (fpext v), z))
10382 : auto FoldFAddFMAFPExtFMul = [&] (
10383 : SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10384 : SDNodeFlags Flags) {
10385 : return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10386 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10387 : DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10388 : DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10389 : Z, Flags), Flags);
10390 2467 : };
10391 2467 : if (N0.getOpcode() == PreferredFusedOpcode) {
10392 86 : SDValue N02 = N0.getOperand(2);
10393 86 : if (N02.getOpcode() == ISD::FP_EXTEND) {
10394 8 : SDValue N020 = N02.getOperand(0);
10395 8 : if (isContractableFMUL(N020) &&
10396 16 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10397 : return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10398 : N020.getOperand(0), N020.getOperand(1),
10399 6 : N1, Flags);
10400 : }
10401 : }
10402 : }
10403 :
10404 : // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10405 : // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10406 : // FIXME: This turns two single-precision and one double-precision
10407 : // operation into two double-precision operations, which might not be
10408 : // interesting for all targets, especially GPUs.
10409 : auto FoldFAddFPExtFMAFMul = [&] (
10410 : SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10411 : SDNodeFlags Flags) {
10412 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10413 : DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10414 : DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10415 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10416 : DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10417 : DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10418 : Z, Flags), Flags);
10419 2461 : };
10420 2461 : if (N0.getOpcode() == ISD::FP_EXTEND) {
10421 14 : SDValue N00 = N0.getOperand(0);
10422 14 : if (N00.getOpcode() == PreferredFusedOpcode) {
10423 4 : SDValue N002 = N00.getOperand(2);
10424 4 : if (isContractableFMUL(N002) &&
10425 8 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10426 : return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10427 : N002.getOperand(0), N002.getOperand(1),
10428 4 : N1, Flags);
10429 : }
10430 : }
10431 : }
10432 :
10433 : // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
10434 : // -> (fma y, z, (fma (fpext u), (fpext v), x))
10435 2457 : if (N1.getOpcode() == PreferredFusedOpcode) {
10436 62 : SDValue N12 = N1.getOperand(2);
10437 62 : if (N12.getOpcode() == ISD::FP_EXTEND) {
10438 6 : SDValue N120 = N12.getOperand(0);
10439 6 : if (isContractableFMUL(N120) &&
10440 12 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10441 : return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10442 : N120.getOperand(0), N120.getOperand(1),
10443 5 : N0, Flags);
10444 : }
10445 : }
10446 : }
10447 :
10448 : // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
10449 : // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10450 : // FIXME: This turns two single-precision and one double-precision
10451 : // operation into two double-precision operations, which might not be
10452 : // interesting for all targets, especially GPUs.
10453 2452 : if (N1.getOpcode() == ISD::FP_EXTEND) {
10454 13 : SDValue N10 = N1.getOperand(0);
10455 13 : if (N10.getOpcode() == PreferredFusedOpcode) {
10456 4 : SDValue N102 = N10.getOperand(2);
10457 4 : if (isContractableFMUL(N102) &&
10458 8 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10459 : return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10460 : N102.getOperand(0), N102.getOperand(1),
10461 4 : N0, Flags);
10462 : }
10463 : }
10464 : }
10465 : }
10466 :
10467 5416 : return SDValue();
10468 : }
10469 :
10470 : /// Try to perform FMA combining on a given FSUB node.
10471 12480 : SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10472 12480 : SDValue N0 = N->getOperand(0);
10473 12480 : SDValue N1 = N->getOperand(1);
10474 24960 : EVT VT = N->getValueType(0);
10475 : SDLoc SL(N);
10476 :
10477 12480 : const TargetOptions &Options = DAG.getTarget().Options;
10478 : // Floating-point multiply-add with intermediate rounding.
10479 12480 : bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10480 :
10481 : // Floating-point multiply-add without intermediate rounding.
10482 : bool HasFMA =
10483 12480 : TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10484 2945 : (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10485 :
10486 : // No valid opcode, do not combine.
10487 12480 : if (!HasFMAD && !HasFMA)
10488 8831 : return SDValue();
10489 :
10490 3649 : const SDNodeFlags Flags = N->getFlags();
10491 3649 : bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10492 3649 : bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10493 3649 : CanFuse || HasFMAD);
10494 :
10495 : // If the subtraction is not contractable, do not combine.
10496 : if (!AllowFusionGlobally && !isContractable(N))
10497 1553 : return SDValue();
10498 :
10499 4192 : const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10500 2096 : if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10501 58 : return SDValue();
10502 :
10503 : // Always prefer FMAD to FMA for precision.
10504 2038 : unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10505 2038 : bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10506 :
10507 : // Is the node an FMUL and contractable either due to global flags or
10508 : // SDNodeFlags.
10509 : auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10510 2249 : if (N.getOpcode() != ISD::FMUL)
10511 : return false;
10512 1019 : return AllowFusionGlobally || isContractable(N.getNode());
10513 : };
10514 :
10515 : // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10516 306 : if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10517 298 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10518 : N0.getOperand(0), N0.getOperand(1),
10519 298 : DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10520 : }
10521 :
10522 : // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10523 : // Note: Commutes FSUB operands.
10524 569 : if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10525 539 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10526 : DAG.getNode(ISD::FNEG, SL, VT,
10527 : N1.getOperand(0)),
10528 539 : N1.getOperand(1), N0, Flags);
10529 : }
10530 :
10531 : // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10532 1288 : if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10533 66 : (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10534 87 : SDValue N00 = N0.getOperand(0).getOperand(0);
10535 87 : SDValue N01 = N0.getOperand(0).getOperand(1);
10536 87 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10537 87 : DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10538 87 : DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10539 : }
10540 :
10541 : // Look through FP_EXTEND nodes to do more combining.
10542 :
10543 : // fold (fsub (fpext (fmul x, y)), z)
10544 : // -> (fma (fpext x), (fpext y), (fneg z))
10545 1114 : if (N0.getOpcode() == ISD::FP_EXTEND) {
10546 23 : SDValue N00 = N0.getOperand(0);
10547 8 : if (isContractableFMUL(N00) &&
10548 16 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10549 5 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10550 5 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10551 : N00.getOperand(0)),
10552 5 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10553 : N00.getOperand(1)),
10554 5 : DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10555 : }
10556 : }
10557 :
10558 : // fold (fsub x, (fpext (fmul y, z)))
10559 : // -> (fma (fneg (fpext y)), (fpext z), x)
10560 : // Note: Commutes FSUB operands.
10561 1109 : if (N1.getOpcode() == ISD::FP_EXTEND) {
10562 16 : SDValue N10 = N1.getOperand(0);
10563 6 : if (isContractableFMUL(N10) &&
10564 12 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10565 3 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10566 3 : DAG.getNode(ISD::FNEG, SL, VT,
10567 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10568 : N10.getOperand(0))),
10569 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10570 : N10.getOperand(1)),
10571 3 : N0, Flags);
10572 : }
10573 : }
10574 :
10575 : // fold (fsub (fpext (fneg (fmul, x, y))), z)
10576 : // -> (fneg (fma (fpext x), (fpext y), z))
10577 : // Note: This could be removed with appropriate canonicalization of the
10578 : // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10579 : // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10580 : // from implementing the canonicalization in visitFSUB.
10581 1106 : if (N0.getOpcode() == ISD::FP_EXTEND) {
10582 18 : SDValue N00 = N0.getOperand(0);
10583 18 : if (N00.getOpcode() == ISD::FNEG) {
10584 2 : SDValue N000 = N00.getOperand(0);
10585 2 : if (isContractableFMUL(N000) &&
10586 4 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10587 2 : return DAG.getNode(ISD::FNEG, SL, VT,
10588 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10589 2 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10590 : N000.getOperand(0)),
10591 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10592 : N000.getOperand(1)),
10593 2 : N1, Flags));
10594 : }
10595 : }
10596 : }
10597 :
10598 : // fold (fsub (fneg (fpext (fmul x, y))), z)
10599 : // -> (fneg (fma (fpext x), (fpext y), z))
10600 : // Note: This could be removed with appropriate canonicalization of the
10601 : // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10602 : // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10603 : // from implementing the canonicalization in visitFSUB.
10604 1104 : if (N0.getOpcode() == ISD::FNEG) {
10605 30 : SDValue N00 = N0.getOperand(0);
10606 30 : if (N00.getOpcode() == ISD::FP_EXTEND) {
10607 2 : SDValue N000 = N00.getOperand(0);
10608 2 : if (isContractableFMUL(N000) &&
10609 4 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10610 2 : return DAG.getNode(ISD::FNEG, SL, VT,
10611 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10612 2 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10613 : N000.getOperand(0)),
10614 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10615 : N000.getOperand(1)),
10616 2 : N1, Flags));
10617 : }
10618 : }
10619 : }
10620 :
10621 : // More folding opportunities when target permits.
10622 1102 : if (Aggressive) {
10623 : // fold (fsub (fma x, y, (fmul u, v)), z)
10624 : // -> (fma x, y, (fma u, v, (fneg z)))
10625 87 : if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10626 596 : isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10627 11 : N0.getOperand(2)->hasOneUse()) {
10628 7 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10629 : N0.getOperand(0), N0.getOperand(1),
10630 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10631 : N0.getOperand(2).getOperand(0),
10632 7 : N0.getOperand(2).getOperand(1),
10633 : DAG.getNode(ISD::FNEG, SL, VT,
10634 7 : N1), Flags), Flags);
10635 : }
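      : // Illustrative note (not from the original source): for (x*y + u*v) - z
      : // this nests the second product into the addend, fma(x, y, fma(u, v, -z)),
      : // contracting both multiplies and cutting the rounding steps from three
      : // (fmul, fma, fsub) down to two (fma, fma).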
10636 :
10637 : // fold (fsub x, (fma y, z, (fmul u, v)))
10638 : // -> (fma (fneg y), z, (fma (fneg u), v, x))
10639 572 : if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10640 6 : isContractableFMUL(N1.getOperand(2))) {
10641 4 : SDValue N20 = N1.getOperand(2).getOperand(0);
10642 4 : SDValue N21 = N1.getOperand(2).getOperand(1);
10643 4 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10644 4 : DAG.getNode(ISD::FNEG, SL, VT,
10645 : N1.getOperand(0)),
10646 : N1.getOperand(1),
10647 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10648 : DAG.getNode(ISD::FNEG, SL, VT, N20),
10649 4 : N21, N0, Flags), Flags);
10650 : }
10651 :
10652 :
10653 : // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10654 : // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
10655 568 : if (N0.getOpcode() == PreferredFusedOpcode) {
10656 30 : SDValue N02 = N0.getOperand(2);
10657 30 : if (N02.getOpcode() == ISD::FP_EXTEND) {
10658 6 : SDValue N020 = N02.getOperand(0);
10659 6 : if (isContractableFMUL(N020) &&
10660 12 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10661 5 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10662 : N0.getOperand(0), N0.getOperand(1),
10663 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10664 5 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10665 : N020.getOperand(0)),
10666 5 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10667 : N020.getOperand(1)),
10668 : DAG.getNode(ISD::FNEG, SL, VT,
10669 5 : N1), Flags), Flags);
10670 : }
10671 : }
10672 : }
10673 :
10674 : // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10675 : // -> (fma (fpext x), (fpext y),
10676 : // (fma (fpext u), (fpext v), (fneg z)))
10677 : // FIXME: This turns two single-precision and one double-precision
10678 : // operation into two double-precision operations, which might not be
10679 : // interesting for all targets, especially GPUs.
10680 563 : if (N0.getOpcode() == ISD::FP_EXTEND) {
10681 16 : SDValue N00 = N0.getOperand(0);
10682 16 : if (N00.getOpcode() == PreferredFusedOpcode) {
10683 4 : SDValue N002 = N00.getOperand(2);
10684 4 : if (isContractableFMUL(N002) &&
10685 8 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10686 4 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10687 4 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10688 : N00.getOperand(0)),
10689 4 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10690 : N00.getOperand(1)),
10691 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10692 4 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10693 : N002.getOperand(0)),
10694 4 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10695 : N002.getOperand(1)),
10696 : DAG.getNode(ISD::FNEG, SL, VT,
10697 4 : N1), Flags), Flags);
10698 : }
10699 : }
10700 : }
10701 :
10702 : // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10703 : // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10704 559 : if (N1.getOpcode() == PreferredFusedOpcode &&
10705 20 : N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10706 6 : SDValue N120 = N1.getOperand(2).getOperand(0);
10707 6 : if (isContractableFMUL(N120) &&
10708 12 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10709 5 : SDValue N1200 = N120.getOperand(0);
10710 5 : SDValue N1201 = N120.getOperand(1);
10711 5 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10712 5 : DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10713 : N1.getOperand(1),
10714 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10715 5 : DAG.getNode(ISD::FNEG, SL, VT,
10716 : DAG.getNode(ISD::FP_EXTEND, SL,
10717 : VT, N1200)),
10718 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10719 : N1201),
10720 5 : N0, Flags), Flags);
10721 : }
10722 : }
10723 :
10724 : // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10725 : // -> (fma (fneg (fpext y)), (fpext z),
10726 : // (fma (fneg (fpext u)), (fpext v), x))
10727 : // FIXME: This turns two single-precision and one double-precision
10728 : // operation into two double-precision operations, which might not be
10729 : // interesting for all targets, especially GPUs.
10730 554 : if (N1.getOpcode() == ISD::FP_EXTEND &&
10731 13 : N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10732 4 : SDValue CvtSrc = N1.getOperand(0);
10733 4 : SDValue N100 = CvtSrc.getOperand(0);
10734 4 : SDValue N101 = CvtSrc.getOperand(1);
10735 4 : SDValue N102 = CvtSrc.getOperand(2);
10736 4 : if (isContractableFMUL(N102) &&
10737 8 : TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10738 4 : SDValue N1020 = N102.getOperand(0);
10739 4 : SDValue N1021 = N102.getOperand(1);
10740 4 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10741 4 : DAG.getNode(ISD::FNEG, SL, VT,
10742 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10743 : N100)),
10744 4 : DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10745 : DAG.getNode(PreferredFusedOpcode, SL, VT,
10746 4 : DAG.getNode(ISD::FNEG, SL, VT,
10747 : DAG.getNode(ISD::FP_EXTEND, SL,
10748 : VT, N1020)),
10749 : DAG.getNode(ISD::FP_EXTEND, SL, VT,
10750 : N1021),
10751 4 : N0, Flags), Flags);
10752 : }
10753 : }
10754 : }
10755 :
10756 1073 : return SDValue();
10757 : }
10758 :
10759 : /// Try to perform FMA combining on a given FMUL node based on the distributive
10760 : /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10761 : /// subtraction instead of addition).
10762 26193 : SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10763 26193 : SDValue N0 = N->getOperand(0);
10764 26193 : SDValue N1 = N->getOperand(1);
10765 52386 : EVT VT = N->getValueType(0);
10766 : SDLoc SL(N);
10767 26193 : const SDNodeFlags Flags = N->getFlags();
10768 :
10769 : assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10770 :
10771 26193 : const TargetOptions &Options = DAG.getTarget().Options;
10772 :
10773 : // The transforms below are incorrect when x == 0 and y == inf, because the
10774 : // intermediate multiplication produces a nan.
10775 26193 : if (!Options.NoInfsFPMath)
10776 25719 : return SDValue();
10777 :
10778 : // Floating-point multiply-add without intermediate rounding.
10779 : bool HasFMA =
10780 699 : (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10781 699 : TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10782 349 : (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10783 :
10784 : // Floating-point multiply-add with intermediate rounding. This can result
10785 : // in a less precise result due to the changed rounding order.
10786 796 : bool HasFMAD = Options.UnsafeFPMath &&
10787 474 : (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10788 :
10789 : // No valid opcode, do not combine.
10790 474 : if (!HasFMAD && !HasFMA)
10791 125 : return SDValue();
10792 :
10793 : // Always prefer FMAD to FMA for precision.
10794 349 : unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10795 349 : bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10796 :
10797 : // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
10798 : // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
10799 : auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10800 : if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10801 : if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
10802 : if (C->isExactlyValue(+1.0))
10803 : return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10804 : Y, Flags);
10805 : if (C->isExactlyValue(-1.0))
10806 : return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10807 : DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10808 : }
10809 : }
10810 : return SDValue();
10811 349 : };
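      : // Illustrative note (not from the original source): (fmul (fadd x, 1.0), y)
      : // distributes to x*y + y, which is exactly (fma x, y, y); e.g. x=4, y=3
      : // gives (4+1)*3 = 15 and 4*3 + 3 = 15.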
10812 :
10813 349 : if (SDValue FMA = FuseFADD(N0, N1, Flags))
10814 18 : return FMA;
10815 331 : if (SDValue FMA = FuseFADD(N1, N0, Flags))
10816 26 : return FMA;
10817 :
10818 : // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
10819 : // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
10820 : // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
10821 : // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
10822 : auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10823 : if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
10824 : if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
10825 : if (C0->isExactlyValue(+1.0))
10826 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10827 : DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10828 : Y, Flags);
10829 : if (C0->isExactlyValue(-1.0))
10830 : return DAG.getNode(PreferredFusedOpcode, SL, VT,
10831 : DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10832 : DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10833 : }
10834 : if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
10835 : if (C1->isExactlyValue(+1.0))
10836 : return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10837 : DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10838 : if (C1->isExactlyValue(-1.0))
10839 : return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10840 : Y, Flags);
10841 : }
10842 : }
10843 : return SDValue();
10844 305 : };
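      : // Illustrative note (not from the original source): (fmul (fsub 1.0, x), y)
      : // distributes to y - x*y, i.e. (fma (fneg x), y, y); e.g. x=0.25, y=8
      : // gives (1-0.25)*8 = 6 and (-0.25)*8 + 8 = 6.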
10845 :
10846 305 : if (SDValue FMA = FuseFSUB(N0, N1, Flags))
10847 36 : return FMA;
10848 269 : if (SDValue FMA = FuseFSUB(N1, N0, Flags))
10849 86 : return FMA;
10850 :
10851 183 : return SDValue();
10852 : }
10853 :
10854 43573 : SDValue DAGCombiner::visitFADD(SDNode *N) {
10855 43573 : SDValue N0 = N->getOperand(0);
10856 43573 : SDValue N1 = N->getOperand(1);
10857 43573 : bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
10858 43573 : bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
10859 87146 : EVT VT = N->getValueType(0);
10860 : SDLoc DL(N);
10861 43573 : const TargetOptions &Options = DAG.getTarget().Options;
10862 43573 : const SDNodeFlags Flags = N->getFlags();
10863 :
10864 : // fold vector ops
10865 43573 : if (VT.isVector())
10866 14088 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
10867 6 : return FoldedVOp;
10868 :
10869 : // fold (fadd c1, c2) -> c1 + c2
10870 43567 : if (N0CFP && N1CFP)
10871 3 : return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
10872 :
10873 : // canonicalize constant to RHS
10874 43564 : if (N0CFP && !N1CFP)
10875 84 : return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
10876 :
10877 : // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
10878 43480 : ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
10879 48522 : if (N1C && N1C->isZero())
10880 718 : if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
10881 27 : return N0;
10882 :
10883 43453 : if (SDValue NewSel = foldBinOpIntoSelect(N))
10884 5 : return NewSel;
10885 :
10886 : // fold (fadd A, (fneg B)) -> (fsub A, B)
10887 83971 : if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10888 40523 : isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
10889 144 : return DAG.getNode(ISD::FSUB, DL, VT, N0,
10890 144 : GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10891 :
10892 : // fold (fadd (fneg A), B) -> (fsub B, A)
10893 83683 : if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10894 40379 : isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
10895 178 : return DAG.getNode(ISD::FSUB, DL, VT, N1,
10896 178 : GetNegatedExpression(N0, DAG, LegalOperations), Flags);
10897 :
10898 : auto isFMulNegTwo = [](SDValue FMul) {
10899 : if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
10900 : return false;
10901 : auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
10902 : return C && C->isExactlyValue(-2.0);
10903 : };
10904 :
10905 : // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
10906 43126 : if (isFMulNegTwo(N0)) {
10907 15 : SDValue B = N0.getOperand(0);
10908 15 : SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
10909 15 : return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
10910 : }
10911 : // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
10912 43111 : if (isFMulNegTwo(N1)) {
10913 15 : SDValue B = N1.getOperand(0);
10914 15 : SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
10915 15 : return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
10916 : }
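      : // Illustrative note (not from the original source): A + B*(-2.0) equals
      : // A - 2B = A - (B + B), so the fold trades a multiply and an FP constant
      : // for an extra add; e.g. A=10, B=3: 10 + 3*(-2) = 4 and 10 - (3+3) = 4.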
10917 :
10918 : // No FP constant should be created after legalization as the Instruction
10919 : // Selection pass has a hard time dealing with FP constants.
10920 43096 : bool AllowNewConst = (Level < AfterLegalizeDAG);
10921 :
10922 : // If 'unsafe math' or nnan is enabled, fold lots of things.
10923 43096 : if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
10924 : // If allowed, fold (fadd (fneg x), x) -> 0.0
10925 2714 : if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
10926 0 : return DAG.getConstantFP(0.0, DL, VT);
10927 :
10928 : // If allowed, fold (fadd x, (fneg x)) -> 0.0
10929 2714 : if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
10930 0 : return DAG.getConstantFP(0.0, DL, VT);
10931 : }
10932 :
10933 : // If 'unsafe math' is enabled, or both reassoc and nsz are set, fold lots of things.
10934 : // TODO: break out portions of the transformations below for which Unsafe is
10935 : // considered and which do not require both nsz and reassoc
10936 40605 : if ((Options.UnsafeFPMath ||
10937 44423 : (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
10938 : AllowNewConst) {
10939 : // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
10940 2476 : if (N1CFP && N0.getOpcode() == ISD::FADD &&
10941 7 : isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10942 14 : SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
10943 14 : return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
10944 : }
10945 :
10946 : // We can fold chains of FADD's of the same value into multiplications.
10947 : // This transform is not safe in general because we are reducing the number
10948 : // of rounding steps.
10949 4628 : if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
10950 2031 : if (N0.getOpcode() == ISD::FMUL) {
10951 405 : bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10952 405 : bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
10953 :
10954 : // (fadd (fmul x, c), x) -> (fmul x, c+1)
10955 405 : if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
10956 7 : SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10957 7 : DAG.getConstantFP(1.0, DL, VT), Flags);
10958 7 : return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
10959 : }
10960 :
10961 : // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
10962 52 : if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
10963 402 : N1.getOperand(0) == N1.getOperand(1) &&
10964 4 : N0.getOperand(0) == N1.getOperand(0)) {
10965 4 : SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10966 4 : DAG.getConstantFP(2.0, DL, VT), Flags);
10967 8 : return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
10968 : }
10969 : }
10970 :
10971 2020 : if (N1.getOpcode() == ISD::FMUL) {
10972 541 : bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10973 541 : bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
10974 :
10975 : // (fadd x, (fmul x, c)) -> (fmul x, c+1)
10976 541 : if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
10977 4 : SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10978 4 : DAG.getConstantFP(1.0, DL, VT), Flags);
10979 4 : return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
10980 : }
10981 :
10982 : // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
10983 15 : if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
10984 541 : N0.getOperand(0) == N0.getOperand(1) &&
10985 4 : N1.getOperand(0) == N0.getOperand(0)) {
10986 4 : SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10987 4 : DAG.getConstantFP(2.0, DL, VT), Flags);
10988 8 : return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
10989 : }
10990 : }
10991 :
10992 2012 : if (N0.getOpcode() == ISD::FADD) {
10993 512 : bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10994 : // (fadd (fadd x, x), x) -> (fmul x, 3.0)
10995 512 : if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
10996 : (N0.getOperand(0) == N1)) {
10997 5 : return DAG.getNode(ISD::FMUL, DL, VT,
10998 5 : N1, DAG.getConstantFP(3.0, DL, VT), Flags);
10999 : }
11000 : }
11001 :
11002 2007 : if (N1.getOpcode() == ISD::FADD) {
11003 260 : bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11004 : // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11005 260 : if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11006 : N1.getOperand(0) == N0) {
11007 4 : return DAG.getNode(ISD::FMUL, DL, VT,
11008 4 : N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11009 : }
11010 : }
11011 :
11012 : // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11013 507 : if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11014 58 : N0.getOperand(0) == N0.getOperand(1) &&
11015 2006 : N1.getOperand(0) == N1.getOperand(1) &&
11016 : N0.getOperand(0) == N1.getOperand(0)) {
11017 3 : return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11018 3 : DAG.getConstantFP(4.0, DL, VT), Flags);
11019 : }
11020 : }
11021 : } // enable-unsafe-fp-math
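      : // Illustrative note (not from the original source): the chain folds above
      : // rewrite x + x + x as (fmul x, 3.0) and (x + x) + (x + x) as
      : // (fmul x, 4.0), collapsing an add chain into a single multiply; this is
      : // only value-safe under reassociation, hence the flag checks.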
11022 :
11023 : // FADD -> FMA combines:
11024 43058 : if (SDValue Fused = visitFADDForFMACombine(N)) {
11025 2326 : AddToWorklist(Fused.getNode());
11026 2326 : return Fused;
11027 : }
11028 40732 : return SDValue();
11029 : }
11030 :
11031 12783 : SDValue DAGCombiner::visitFSUB(SDNode *N) {
11032 12783 : SDValue N0 = N->getOperand(0);
11033 12783 : SDValue N1 = N->getOperand(1);
11034 12783 : ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11035 12783 : ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11036 25566 : EVT VT = N->getValueType(0);
11037 : SDLoc DL(N);
11038 12783 : const TargetOptions &Options = DAG.getTarget().Options;
11039 12783 : const SDNodeFlags Flags = N->getFlags();
11040 :
11041 : // fold vector ops
11042 12783 : if (VT.isVector())
11043 3276 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
11044 1 : return FoldedVOp;
11045 :
11046 : // fold (fsub c1, c2) -> c1-c2
11047 12782 : if (N0CFP && N1CFP)
11048 0 : return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11049 :
11050 12782 : if (SDValue NewSel = foldBinOpIntoSelect(N))
11051 10 : return NewSel;
11052 :
11053 : // (fsub A, +0.0) -> A (also allowed for -0.0 under unsafe math or nsz)
11054 13102 : if (N1CFP && N1CFP->isZero()) {
11055 29 : if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11056 : Flags.hasNoSignedZeros()) {
11057 28 : return N0;
11058 : }
11059 : }
11060 :
11061 : if (N0 == N1) {
11062 : // (fsub x, x) -> 0.0
11063 157 : if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11064 9 : return DAG.getConstantFP(0.0f, DL, VT);
11065 : }
11066 :
11067 : // (fsub -0.0, N1) -> -N1 (also allowed for +0.0 under nsz)
11068 13992 : if (N0CFP && N0CFP->isZero()) {
11069 402 : if (N0CFP->isNegative() ||
11070 394 : (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11071 48 : if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11072 3 : return GetNegatedExpression(N1, DAG, LegalOperations);
11073 45 : if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11074 41 : return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11075 : }
11076 : }
11077 :
11078 11520 : if ((Options.UnsafeFPMath ||
11079 51 : (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11080 12742 : && N1.getOpcode() == ISD::FADD) {
11081 : // X - (X + Y) -> -Y
11082 20 : if (N0 == N1->getOperand(0))
11083 12 : return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11084 : // X - (Y + X) -> -Y
11085 14 : if (N0 == N1->getOperand(1))
11086 12 : return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11087 : }
11088 :
11089 : // fold (fsub A, (fneg B)) -> (fadd A, B)
11090 12679 : if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11091 199 : return DAG.getNode(ISD::FADD, DL, VT, N0,
11092 199 : GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11093 :
11094 : // FSUB -> FMA combines:
11095 12480 : if (SDValue Fused = visitFSUBForFMACombine(N)) {
11096 965 : AddToWorklist(Fused.getNode());
11097 965 : return Fused;
11098 : }
11099 :
11100 11515 : return SDValue();
11101 : }
11102 :
11103 27386 : SDValue DAGCombiner::visitFMUL(SDNode *N) {
11104 27386 : SDValue N0 = N->getOperand(0);
11105 27386 : SDValue N1 = N->getOperand(1);
11106 27386 : ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11107 27386 : ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11108 54772 : EVT VT = N->getValueType(0);
11109 : SDLoc DL(N);
11110 27386 : const TargetOptions &Options = DAG.getTarget().Options;
11111 27386 : const SDNodeFlags Flags = N->getFlags();
11112 :
11113 : // fold vector ops
11114 27386 : if (VT.isVector()) {
11115 : // This just handles C1 * C2 for vectors. Other vector folds are below.
11116 8696 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
11117 3 : return FoldedVOp;
11118 : }
11119 :
11120 : // fold (fmul c1, c2) -> c1*c2
11121 27383 : if (N0CFP && N1CFP)
11122 0 : return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11123 :
11124 : // canonicalize constant to RHS
11125 : if (isConstantFPBuildVectorOrConstantFP(N0) &&
11126 : !isConstantFPBuildVectorOrConstantFP(N1))
11127 309 : return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11128 :
11129 : // fold (fmul A, 1.0) -> A
11130 27074 : if (N1CFP && N1CFP->isExactlyValue(1.0))
11131 338 : return N0;
11132 :
11133 26736 : if (SDValue NewSel = foldBinOpIntoSelect(N))
11134 3 : return NewSel;
11135 :
11136 22881 : if (Options.UnsafeFPMath ||
11137 29136 : (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11138 : // fold (fmul A, 0) -> 0
11139 7192 : if (N1CFP && N1CFP->isZero())
11140 6 : return N1;
11141 : }
11142 :
11143 26727 : if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11144 : // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11145 1369 : if (isConstantFPBuildVectorOrConstantFP(N1) &&
11146 : N0.getOpcode() == ISD::FMUL) {
11147 251 : SDValue N00 = N0.getOperand(0);
11148 251 : SDValue N01 = N0.getOperand(1);
11149 : // Avoid an infinite loop by making sure that N00 is not a constant
11150 : // (the inner multiply has not been constant folded yet).
11151 : if (isConstantFPBuildVectorOrConstantFP(N01) &&
11152 : !isConstantFPBuildVectorOrConstantFP(N00)) {
11153 177 : SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11154 177 : return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11155 : }
11156 : }
11157 :
11158 : // Match a special-case: we convert X * 2.0 into fadd.
11159 : // fmul (fadd X, X), C -> fmul X, 2.0 * C
11160 6401 : if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11161 207 : N0.getOperand(0) == N0.getOperand(1)) {
11162 25 : const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11163 25 : SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11164 50 : return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11165 : }
11166 : }
11167 :
11168 : // fold (fmul X, 2.0) -> (fadd X, X)
11169 26525 : if (N1CFP && N1CFP->isExactlyValue(+2.0))
11170 239 : return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11171 :
11172 : // fold (fmul X, -1.0) -> (fneg X)
11173 26286 : if (N1CFP && N1CFP->isExactlyValue(-1.0))
11174 57 : if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11175 114 : return DAG.getNode(ISD::FNEG, DL, VT, N0);
11176 :
11177 : // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11178 26229 : if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11179 1197 : if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11180 : // Both can be negated for free, check to see if at least one is cheaper
11181 : // negated.
11182 207 : if (LHSNeg == 2 || RHSNeg == 2)
11183 34 : return DAG.getNode(ISD::FMUL, DL, VT,
11184 34 : GetNegatedExpression(N0, DAG, LegalOperations),
11185 34 : GetNegatedExpression(N1, DAG, LegalOperations),
11186 34 : Flags);
11187 : }
11188 : }
11189 :
11190 : // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11191 : // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11192 4373 : if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11193 30438 : (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11194 2 : TLI.isOperationLegal(ISD::FABS, VT)) {
11195 2 : SDValue Select = N0, X = N1;
11196 2 : if (Select.getOpcode() != ISD::SELECT)
11197 : std::swap(Select, X);
11198 :
11199 2 : SDValue Cond = Select.getOperand(0);
11200 : auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11201 : auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11202 :
11203 2 : if (TrueOpnd && FalseOpnd &&
11204 2 : Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11205 4 : isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11206 2 : cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11207 2 : ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11208 : switch (CC) {
11209 : default: break;
11210 : case ISD::SETOLT:
11211 : case ISD::SETULT:
11212 : case ISD::SETOLE:
11213 : case ISD::SETULE:
11214 : case ISD::SETLT:
11215 : case ISD::SETLE:
11216 : std::swap(TrueOpnd, FalseOpnd);
11217 : LLVM_FALLTHROUGH;
11218 2 : case ISD::SETOGT:
11219 : case ISD::SETUGT:
11220 : case ISD::SETOGE:
11221 : case ISD::SETUGE:
11222 : case ISD::SETGT:
11223 : case ISD::SETGE:
11224 2 : if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11225 1 : TLI.isOperationLegal(ISD::FNEG, VT))
11226 1 : return DAG.getNode(ISD::FNEG, DL, VT,
11227 1 : DAG.getNode(ISD::FABS, DL, VT, X));
11228 1 : if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11229 2 : return DAG.getNode(ISD::FABS, DL, VT, X);
11230 :
11231 : break;
11232 : }
11233 : }
11234 : }
11235 :
11236 : // FMUL -> FMA combines:
11237 26193 : if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11238 166 : AddToWorklist(Fused.getNode());
11239 166 : return Fused;
11240 : }
11241 :
11242 26027 : return SDValue();
11243 : }
11244 :
11245 9771 : SDValue DAGCombiner::visitFMA(SDNode *N) {
11246 9771 : SDValue N0 = N->getOperand(0);
11247 9771 : SDValue N1 = N->getOperand(1);
11248 9771 : SDValue N2 = N->getOperand(2);
11249 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11250 : ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11251 19542 : EVT VT = N->getValueType(0);
11252 : SDLoc DL(N);
11253 9771 : const TargetOptions &Options = DAG.getTarget().Options;
11254 :
11255 : // FMA nodes have flags that propagate to the created nodes.
11256 9771 : const SDNodeFlags Flags = N->getFlags();
11257 9771 : bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11258 :
11259 : // Constant fold FMA.
11260 : if (isa<ConstantFPSDNode>(N0) &&
11261 : isa<ConstantFPSDNode>(N1) &&
11262 : isa<ConstantFPSDNode>(N2)) {
11263 0 : return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11264 : }
11265 :
11266 9771 : if (UnsafeFPMath) {
11267 2366 : if (N0CFP && N0CFP->isZero())
11268 0 : return N2;
11269 2417 : if (N1CFP && N1CFP->isZero())
11270 0 : return N2;
11271 : }
11272 : // TODO: The FMA node should have flags that propagate to these nodes.
11273 9771 : if (N0CFP && N0CFP->isExactlyValue(1.0))
11274 0 : return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11275 9771 : if (N1CFP && N1CFP->isExactlyValue(1.0))
11276 10 : return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11277 :
11278 : // Canonicalize (fma c, x, y) -> (fma x, c, y)
11279 : if (isConstantFPBuildVectorOrConstantFP(N0) &&
11280 : !isConstantFPBuildVectorOrConstantFP(N1))
11281 132 : return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11282 :
11283 9700 : if (UnsafeFPMath) {
11284 : // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11285 110 : if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11286 2350 : isConstantFPBuildVectorOrConstantFP(N1) &&
11287 17 : isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11288 17 : return DAG.getNode(ISD::FMUL, DL, VT, N0,
11289 : DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11290 17 : Flags), Flags);
11291 : }
11292 :
11293 : // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11294 : if (N0.getOpcode() == ISD::FMUL &&
11295 2333 : isConstantFPBuildVectorOrConstantFP(N1) &&
11296 17 : isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11297 17 : return DAG.getNode(ISD::FMA, DL, VT,
11298 : N0.getOperand(0),
11299 : DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11300 : Flags),
11301 17 : N2);
11302 : }
11303 : }
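      : // Illustrative note (not from the original source): the first fold above
      : // is the distributive law, fma(x, c1, x*c2) = x*c1 + x*c2 = x*(c1+c2);
      : // the second reassociates the constants, (x*c1)*c2 + y = x*(c1*c2) + y.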
11304 :
11305 : // (fma x, 1, y) -> (fadd x, y)
11306 : // (fma x, -1, y) -> (fadd (fneg x), y)
11307 9666 : if (N1CFP) {
11308 340 : if (N1CFP->isExactlyValue(1.0))
11309 : // TODO: The FMA node should have flags that propagate to this node.
11310 0 : return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11311 :
11312 340 : if (N1CFP->isExactlyValue(-1.0) &&
11313 1 : (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11314 2 : SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11315 1 : AddToWorklist(RHSNeg.getNode());
11316 : // TODO: The FMA node should have flags that propagate to this node.
11317 2 : return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11318 : }
11319 :
11320 : // fma (fneg x), K, y -> fma x, -K, y
11321 339 : if (N0.getOpcode() == ISD::FNEG &&
11322 66 : (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11323 0 : (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11324 18 : return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11325 18 : DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11326 : }
11327 : }
11328 :
11329 9647 : if (UnsafeFPMath) {
11330 : // (fma x, c, x) -> (fmul x, (c+1))
11331 2313 : if (N1CFP && N0 == N2) {
11332 1 : return DAG.getNode(ISD::FMUL, DL, VT, N0,
11333 : DAG.getNode(ISD::FADD, DL, VT, N1,
11334 : DAG.getConstantFP(1.0, DL, VT), Flags),
11335 1 : Flags);
11336 : }
11337 :
11338 : // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11339 2312 : if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11340 1 : return DAG.getNode(ISD::FMUL, DL, VT, N0,
11341 : DAG.getNode(ISD::FADD, DL, VT, N1,
11342 : DAG.getConstantFP(-1.0, DL, VT), Flags),
11343 1 : Flags);
11344 : }
11345 : }
11346 :
11347 9645 : return SDValue();
11348 : }
11349 :
11350 : // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11351 : // reciprocal.
11352 : // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11353 : // Notice that this is not always beneficial. One reason is that different
11354 : // targets may have different costs for FDIV and FMUL, so sometimes the cost
11355 : // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
11356 : // reason is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
11357 9461 : SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11358 9461 : bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11359 9461 : const SDNodeFlags Flags = N->getFlags();
11360 9461 : if (!UnsafeMath && !Flags.hasAllowReciprocal())
11361 8954 : return SDValue();
11362 :
11363 : // Skip if current node is a reciprocal.
11364 507 : SDValue N0 = N->getOperand(0);
11365 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11366 87 : if (N0CFP && N0CFP->isExactlyValue(1.0))
11367 84 : return SDValue();
11368 :
11369 : // Exit early if the target does not want this transform or if there can't
11370 : // possibly be enough uses of the divisor to make the transform worthwhile.
11371 423 : SDValue N1 = N->getOperand(1);
11372 423 : unsigned MinUses = TLI.combineRepeatedFPDivisors();
11373 794 : if (!MinUses || N1->use_size() < MinUses)
11374 396 : return SDValue();
11375 :
11376 : // Find all FDIV users of the same divisor.
11377 : // Use a set because duplicates may be present in the user list.
11378 27 : SetVector<SDNode *> Users;
11379 86 : for (auto *U : N1->uses()) {
11380 59 : if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11381 : // This division is eligible for optimization only if global unsafe math
11382 : // is enabled or if this division allows reciprocal formation.
11383 57 : if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11384 53 : Users.insert(U);
11385 : }
11386 : }
11387 :
11388 : // Now that we have the actual number of divisor uses, make sure it meets
11389 : // the minimum threshold specified by the target.
11390 27 : if (Users.size() < MinUses)
11391 7 : return SDValue();
11392 :
11393 40 : EVT VT = N->getValueType(0);
11394 : SDLoc DL(N);
11395 20 : SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11396 20 : SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11397 :
11398 : // Dividend / Divisor -> Dividend * Reciprocal
11399 65 : for (auto *U : Users) {
11400 45 : SDValue Dividend = U->getOperand(0);
11401 : if (Dividend != FPOne) {
11402 88 : SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11403 44 : Reciprocal, Flags);
11404 44 : CombineTo(U, NewNode);
11405 1 : } else if (U != Reciprocal.getNode()) {
11406 : // In the absence of fast-math-flags, this user node is always the
11407 : // same node as Reciprocal, but with FMF they may be different nodes.
11408 0 : CombineTo(U, Reciprocal);
11409 : }
11410 : }
11411 20 : return SDValue(N, 0); // N was replaced.
11412 : }
11413 :
11414 10233 : SDValue DAGCombiner::visitFDIV(SDNode *N) {
11415 10233 : SDValue N0 = N->getOperand(0);
11416 10233 : SDValue N1 = N->getOperand(1);
11417 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11418 : ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11419 20466 : EVT VT = N->getValueType(0);
11420 : SDLoc DL(N);
11421 10233 : const TargetOptions &Options = DAG.getTarget().Options;
11422 10233 : SDNodeFlags Flags = N->getFlags();
11423 :
11424 : // fold vector ops
11425 10233 : if (VT.isVector())
11426 1592 : if (SDValue FoldedVOp = SimplifyVBinOp(N))
11427 0 : return FoldedVOp;
11428 :
11429 : // fold (fdiv c1, c2) -> c1/c2
11430 10233 : if (N0CFP && N1CFP)
11431 6 : return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11432 :
11433 10230 : if (SDValue NewSel = foldBinOpIntoSelect(N))
11434 8 : return NewSel;
11435 :
11436 10222 : if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11437 : // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11438 1251 : if (N1CFP) {
11439 : // Compute the reciprocal 1.0 / c2.
11440 47 : const APFloat &N1APF = N1CFP->getValueAPF();
11441 47 : APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11442 47 : APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11443 : // Only do the transform if the reciprocal is a legal fp immediate that
11444 : // isn't too nasty (eg NaN, denormal, ...).
11445 47 : if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11446 43 : (!LegalOperations ||
11447 : // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11448 : // backend)... we should handle this gracefully after Legalize.
11449 : // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11450 0 : TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11451 0 : TLI.isFPImmLegal(Recip, VT)))
11452 43 : return DAG.getNode(ISD::FMUL, DL, VT, N0,
11453 43 : DAG.getConstantFP(Recip, DL, VT), Flags);
11454 : }
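      : // Illustrative note (not from the original source): (fdiv X, 4.0) becomes
      : // (fmul X, 0.25) since 1.0/4.0 is exact (opOK); a divisor such as 10.0
      : // yields the inexact reciprocal 0.1 (opInexact), which is still accepted
      : // here in exchange for the cheaper multiply.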
11455 :
11456 : // If this FDIV is part of a reciprocal square root, it may be folded
11457 : // into a target-specific square root estimate instruction.
11458 1208 : if (N1.getOpcode() == ISD::FSQRT) {
11459 124 : if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11460 55 : return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11461 : }
11462 1084 : } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11463 2 : N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11464 2 : if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11465 2 : Flags)) {
11466 2 : RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11467 2 : AddToWorklist(RV.getNode());
11468 2 : return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11469 : }
11470 1082 : } else if (N1.getOpcode() == ISD::FP_ROUND &&
11471 2 : N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11472 2 : if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11473 2 : Flags)) {
11474 2 : RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11475 2 : AddToWorklist(RV.getNode());
11476 2 : return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11477 : }
11478 1080 : } else if (N1.getOpcode() == ISD::FMUL) {
11479 : // Look through an FMUL. Even though this won't remove the FDIV directly,
11480 : // it's still worthwhile to get rid of the FSQRT if possible.
11481 : SDValue SqrtOp;
11482 2 : SDValue OtherOp;
11483 4 : if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11484 : SqrtOp = N1.getOperand(0);
11485 2 : OtherOp = N1.getOperand(1);
11486 0 : } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11487 : SqrtOp = N1.getOperand(1);
11488 0 : OtherOp = N1.getOperand(0);
11489 : }
11490 2 : if (SqrtOp.getNode()) {
11491 : // We found a FSQRT, so try to make this fold:
11492 : // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11493 2 : if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11494 2 : RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11495 2 : AddToWorklist(RV.getNode());
11496 2 : return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11497 : }
11498 : }
11499 : }
11500 :
11501 : // Fold into a reciprocal estimate and multiply instead of a real divide.
11502 1147 : if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11503 640 : AddToWorklist(RV.getNode());
11504 640 : return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11505 : }
11506 : }
11507 :
11508 : // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11509 9478 : if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11510 280 : if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11511 : // Both can be negated for free, check to see if at least one is cheaper
11512 : // negated.
11513 21 : if (LHSNeg == 2 || RHSNeg == 2)
11514 17 : return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11515 17 : GetNegatedExpression(N0, DAG, LegalOperations),
11516 17 : GetNegatedExpression(N1, DAG, LegalOperations),
11517 34 : Flags);
11518 : }
11519 : }
11520 :
11521 9461 : if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11522 20 : return CombineRepeatedDivisors;
11523 :
11524 9441 : return SDValue();
11525 : }
11526 :
11527 283 : SDValue DAGCombiner::visitFREM(SDNode *N) {
11528 283 : SDValue N0 = N->getOperand(0);
11529 283 : SDValue N1 = N->getOperand(1);
11530 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11531 : ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11532 283 : EVT VT = N->getValueType(0);
11533 :
11534 : // fold (frem c1, c2) -> fmod(c1,c2)
11535 283 : if (N0CFP && N1CFP)
11536 27 : return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11537 :
11538 274 : if (SDValue NewSel = foldBinOpIntoSelect(N))
11539 7 : return NewSel;
11540 :
11541 267 : return SDValue();
11542 : }
11543 :
11544 2134 : SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11545 2134 : SDNodeFlags Flags = N->getFlags();
11546 2134 : if (!DAG.getTarget().Options.UnsafeFPMath &&
11547 : !Flags.hasApproximateFuncs())
11548 1849 : return SDValue();
11549 :
11550 285 : SDValue N0 = N->getOperand(0);
11551 285 : if (TLI.isFsqrtCheap(N0, DAG))
11552 47 : return SDValue();
11553 :
11554 : // FSQRT nodes have flags that propagate to the created nodes.
11555 238 : return buildSqrtEstimate(N0, Flags);
11556 : }
11557 :
11558 : /// copysign(x, fp_extend(y)) -> copysign(x, y)
11559 : /// copysign(x, fp_round(y)) -> copysign(x, y)
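      : /// Illustrative note (not from the original source): both folds are sound
      : /// because fp_extend and fp_round preserve the sign of y, and FCOPYSIGN
      : /// only consumes the sign bit of its second operand.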
11560 1429 : static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11561 1429 : SDValue N1 = N->getOperand(1);
11562 1429 : if ((N1.getOpcode() == ISD::FP_EXTEND ||
11563 : N1.getOpcode() == ISD::FP_ROUND)) {
11564 : // Do not optimize out type conversion of f128 type yet.
11565 : // For some targets like x86_64, configuration is changed to keep one f128
11566 : // value in one SSE register, but instruction selection cannot handle
11567 : // FCOPYSIGN on SSE registers yet.
11568 91 : EVT N1VT = N1->getValueType(0);
11569 91 : EVT N1Op0VT = N1->getOperand(0).getValueType();
11570 0 : return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11571 : }
11572 : return false;
11573 : }
11574 :
11575 1440 : SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11576 1440 : SDValue N0 = N->getOperand(0);
11577 1440 : SDValue N1 = N->getOperand(1);
11578 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11579 : ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11580 1440 : EVT VT = N->getValueType(0);
11581 :
11582 1440 : if (N0CFP && N1CFP) // Constant fold
11583 0 : return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11584 :
11585 1440 : if (N1CFP) {
11586 1 : const APFloat &V = N1CFP->getValueAPF();
11587 : // copysign(x, c1) -> fabs(x) iff ispos(c1)
11588 : // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11589 1 : if (!V.isNegative()) {
11590 1 : if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11591 2 : return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11592 : } else {
11593 0 : if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11594 0 : return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11595 0 : DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11596 : }
11597 : }
11598 :
11599 : // copysign(fabs(x), y) -> copysign(x, y)
11600 : // copysign(fneg(x), y) -> copysign(x, y)
11601 : // copysign(copysign(x,z), y) -> copysign(x, y)
11602 1439 : if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11603 : N0.getOpcode() == ISD::FCOPYSIGN)
11604 12 : return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11605 :
11606 : // copysign(x, abs(y)) -> abs(x)
11607 1433 : if (N1.getOpcode() == ISD::FABS)
11608 4 : return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11609 :
11610 : // copysign(x, copysign(y,z)) -> copysign(x, z)
11611 1431 : if (N1.getOpcode() == ISD::FCOPYSIGN)
11612 4 : return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11613 :
11614 : // copysign(x, fp_extend(y)) -> copysign(x, y)
11615 : // copysign(x, fp_round(y)) -> copysign(x, y)
11616 1429 : if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11617 158 : return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11618 :
11619 1350 : return SDValue();
11620 : }
11621 :
11622 0 : SDValue DAGCombiner::visitFPOW(SDNode *N) {
11623 0 : ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
11624 0 : if (!ExponentC)
11625 0 : return SDValue();
11626 :
11627 : // Try to convert x ** (1/3) into cube root.
11628 : // TODO: Handle the various flavors of long double.
11629 : // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
11630 : // Some range near 1/3 should be fine.
11631 0 : EVT VT = N->getValueType(0);
11632 0 : if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
11633 0 : (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
11634 : // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
11635 : // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
11636 : // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
11637 : // For regular numbers, rounding may cause the results to differ.
11638 : // Therefore, we require { nsz ninf nnan afn } for this transform.
11639 : // TODO: We could select out the special cases if we don't have nsz/ninf.
11640 0 : SDNodeFlags Flags = N->getFlags();
11641 0 : if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
11642 : !Flags.hasApproximateFuncs())
11643 0 : return SDValue();
11644 :
11645 : // Do not create a cbrt() libcall if the target does not have it, and do not
11646 : // turn a pow that has lowering support into a cbrt() libcall.
11647 0 : if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
11648 0 : (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
11649 : DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
11650 0 : return SDValue();
11651 :
11652 0 : return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
11653 : }
11654 :
11655 : // Try to convert x ** (1/4) into square roots.
11656 : // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
11657 : // TODO: This could be extended (using a target hook) to handle smaller
11658 : // power-of-2 fractional exponents.
11659 0 : if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
11660 : // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
11661 : // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
11662 : // For regular numbers, rounding may cause the results to differ.
11663 : // Therefore, we require { nsz ninf afn } for this transform.
11664 : // TODO: We could select out the special cases if we don't have nsz/ninf.
11665 0 : SDNodeFlags Flags = N->getFlags();
11666 0 : if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
11667 : !Flags.hasApproximateFuncs())
11668 0 : return SDValue();
11669 :
11670 : // Don't double the number of libcalls. We are trying to inline fast code.
11671 0 : if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
11672 0 : return SDValue();
11673 :
11674 : // Assume that libcalls are the smallest code.
11675 : // TODO: This restriction should probably be lifted for vectors.
11676 0 : if (DAG.getMachineFunction().getFunction().optForSize())
11677 0 : return SDValue();
11678 :
11679 : // pow(X, 0.25) --> sqrt(sqrt(X))
11680 : SDLoc DL(N);
11681 0 : SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
11682 0 : return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
11683 : }
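      : // Illustrative note (not from the original source): e.g. pow(16.0, 0.25)
      : // = 2.0 and sqrt(sqrt(16.0)) = sqrt(4.0) = 2.0; the afn/nsz/ninf flags
      : // excuse the mismatches at -0.0 and -inf noted above.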
11684 :
11685 0 : return SDValue();
11686 : }
11687 :
11688 34408 : static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11689 : const TargetLowering &TLI) {
11690 : // This optimization is guarded by a function attribute because it may produce
11691 : // unexpected results. I.e., programs may be relying on the platform-specific
11692 : // undefined behavior when the float-to-int conversion overflows.
11693 34408 : const Function &F = DAG.getMachineFunction().getFunction();
11694 34408 : Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11695 68801 : if (StrictOverflow.getValueAsString().equals("false"))
11696 15 : return SDValue();
11697 :
11698 : // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11699 : // replacing casts with a libcall. We also must be allowed to ignore -0.0
11700 : // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11701 : // conversions would return +0.0.
11702 : // FIXME: We should be able to use node-level FMF here.
11703 : // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11704 68786 : EVT VT = N->getValueType(0);
11705 11533 : if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11706 11533 : !DAG.getTarget().Options.NoSignedZerosFPMath)
11707 34262 : return SDValue();
11708 :
11709 : // fptosi/fptoui round towards zero, so converting from FP to integer and
11710 : // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11711 131 : SDValue N0 = N->getOperand(0);
11712 262 : if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11713 20 : N0.getOperand(0).getValueType() == VT)
11714 40 : return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11715 :
11716 111 : if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11717 19 : N0.getOperand(0).getValueType() == VT)
11718 38 : return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11719 :
11720 92 : return SDValue();
11721 : }
11722 :
11723 21933 : SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
11724 21933 : SDValue N0 = N->getOperand(0);
11725 43866 : EVT VT = N->getValueType(0);
11726 21933 : EVT OpVT = N0.getValueType();
11727 :
11728 : // fold (sint_to_fp c1) -> c1fp
11729 21933 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11730 : // ...but only if the target supports immediate floating-point values
11731 5 : (!LegalOperations ||
11732 1 : TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11733 8 : return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11734 :
11735 : // If the input is a legal type, and SINT_TO_FP is not legal on this target,
11736 : // but UINT_TO_FP is legal on this target, try to convert.
11737 36135 : if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
11738 14206 : hasOperation(ISD::UINT_TO_FP, OpVT)) {
11739 : // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
11740 0 : if (DAG.SignBitIsZero(N0))
11741 0 : return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11742 : }
11743 :
11744 : // The next optimizations are desirable only if SELECT_CC can be lowered.
11745 42846 : if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11746 : // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
11747 4 : if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
11748 7486 : !VT.isVector() &&
11749 4 : (!LegalOperations ||
11750 0 : TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11751 : SDLoc DL(N);
11752 : SDValue Ops[] =
11753 : { N0.getOperand(0), N0.getOperand(1),
11754 4 : DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11755 4 : N0.getOperand(2) };
11756 8 : return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11757 : }
11758 :
11759 : // fold (sint_to_fp (zext (setcc x, y, cc))) ->
11760 : // (select_cc x, y, 1.0, 0.0, cc)
11761 654 : if (N0.getOpcode() == ISD::ZERO_EXTEND &&
11762 7597 : N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
11763 26 : (!LegalOperations ||
11764 0 : TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11765 : SDLoc DL(N);
11766 : SDValue Ops[] =
11767 26 : { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
11768 26 : DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11769 78 : N0.getOperand(0).getOperand(2) };
11770 52 : return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11771 : }
11772 : }
11773 :
11774 21899 : if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11775 20 : return FTrunc;
11776 :
11777 21879 : return SDValue();
11778 : }
11779 :
11780 12560 : SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
11781 12560 : SDValue N0 = N->getOperand(0);
11782 25120 : EVT VT = N->getValueType(0);
11783 12560 : EVT OpVT = N0.getValueType();
11784 :
11785 : // fold (uint_to_fp c1) -> c1fp
11786 12560 : if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11787 : // ...but only if the target supports immediate floating-point values
11788 0 : (!LegalOperations ||
11789 0 : TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11790 0 : return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11791 :
11792 : // If the input is a legal type, and UINT_TO_FP is not legal on this target,
11793 : // but SINT_TO_FP is legal on this target, try to convert.
11794 16427 : if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
11795 3867 : hasOperation(ISD::SINT_TO_FP, OpVT)) {
11796 : // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
11797 1292 : if (DAG.SignBitIsZero(N0))
11798 40 : return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11799 : }
11800 :
11801 : // The next optimizations are desirable only if SELECT_CC can be lowered.
11802 24208 : if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11803 : // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
11804 10709 : if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
11805 31 : (!LegalOperations ||
11806 0 : TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11807 : SDLoc DL(N);
11808 : SDValue Ops[] =
11809 : { N0.getOperand(0), N0.getOperand(1),
11810 31 : DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11811 31 : N0.getOperand(2) };
11812 62 : return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11813 : }
11814 : }
11815 :
11816 12509 : if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11817 19 : return FTrunc;
11818 :
11819 12490 : return SDValue();
11820 : }
11821 :
11822 : // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
11823 15241 : static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
11824 15241 : SDValue N0 = N->getOperand(0);
11825 30482 : EVT VT = N->getValueType(0);
11826 :
11827 15241 : if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
11828 15206 : return SDValue();
11829 :
11830 35 : SDValue Src = N0.getOperand(0);
11831 35 : EVT SrcVT = Src.getValueType();
11832 35 : bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
11833 35 : bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
11834 :
11835 : // We can safely assume the conversion won't overflow the output range,
11836 : // because (for example) (uint8_t)18293.f is undefined behavior.
11837 :
11838 : // Since we can assume the conversion won't overflow, our decision as to
11839 : // whether the input will fit in the float should depend on the minimum
11840 : // of the input range and output range.
11841 :
11842 : // This means this is also safe for a signed input and unsigned output, since
11843 : // a negative input would lead to undefined behavior.
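      : // Illustrative note (not from the original source): assuming an f32
      : // intermediate (24-bit significand), an i16 round-trip is always exact
      : // and folds to x, while an i32 round-trip can drop low bits (31 > 24)
      : // and is left alone.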
11844 35 : unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
11845 35 : unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
11846 35 : unsigned ActualSize = std::min(InputSize, OutputSize);
11847 35 : const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
11848 :
11849 : // We can only fold away the float conversion if the input range can be
11850 : // represented exactly in the float range.
11851 35 : if (APFloat::semanticsPrecision(sem) >= ActualSize) {
11852 5 : if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
11853 3 : unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
11854 : : ISD::ZERO_EXTEND;
11855 6 : return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
11856 : }
11857 2 : if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
11858 2 : return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
11859 1 : return DAG.getBitcast(VT, Src);
11860 : }
11861 30 : return SDValue();
11862 : }
11863 :
11864 0 : SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
11865 0 : SDValue N0 = N->getOperand(0);
11866 0 : EVT VT = N->getValueType(0);
11867 :
11868 : // fold (fp_to_sint c1fp) -> c1
11869 : if (isConstantFPBuildVectorOrConstantFP(N0))
11870 0 : return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
11871 :
11872 0 : return FoldIntToFPToInt(N, DAG);
11873 : }
11874 :
11875 0 : SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
11876 0 : SDValue N0 = N->getOperand(0);
11877 0 : EVT VT = N->getValueType(0);
11878 :
11879 : // fold (fp_to_uint c1fp) -> c1
11880 : if (isConstantFPBuildVectorOrConstantFP(N0))
11881 0 : return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
11882 :
11883 0 : return FoldIntToFPToInt(N, DAG);
11884 : }
11885 :
11886 4291 : SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
11887 4291 : SDValue N0 = N->getOperand(0);
11888 4291 : SDValue N1 = N->getOperand(1);
11889 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11890 4291 : EVT VT = N->getValueType(0);
11891 :
11892 : // fold (fp_round c1fp) -> c1fp
11893 4291 : if (N0CFP)
11894 4 : return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
11895 :
11896 : // fold (fp_round (fp_extend x)) -> x
11897 4289 : if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
11898 6 : return N0.getOperand(0);
11899 :
11900 : // fold (fp_round (fp_round x)) -> (fp_round x)
11901 4283 : if (N0.getOpcode() == ISD::FP_ROUND) {
11902 19 : const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
11903 19 : const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
11904 :
11905 : // Skip this folding if it results in an fp_round from f80 to f16.
11906 : //
11907 : // f80 to f16 always generates an expensive (and as yet, unimplemented)
11908 : // libcall to __truncxfhf2 instead of selecting native f16 conversion
11909 : // instructions from f32 or f64. Moreover, the first (value-preserving)
11910 : // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
11911 : // x86.
11912 19 : if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
11913 1 : return SDValue();
11914 :
11915 : // If the first fp_round isn't a value preserving truncation, it might
11916 : // introduce a tie in the second fp_round, that wouldn't occur in the
11917 : // single-step fp_round we want to fold to.
11918 : // In other words, double rounding isn't the same as rounding.
11919 : // Also, this is a value preserving truncation iff both fp_round's are.
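      : // A decimal analogue of the hazard (illustrative): rounding 2.549
      : // straight to one digit gives 2.5, but rounding first to two digits
      : // (2.55) and then to one gives 2.6, because the first rounding
      : // manufactured a tie that the second one breaks upward.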
11920 18 : if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
11921 : SDLoc DL(N);
11922 7 : return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
11923 7 : DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
11924 : }
11925 : }
11926 :
11927 : // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
11928 4275 : if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
11929 8 : SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
11930 8 : N0.getOperand(0), N1);
11931 8 : AddToWorklist(Tmp.getNode());
11932 8 : return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
11933 16 : Tmp, N0.getOperand(1));
11934 : }
11935 :
11936 4267 : if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11937 4 : return NewVSel;
11938 :
11939 4263 : return SDValue();
11940 : }
11941 :
11942 0 : SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
11943 0 : SDValue N0 = N->getOperand(0);
11944 0 : EVT VT = N->getValueType(0);
11945 0 : EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11946 : ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11947 :
11948 : // fold (fp_round_inreg c1fp) -> c1fp
11949 0 : if (N0CFP && isTypeLegal(EVT)) {
11950 : SDLoc DL(N);
11951 0 : SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
11952 0 : return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
11953 : }
11954 :
11955 0 : return SDValue();
11956 : }
11957 :
11958 9165 : SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
11959 9165 : SDValue N0 = N->getOperand(0);
11960 18330 : EVT VT = N->getValueType(0);
11961 :
11962 : // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
11963 8648 : if (N->hasOneUse() &&
11964 8648 : N->use_begin()->getOpcode() == ISD::FP_ROUND)
11965 0 : return SDValue();
11966 :
11967 : // fold (fp_extend c1fp) -> c1fp
11968 : if (isConstantFPBuildVectorOrConstantFP(N0))
11969 20 : return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
11970 :
11971 : // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
11972 9995 : if (N0.getOpcode() == ISD::FP16_TO_FP &&
11973 840 : TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
11974 10 : return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
11975 :
11976 : // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
11977 : // value of X.
11978 : if (N0.getOpcode() == ISD::FP_ROUND
11979 9386 : && N0.getConstantOperandVal(1) == 1) {
11980 133 : SDValue In = N0.getOperand(0);
11981 261 : if (In.getValueType() == VT) return In;
11982 5 : if (VT.bitsLT(In.getValueType()))
11983 5 : return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
11984 10 : In, N0.getOperand(1));
11985 0 : return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
11986 : }
11987 :
11988 : // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
11989 3548 : if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11990 3242 : TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11991 : LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11992 810 : SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11993 : LN0->getChain(),
11994 : LN0->getBasePtr(), N0.getValueType(),
11995 811 : LN0->getMemOperand());
11996 810 : CombineTo(N, ExtLoad);
11997 : CombineTo(N0.getNode(),
11998 810 : DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
11999 : N0.getValueType(), ExtLoad,
12000 810 : DAG.getIntPtrConstant(1, SDLoc(N0))),
12001 1621 : ExtLoad.getValue(1));
12002 810 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
12003 : }
12004 :
12005 8207 : if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12006 2 : return NewVSel;
12007 :
12008 8205 : return SDValue();
12009 : }
12010 :
12011 0 : SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12012 0 : SDValue N0 = N->getOperand(0);
12013 0 : EVT VT = N->getValueType(0);
12014 :
12015 : // fold (fceil c1) -> fceil(c1)
12016 : if (isConstantFPBuildVectorOrConstantFP(N0))
12017 0 : return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12018 :
12019 0 : return SDValue();
12020 : }
12021 :
12022 0 : SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12023 0 : SDValue N0 = N->getOperand(0);
12024 0 : EVT VT = N->getValueType(0);
12025 :
12026 : // fold (ftrunc c1) -> ftrunc(c1)
12027 : if (isConstantFPBuildVectorOrConstantFP(N0))
12028 0 : return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12029 :
12030 : // fold ftrunc (known rounded int x) -> x
12031 : // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
12032 : // likely to be generated to extract an integer from a rounded floating value.
12033 0 : switch (N0.getOpcode()) {
12034 : default: break;
12035 0 : case ISD::FRINT:
12036 : case ISD::FTRUNC:
12037 : case ISD::FNEARBYINT:
12038 : case ISD::FFLOOR:
12039 : case ISD::FCEIL:
12040 0 : return N0;
12041 : }
12042 :
12043 0 : return SDValue();
12044 : }
12045 :
12046 0 : SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12047 0 : SDValue N0 = N->getOperand(0);
12048 0 : EVT VT = N->getValueType(0);
12049 :
12050 : // fold (ffloor c1) -> ffloor(c1)
12051 : if (isConstantFPBuildVectorOrConstantFP(N0))
12052 0 : return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12053 :
12054 0 : return SDValue();
12055 : }
12056 :
12057 : // FIXME: FNEG and FABS have a lot in common; refactor.
12058 6789 : SDValue DAGCombiner::visitFNEG(SDNode *N) {
12059 6789 : SDValue N0 = N->getOperand(0);
12060 6789 : EVT VT = N->getValueType(0);
12061 :
12062 : // Constant fold FNEG.
12063 6789 : if (isConstantFPBuildVectorOrConstantFP(N0))
12064 0 : return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12065 :
12066 6789 : if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12067 6789 : &DAG.getTarget().Options))
12068 101 : return GetNegatedExpression(N0, DAG, LegalOperations);
12069 :
12070 : // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12071 : // constant pool values.
12072 9424 : if (!TLI.isFNegFree(VT) &&
12073 6688 : N0.getOpcode() == ISD::BITCAST &&
12074 : N0.getNode()->hasOneUse()) {
12075 204 : SDValue Int = N0.getOperand(0);
12076 204 : EVT IntVT = Int.getValueType();
12077 384 : if (IntVT.isInteger() && !IntVT.isVector()) {
12078 : APInt SignMask;
12079 126 : if (N0.getValueType().isVector()) {
12080 : // For a vector, get a mask such as 0x80... per scalar element
12081 : // and splat it.
12082 32 : SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12083 64 : SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12084 : } else {
12085 : // For a scalar, just generate 0x80...
12086 20 : SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12087 : }
12088 : SDLoc DL0(N0);
12089 42 : Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12090 42 : DAG.getConstant(SignMask, DL0, IntVT));
12091 42 : AddToWorklist(Int.getNode());
12092 42 : return DAG.getBitcast(VT, Int);
12093 : }
12094 : }
12095 :
12096 : // (fneg (fmul c, x)) -> (fmul -c, x)
12097 13292 : if (N0.getOpcode() == ISD::FMUL &&
12098 71 : (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12099 450 : ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12100 : if (CFP1) {
12101 6 : APFloat CVal = CFP1->getValueAPF();
12102 6 : CVal.changeSign();
12103 8 : if (Level >= AfterLegalizeDAG &&
12104 2 : (TLI.isFPImmLegal(CVal, VT) ||
12105 1 : TLI.isOperationLegal(ISD::ConstantFP, VT)))
12106 1 : return DAG.getNode(
12107 2 : ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12108 1 : DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12109 4 : N0->getFlags());
12110 : }
12111 : }
12112 :
12113 6645 : return SDValue();
12114 : }
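      :
      : // Illustrative sketch (added for exposition; not part of the original
      : // source, helper name invented): the sign-mask rewrite above on an
      : // IEEE-754 double, using only the standard library. XORing the sign
      : // bit negates the value without loading an FP constant.
      : #include <cstdint>
      : #include <cstring>
      : static double fnegViaXorSketch(double X) {
      :   uint64_t Bits;
      :   std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast f64 -> i64
      :   Bits ^= UINT64_C(1) << 63;            // xor with the 0x80... sign mask
      :   std::memcpy(&X, &Bits, sizeof(Bits)); // bitcast i64 -> f64
      :   return X;                             // == -X, sign flipped even for NaN
      : }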
12115 :
12116 0 : SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12117 0 : SDValue N0 = N->getOperand(0);
12118 0 : SDValue N1 = N->getOperand(1);
12119 0 : EVT VT = N->getValueType(0);
12120 0 : const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12121 0 : const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12122 :
12123 0 : if (N0CFP && N1CFP) {
12124 0 : const APFloat &C0 = N0CFP->getValueAPF();
12125 0 : const APFloat &C1 = N1CFP->getValueAPF();
12126 0 : return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
12127 : }
12128 :
12129 : // Canonicalize to constant on RHS.
12130 : if (isConstantFPBuildVectorOrConstantFP(N0) &&
12131 : !isConstantFPBuildVectorOrConstantFP(N1))
12132 0 : return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
12133 :
12134 0 : return SDValue();
12135 : }
12136 :
12137 0 : SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12138 0 : SDValue N0 = N->getOperand(0);
12139 0 : SDValue N1 = N->getOperand(1);
12140 0 : EVT VT = N->getValueType(0);
12141 0 : const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12142 0 : const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12143 :
12144 0 : if (N0CFP && N1CFP) {
12145 0 : const APFloat &C0 = N0CFP->getValueAPF();
12146 0 : const APFloat &C1 = N1CFP->getValueAPF();
12147 0 : return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
12148 : }
12149 :
12150 : // Canonicalize to constant on RHS.
12151 : if (isConstantFPBuildVectorOrConstantFP(N0) &&
12152 : !isConstantFPBuildVectorOrConstantFP(N1))
12153 0 : return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
12154 :
12155 0 : return SDValue();
12156 : }
12157 :
12158 5148 : SDValue DAGCombiner::visitFABS(SDNode *N) {
12159 5148 : SDValue N0 = N->getOperand(0);
12160 5148 : EVT VT = N->getValueType(0);
12161 :
12162 : // fold (fabs c1) -> fabs(c1)
12163 5148 : if (isConstantFPBuildVectorOrConstantFP(N0))
12164 0 : return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12165 :
12166 : // fold (fabs (fabs x)) -> (fabs x)
12167 10296 : if (N0.getOpcode() == ISD::FABS)
12168 4 : return N->getOperand(0);
12169 :
12170 : // fold (fabs (fneg x)) -> (fabs x)
12171 : // fold (fabs (fcopysign x, y)) -> (fabs x)
12172 5144 : if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12173 8 : return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12174 :
12175 : // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
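      : // (Illustrative: for f64 the mask is 0x7fffffffffffffff, the
      : // complement of the 0x80... sign mask used for FNEG above.)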
12176 5140 : if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12177 202 : SDValue Int = N0.getOperand(0);
12178 101 : EVT IntVT = Int.getValueType();
12179 179 : if (IntVT.isInteger() && !IntVT.isVector()) {
12180 : APInt SignMask;
12181 168 : if (N0.getValueType().isVector()) {
12182 : // For a vector, get a mask such as 0x7f... per scalar element
12183 : // and splat it.
12184 46 : SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12185 92 : SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12186 : } else {
12187 : // For a scalar, just generate 0x7f...
12188 20 : SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12189 : }
12190 : SDLoc DL(N0);
12191 56 : Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12192 56 : DAG.getConstant(SignMask, DL, IntVT));
12193 56 : AddToWorklist(Int.getNode());
12194 112 : return DAG.getBitcast(N->getValueType(0), Int);
12195 : }
12196 : }
12197 :
12198 5084 : return SDValue();
12199 : }
12200 :
12201 258065 : SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12202 258065 : SDValue Chain = N->getOperand(0);
12203 258065 : SDValue N1 = N->getOperand(1);
12204 258065 : SDValue N2 = N->getOperand(2);
12205 :
12206 : // If N is a constant we could fold this into a fallthrough or unconditional
12207 : // branch. However that doesn't happen very often in normal code, because
12208 : // Instcombine/SimplifyCFG should have handled the available opportunities.
12209 : // If we did this folding here, it would be necessary to update the
12210 : // MachineBasicBlock CFG, which is awkward.
12211 :
12212 : // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12213 : // on the target.
12214 258065 : if (N1.getOpcode() == ISD::SETCC &&
12215 434520 : TLI.isOperationLegalOrCustom(ISD::BR_CC,
12216 : N1.getOperand(0).getValueType())) {
12217 4240 : return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12218 : Chain, N1.getOperand(2),
12219 8480 : N1.getOperand(0), N1.getOperand(1), N2);
12220 : }
12221 :
12222 253825 : if (N1.hasOneUse()) {
12223 248932 : if (SDValue NewN1 = rebuildSetCC(N1))
12224 79736 : return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12225 : }
12226 :
12227 223965 : return SDValue();
12228 : }
12229 :
12230 249617 : SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12231 249617 : if (N.getOpcode() == ISD::SRL ||
12232 1485 : (N.getOpcode() == ISD::TRUNCATE &&
12233 2897 : (N.getOperand(0).hasOneUse() &&
12234 1412 : N.getOperand(0).getOpcode() == ISD::SRL))) {
12235 : // Look pass the truncate.
12236 119 : if (N.getOpcode() == ISD::TRUNCATE)
12237 89 : N = N.getOperand(0);
12238 :
12239 : // Match this pattern so that we can generate simpler code:
12240 : //
12241 : // %a = ...
12242 : // %b = and i32 %a, 2
12243 : // %c = srl i32 %b, 1
12244 : // brcond i32 %c ...
12245 : //
12246 : // into
12247 : //
12248 : // %a = ...
12249 : // %b = and i32 %a, 2
12250 : // %c = setcc eq %b, 0
12251 : // brcond %c ...
12252 : //
12253 : // This applies only when the AND constant value has one bit set and the
12254 : // SRL constant is equal to the log2 of the AND constant. The back-end is
12255 : // smart enough to convert the result into a TEST/JMP sequence.
12256 119 : SDValue Op0 = N.getOperand(0);
12257 119 : SDValue Op1 = N.getOperand(1);
12258 :
12259 119 : if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12260 118 : SDValue AndOp1 = Op0.getOperand(1);
12261 :
12262 118 : if (AndOp1.getOpcode() == ISD::Constant) {
12263 118 : const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12264 :
12265 118 : if (AndConst.isPowerOf2() &&
12266 236 : cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12267 : SDLoc DL(N);
12268 118 : return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12269 : Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12270 118 : ISD::SETNE);
12271 : }
12272 : }
12273 : }
12274 : }
12275 :
12276 : // Transform br(xor(x, y)) -> br(x != y)
12277 : // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12278 249499 : if (N.getOpcode() == ISD::XOR) {
12279 : // Because we may call this on a speculatively constructed
12280 : // SimplifiedSetCC Node, we need to simplify this node first.
12281 : // Ideally this should be folded into SimplifySetCC and not
12282 : // here. For now, grab a handle to N so we don't lose it from
12283 : // replacements interal to the visit.
12284 30835 : HandleSDNode XORHandle(N);
12285 52403 : while (N.getOpcode() == ISD::XOR) {
12286 30353 : SDValue Tmp = visitXOR(N.getNode());
12287 : // No simplification done.
12288 30353 : if (!Tmp.getNode())
12289 : break;
12290 : // Returning N is a form of in-visit replacement that may invalidate
12291 : // N, so grab the value from the handle instead.
12292 22057 : if (Tmp.getNode() == N.getNode())
12293 8 : N = XORHandle.getValue();
12294 : else // Node simplified. Try simplifying again.
12295 22049 : N = Tmp;
12296 : }
12297 :
12298 30346 : if (N.getOpcode() != ISD::XOR)
12299 29857 : return N;
12300 :
12301 : SDNode *TheXor = N.getNode();
12302 :
12303 8296 : SDValue Op0 = TheXor->getOperand(0);
12304 8296 : SDValue Op1 = TheXor->getOperand(1);
12305 :
12306 8296 : if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12307 : bool Equal = false;
12308 7807 : if (isOneConstant(Op0) && Op0.hasOneUse() &&
12309 : Op0.getOpcode() == ISD::XOR) {
12310 : TheXor = Op0.getNode();
12311 : Equal = true;
12312 : }
12313 :
12314 7807 : EVT SetCCVT = N.getValueType();
12315 7807 : if (LegalTypes)
12316 4 : SetCCVT = getSetCCResultType(SetCCVT);
12317 : // Replace the uses of XOR with SETCC
12318 7807 : return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12319 15614 : Equal ? ISD::SETEQ : ISD::SETNE);
12320 : }
12321 : }
12322 :
12323 219642 : return SDValue();
12324 : }
12325 :
12326 : // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12327 : //
12328 6356 : SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12329 6356 : CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12330 6356 : SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12331 :
12332 : // If N is a constant we could fold this into a fallthrough or unconditional
12333 : // branch. However that doesn't happen very often in normal code, because
12334 : // Instcombine/SimplifyCFG should have handled the available opportunities.
12335 : // If we did this folding here, it would be necessary to update the
12336 : // MachineBasicBlock CFG, which is awkward.
12337 :
12338 : // Use SimplifySetCC to simplify SETCC's.
12339 : SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12340 6356 : CondLHS, CondRHS, CC->get(), SDLoc(N),
12341 19068 : false);
12342 6356 : if (Simp.getNode()) AddToWorklist(Simp.getNode());
12343 :
12344 : // fold to a simpler setcc
12345 6356 : if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12346 958 : return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12347 : N->getOperand(0), Simp.getOperand(2),
12348 : Simp.getOperand(0), Simp.getOperand(1),
12349 961 : N->getOperand(4));
12350 :
12351 5877 : return SDValue();
12352 : }
12353 :
12354 : /// Return true if 'Use' is a load or a store that uses N as its base pointer
12355 : /// and that N may be folded in the load / store addressing mode.
12356 12898 : static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12357 : SelectionDAG &DAG,
12358 : const TargetLowering &TLI) {
12359 12898 : EVT VT;
12360 : unsigned AS;
12361 :
12362 : if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
12363 4644 : if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12364 : return false;
12365 4635 : VT = LD->getMemoryVT();
12366 : AS = LD->getAddressSpace();
12367 : } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
12368 6962 : if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12369 : return false;
12370 6824 : VT = ST->getMemoryVT();
12371 : AS = ST->getAddressSpace();
12372 : } else
12373 : return false;
12374 :
12375 11459 : TargetLowering::AddrMode AM;
12376 22918 : if (N->getOpcode() == ISD::ADD) {
12377 11459 : ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12378 : if (Offset)
12379 : // [reg +/- imm]
12380 22858 : AM.BaseOffs = Offset->getSExtValue();
12381 : else
12382 : // [reg +/- reg]
12383 30 : AM.Scale = 1;
12384 0 : } else if (N->getOpcode() == ISD::SUB) {
12385 0 : ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12386 : if (Offset)
12387 : // [reg +/- imm]
12388 0 : AM.BaseOffs = -Offset->getSExtValue();
12389 : else
12390 : // [reg +/- reg]
12391 0 : AM.Scale = 1;
12392 : } else
12393 : return false;
12394 :
12395 11459 : return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12396 22918 : VT.getTypeForEVT(*DAG.getContext()), AS);
12397 : }
12398 :
12399 : /// Try turning a load/store into a pre-indexed load/store when the base
12400 : /// pointer is an add or subtract and it has other uses besides the load/store.
12401 : /// After the transformation, the new indexed load/store has effectively folded
12402 : /// the add/subtract in and all of its other uses are redirected to the
12403 : /// new load/store.
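      : ///
      : /// For example (illustrative):
      : ///   t = add p, 4
      : ///   x = load t
      : ///   ... other uses of t ...
      : /// becomes a pre-indexed load that produces both x and the updated
      : /// pointer t = p + 4, so the separate add disappears.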
12404 13578213 : bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
12405 13578213 : if (Level < AfterLegalizeDAG)
12406 : return false;
12407 :
12408 : bool isLoad = true;
12409 5529896 : SDValue Ptr;
12410 : EVT VT;
12411 : if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12412 2592171 : if (LD->isIndexed())
12413 : return false;
12414 : VT = LD->getMemoryVT();
12415 2591763 : if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
12416 : !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
12417 : return false;
12418 19138 : Ptr = LD->getBasePtr();
12419 : } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12420 2937725 : if (ST->isIndexed())
12421 : return false;
12422 : VT = ST->getMemoryVT();
12423 2937488 : if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
12424 : !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
12425 : return false;
12426 16827 : Ptr = ST->getBasePtr();
12427 : isLoad = false;
12428 : } else {
12429 : return false;
12430 : }
12431 :
12432 : // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
12433 : // out. There is no reason to make this a preinc/predec.
12434 35965 : if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
12435 : Ptr.getNode()->hasOneUse())
12436 : return false;
12437 :
12438 : // Ask the target to do addressing mode selection.
12439 4878 : SDValue BasePtr;
12440 4878 : SDValue Offset;
12441 4878 : ISD::MemIndexedMode AM = ISD::UNINDEXED;
12442 4878 : if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
12443 : return false;
12444 :
12445 : // Backends without true r+i pre-indexed forms may need to pass a
12446 : // constant base with a variable offset so that constant coercion
12447 : // will work with the patterns in canonical form.
12448 : bool Swapped = false;
12449 : if (isa<ConstantSDNode>(BasePtr)) {
12450 : std::swap(BasePtr, Offset);
12451 : Swapped = true;
12452 : }
12453 :
12454 : // Don't create a indexed load / store with zero offset.
12455 4797 : if (isNullConstant(Offset))
12456 : return false;
12457 :
12458 : // Try turning it into a pre-indexed load / store except when:
12459 : // 1) The new base ptr is a frame index.
12460 : // 2) If N is a store and the new base ptr is either the same as or is a
12461 : // predecessor of the value being stored.
12462 : // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
12463 : // that would create a cycle.
12464 : // 4) All uses are load / store ops that use it as old base ptr.
12465 :
12466 : // Check #1. Preinc'ing a frame index would require copying the stack pointer
12467 : // (plus the implicit offset) to a register to preinc anyway.
12468 3849 : if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12469 : return false;
12470 :
12471 : // Check #2.
12472 3739 : if (!isLoad) {
12473 1793 : SDValue Val = cast<StoreSDNode>(N)->getValue();
12474 1788 : if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
12475 : return false;
12476 : }
12477 :
12478 : // Caches for hasPredecessorHelper.
12479 : SmallPtrSet<const SDNode *, 32> Visited;
12480 : SmallVector<const SDNode *, 16> Worklist;
12481 2163 : Worklist.push_back(N);
12482 :
12483 : // If the offset is a constant, there may be other adds of constants that
12484 : // can be folded with this one. We should do this to avoid having to keep
12485 : // a copy of the original base pointer.
12486 : SmallVector<SDNode *, 16> OtherUses;
12487 : if (isa<ConstantSDNode>(Offset))
12488 2017 : for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
12489 : UE = BasePtr.getNode()->use_end();
12490 3882 : UI != UE; ++UI) {
12491 : SDUse &Use = UI.getUse();
12492 : // Skip the use that is Ptr and uses of other results from BasePtr's
12493 : // node (important for nodes that return multiple results).
12494 3394 : if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
12495 : continue;
12496 :
12497 2779 : if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
12498 : continue;
12499 :
12500 4632 : if (Use.getUser()->getOpcode() != ISD::ADD &&
12501 : Use.getUser()->getOpcode() != ISD::SUB) {
12502 : OtherUses.clear();
12503 : break;
12504 : }
12505 :
12506 1574 : SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
12507 : if (!isa<ConstantSDNode>(Op1)) {
12508 : OtherUses.clear();
12509 : break;
12510 : }
12511 :
12512 : // FIXME: In some cases, we can be smarter about this.
12513 787 : if (Op1.getValueType() != Offset.getValueType()) {
12514 : OtherUses.clear();
12515 : break;
12516 : }
12517 :
12518 787 : OtherUses.push_back(Use.getUser());
12519 : }
12520 :
12521 2163 : if (Swapped)
12522 : std::swap(BasePtr, Offset);
12523 :
12524 : // Now check for #3 and #4.
12525 : bool RealUse = false;
12526 :
12527 6597 : for (SDNode *Use : Ptr.getNode()->uses()) {
12528 4611 : if (Use == N)
12529 : continue;
12530 2493 : if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
12531 : return false;
12532 :
12533 : // If Ptr may be folded into the addressing mode of another use, then it's
12534 : // not profitable to do this transformation.
12535 2316 : if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
12536 : RealUse = true;
12537 : }
12538 :
12539 1986 : if (!RealUse)
12540 : return false;
12541 :
12542 : SDValue Result;
12543 421 : if (isLoad)
12544 308 : Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12545 308 : BasePtr, Offset, AM);
12546 : else
12547 113 : Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12548 113 : BasePtr, Offset, AM);
12549 : ++PreIndexedNodes;
12550 : ++NodesCombined;
12551 : LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
12552 : Result.getNode()->dump(&DAG); dbgs() << '\n');
12553 : WorklistRemover DeadNodes(*this);
12554 421 : if (isLoad) {
12555 616 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12556 616 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12557 : } else {
12558 226 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12559 : }
12560 :
12561 : // Finally, since the node is now dead, remove it from the graph.
12562 421 : deleteAndRecombine(N);
12563 :
12564 421 : if (Swapped)
12565 : std::swap(BasePtr, Offset);
12566 :
12567 : // Replace other uses of BasePtr that can be updated to use Ptr
12568 625 : for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
12569 : unsigned OffsetIdx = 1;
12570 408 : if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
12571 : OffsetIdx = 0;
12572 : assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
12573 : BasePtr.getNode() && "Expected BasePtr operand");
12574 :
12575 : // We need to replace ptr0 in the following expression:
12576 : // x0 * offset0 + y0 * ptr0 = t0
12577 : // knowing that
12578 : // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
12579 : //
12580 : // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
12581 : // indexed load/store and the expression that needs to be re-written.
12582 : //
12583 : // Therefore, we have:
12584 : // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
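      : //
      : // (Derivation: since y1 is in {-1, 1}, the second equation gives
      : // ptr0 = y1 * (t1 - x1 * offset1); substituting that into the first
      : // equation yields the line above.)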
12585 :
12586 : ConstantSDNode *CN =
12587 : cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
12588 : int X0, X1, Y0, Y1;
12589 204 : const APInt &Offset0 = CN->getAPIntValue();
12590 204 : APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
12591 :
12592 408 : X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
12593 204 : Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
12594 204 : X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
12595 204 : Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
12596 :
12597 204 : unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
12598 :
12599 : APInt CNV = Offset0;
12600 204 : if (X0 < 0) CNV = -CNV;
12601 204 : if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
12602 204 : else CNV = CNV - Offset1;
12603 :
12604 204 : SDLoc DL(OtherUses[i]);
12605 :
12606 : // We can now generate the new expression.
12607 408 : SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
12608 204 : SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
12609 :
12610 204 : SDValue NewUse = DAG.getNode(Opcode,
12611 : DL,
12612 408 : OtherUses[i]->getValueType(0), NewOp1, NewOp2);
12613 612 : DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
12614 204 : deleteAndRecombine(OtherUses[i]);
12615 : }
12616 :
12617 : // Replace the uses of Ptr with uses of the updated base value.
12618 534 : DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
12619 421 : deleteAndRecombine(Ptr.getNode());
12620 421 : AddToWorklist(Result.getNode());
12621 :
12622 : return true;
12623 : }
12624 :
12625 : /// Try to combine a load/store with an add/sub of the base pointer node into a
12626 : /// post-indexed load/store. The transformation effectively folds the add/subtract
12627 : /// into the new indexed load/store, and all of its uses are redirected to the
12628 : /// new load/store.
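      : ///
      : /// For example (illustrative):
      : ///   x = load p
      : ///   q = add p, 4
      : /// becomes a post-indexed load that produces both x and q = p + 4.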
12629 13577792 : bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
12630 13577792 : if (Level < AfterLegalizeDAG)
12631 : return false;
12632 :
12633 : bool isLoad = true;
12634 : SDValue Ptr;
12635 : EVT VT;
12636 : if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12637 2591863 : if (LD->isIndexed())
12638 : return false;
12639 : VT = LD->getMemoryVT();
12640 2591455 : if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
12641 : !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
12642 : return false;
12643 19477 : Ptr = LD->getBasePtr();
12644 : } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12645 2937612 : if (ST->isIndexed())
12646 : return false;
12647 : VT = ST->getMemoryVT();
12648 2937375 : if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
12649 : !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
12650 : return false;
12651 15678 : Ptr = ST->getBasePtr();
12652 : isLoad = false;
12653 : } else {
12654 : return false;
12655 : }
12656 :
12657 : if (Ptr.getNode()->hasOneUse())
12658 : return false;
12659 :
12660 232311 : for (SDNode *Op : Ptr.getNode()->uses()) {
12661 218474 : if (Op == N ||
12662 204208 : (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
12663 217108 : continue;
12664 :
12665 9557 : SDValue BasePtr;
12666 9557 : SDValue Offset;
12667 9557 : ISD::MemIndexedMode AM = ISD::UNINDEXED;
12668 9557 : if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
12669 : // Don't create an indexed load / store with zero offset.
12670 8711 : if (isNullConstant(Offset))
12671 8191 : continue;
12672 :
12673 : // Try turning it into a post-indexed load / store except when
12674 : // 1) All uses are load / store ops that use it as base ptr (and
12675 : // it may be folded into the addressing mode).
12676 : // 2) Op must be independent of N, i.e. Op is neither a predecessor
12677 : // nor a successor of N. Otherwise, if Op is folded that would
12678 : // create a cycle.
12679 :
12680 7106 : if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12681 : continue;
12682 :
12683 : // Check for #1.
12684 : bool TryNext = false;
12685 12912 : for (SDNode *Use : BasePtr.getNode()->uses()) {
12686 12392 : if (Use == Ptr.getNode())
12687 : continue;
12688 :
12689 : // If all the uses are load / store addresses, then don't do the
12690 : // transformation.
12691 24784 : if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
12692 : bool RealUse = false;
12693 18061 : for (SDNode *UseUse : Use->uses()) {
12694 10582 : if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
12695 : RealUse = true;
12696 : }
12697 :
12698 7479 : if (!RealUse) {
12699 : TryNext = true;
12700 : break;
12701 : }
12702 : }
12703 : }
12704 :
12705 7106 : if (TryNext)
12706 : continue;
12707 :
12708 : // Check for #2.
12709 : SmallPtrSet<const SDNode *, 32> Visited;
12710 : SmallVector<const SDNode *, 8> Worklist;
12711 : // Ptr is predecessor to both N and Op.
12712 520 : Visited.insert(Ptr.getNode());
12713 520 : Worklist.push_back(N);
12714 520 : Worklist.push_back(Op);
12715 1035 : if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
12716 515 : !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
12717 : SDValue Result = isLoad
12718 452 : ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12719 256 : BasePtr, Offset, AM)
12720 452 : : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12721 708 : BasePtr, Offset, AM);
12722 : ++PostIndexedNodes;
12723 : ++NodesCombined;
12724 : LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
12725 : dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
12726 : dbgs() << '\n');
12727 : WorklistRemover DeadNodes(*this);
12728 452 : if (isLoad) {
12729 510 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12730 510 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12731 : } else {
12732 394 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12733 : }
12734 :
12735 : // Finally, since the node is now dead, remove it from the graph.
12736 452 : deleteAndRecombine(N);
12737 :
12738 : // Replace the uses of Op with uses of the updated base value.
12739 649 : DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
12740 : Result.getValue(isLoad ? 1 : 0));
12741 452 : deleteAndRecombine(Op);
12742 : return true;
12743 : }
12744 : }
12745 : }
12746 :
12747 : return false;
12748 : }
12749 :
12750 : /// Return the base-pointer arithmetic from an indexed \p LD.
12751 0 : SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12752 : ISD::MemIndexedMode AM = LD->getAddressingMode();
12753 : assert(AM != ISD::UNINDEXED);
12754 0 : SDValue BP = LD->getOperand(1);
12755 0 : SDValue Inc = LD->getOperand(2);
12756 :
12757 : // Some backends use TargetConstants for load offsets, but don't expect
12758 : // TargetConstants in general ADD nodes. We can convert these constants into
12759 : // regular Constants (if the constant is not opaque).
12760 : assert((Inc.getOpcode() != ISD::TargetConstant ||
12761 : !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12762 : "Cannot split out indexing using opaque target constants");
12763 0 : if (Inc.getOpcode() == ISD::TargetConstant) {
12764 : ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12765 0 : Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12766 0 : ConstInc->getValueType(0));
12767 : }
12768 :
12769 : unsigned Opc =
12770 0 : (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12771 0 : return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12772 : }
12773 :
12774 8856 : static inline int numVectorEltsOrZero(EVT T) {
12775 11247 : return T.isVector() ? T.getVectorNumElements() : 0;
12776 : }
12777 :
12778 8866 : bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
12779 8866 : Val = ST->getValue();
12780 8866 : EVT STType = Val.getValueType();
12781 8866 : EVT STMemType = ST->getMemoryVT();
12782 0 : if (STType == STMemType)
12783 : return true;
12784 1620 : if (isTypeLegal(STMemType))
12785 : return false; // fail.
12786 1594 : if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
12787 : TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
12788 0 : Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
12789 0 : return true;
12790 : }
12791 3188 : if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
12792 1594 : STType.isInteger() && STMemType.isInteger()) {
12793 1594 : Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
12794 1594 : return true;
12795 : }
12796 0 : if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
12797 0 : Val = DAG.getBitcast(STMemType, Val);
12798 0 : return true;
12799 : }
12800 : return false; // fail.
12801 : }
12802 :
12803 0 : bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
12804 0 : EVT LDMemType = LD->getMemoryVT();
12805 0 : EVT LDType = LD->getValueType(0);
12806 : assert(Val.getValueType() == LDMemType &&
12807 : "Attempting to extend value of non-matching type");
12808 0 : if (LDType == LDMemType)
12809 0 : return true;
12810 0 : if (LDMemType.isInteger() && LDType.isInteger()) {
12811 0 : switch (LD->getExtensionType()) {
12812 0 : case ISD::NON_EXTLOAD:
12813 0 : Val = DAG.getBitcast(LDType, Val);
12814 0 : return true;
12815 0 : case ISD::EXTLOAD:
12816 0 : Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
12817 0 : return true;
12818 0 : case ISD::SEXTLOAD:
12819 0 : Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
12820 0 : return true;
12821 0 : case ISD::ZEXTLOAD:
12822 0 : Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
12823 0 : return true;
12824 : }
12825 : }
12826 : return false;
12827 : }
12828 :
12829 6353867 : SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
12830 6353867 : if (OptLevel == CodeGenOpt::None || LD->isVolatile())
12831 3885058 : return SDValue();
12832 2468809 : SDValue Chain = LD->getOperand(0);
12833 : StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
12834 335918 : if (!ST || ST->isVolatile())
12835 2135182 : return SDValue();
12836 :
12837 333627 : EVT LDType = LD->getValueType(0);
12838 333627 : EVT LDMemType = LD->getMemoryVT();
12839 333627 : EVT STMemType = ST->getMemoryVT();
12840 333627 : EVT STType = ST->getValue().getValueType();
12841 :
12842 333627 : BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
12843 333627 : BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
12844 : int64_t Offset;
12845 :
12846 : bool STCoversLD =
12847 391529 : BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset) && (Offset >= 0) &&
12848 390355 : (Offset * 8 <= LDMemType.getSizeInBits()) &&
12849 20922 : (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
12850 :
12851 : if (!STCoversLD)
12852 314277 : return SDValue();
12853 :
12854 : // Normalize for Endianness.
12855 19350 : if (DAG.getDataLayout().isBigEndian())
12856 2466 : Offset =
12857 2466 : (STMemType.getSizeInBits() - LDMemType.getSizeInBits()) / 8 - Offset;
12858 :
12859 : // Memory as copy space (potentially masked).
12860 26235 : if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
12861 : // Simple case: Direct non-truncating forwarding
12862 4132 : if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
12863 4086 : return CombineTo(LD, ST->getValue(), Chain);
12864 : // Can we model the truncate and extension with an and mask?
12865 138 : if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
12866 92 : !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
12867 : // Mask to size of LDMemType
12868 : auto Mask =
12869 92 : DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
12870 : STMemType.getSizeInBits()),
12871 46 : SDLoc(ST), STType);
12872 92 : auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
12873 : return CombineTo(LD, Val, Chain);
12874 : }
12875 : }
12876 :
12877 : // TODO: Deal with nonzero offset.
12878 30436 : if (LD->getBasePtr().isUndef() || Offset != 0)
12879 6352 : return SDValue();
12880 : // Model necessary truncations / extensions.
12881 8866 : SDValue Val;
12882 : // Truncate Value To Stored Memory Size.
12883 : do {
12884 8866 : if (!getTruncatedStoreValue(ST, Val))
12885 : continue;
12886 8840 : if (!isTypeLegal(LDMemType))
12887 : continue;
12888 3114 : if (STMemType != LDMemType) {
12889 4722 : if (numVectorEltsOrZero(STMemType) == numVectorEltsOrZero(LDMemType) &&
12890 2834 : STMemType.isInteger() && LDMemType.isInteger())
12891 69 : Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
12892 : else
12893 : continue;
12894 : }
12895 347 : if (!extendLoadedValueToExtension(LD, Val))
12896 : continue;
12897 333 : return CombineTo(LD, Val, Chain);
12898 : } while (false);
12899 :
12900 : // On failure, cleanup dead nodes we may have created.
12901 8533 : if (Val->use_empty())
12902 1594 : deleteAndRecombine(Val.getNode());
12903 8533 : return SDValue();
12904 : }
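      :
      : // Illustrative sketch (added for exposition; not part of the original
      : // source, helper name invented): the AND-mask modelling above in plain
      : // C++. A truncating i8 store followed by a zero-extending i32 load of
      : // the same slot forwards the stored register masked to the memory width.
      : #include <cassert>
      : #include <cstdint>
      : static uint32_t forwardTruncStoreSketch(uint32_t StoredVal) {
      :   uint8_t Slot = static_cast<uint8_t>(StoredVal); // truncstore i8
      :   uint32_t Loaded = Slot;                         // zextload i8 -> i32
      :   assert(Loaded == (StoredVal & 0xFFu));          // the AND-mask rewrite
      :   return StoredVal & 0xFFu;
      : }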
12905 :
12906 6474249 : SDValue DAGCombiner::visitLOAD(SDNode *N) {
12907 : LoadSDNode *LD = cast<LoadSDNode>(N);
12908 6474249 : SDValue Chain = LD->getChain();
12909 6474249 : SDValue Ptr = LD->getBasePtr();
12910 :
12911 : // If load is not volatile and there are no uses of the loaded value (and
12912 : // the updated indexed value in case of indexed loads), change uses of the
12913 : // chain value into uses of the chain input (i.e. delete the dead load).
12914 6474249 : if (!LD->isVolatile()) {
12915 6432490 : if (N->getValueType(1) == MVT::Other) {
12916 : // Unindexed loads.
12917 6432083 : if (!N->hasAnyUseOfValue(0)) {
12918 : // It's not safe to use the two value CombineTo variant here. e.g.
12919 : // v1, chain2 = load chain1, loc
12920 : // v2, chain3 = load chain2, loc
12921 : // v3 = add v2, c
12922 : // Now we replace use of chain2 with chain1. This makes the second load
12923 : // isomorphic to the one we are deleting, and thus makes this load live.
12924 : LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
12925 : dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
12926 : dbgs() << "\n");
12927 : WorklistRemover DeadNodes(*this);
12928 240758 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12929 : AddUsersToWorklist(Chain.getNode());
12930 120379 : if (N->use_empty())
12931 120365 : deleteAndRecombine(N);
12932 :
12933 120379 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
12934 : }
12935 : } else {
12936 : // Indexed loads.
12937 : assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
12938 :
12939 : // If this load has an opaque TargetConstant offset, then we cannot split
12940 : // the indexing into an add/sub directly (that TargetConstant may not be
12941 : // valid for a different type of node, and we cannot convert an opaque
12942 : // target constant into a regular constant).
12943 814 : bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
12944 173 : cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
12945 :
12946 407 : if (!N->hasAnyUseOfValue(0) &&
12947 3 : ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
12948 6 : SDValue Undef = DAG.getUNDEF(N->getValueType(0));
12949 3 : SDValue Index;
12950 3 : if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
12951 3 : Index = SplitIndexingFromLoad(LD);
12952 : // Try to fold the base pointer arithmetic into subsequent loads and
12953 : // stores.
12954 : AddUsersToWorklist(N);
12955 : } else
12956 0 : Index = DAG.getUNDEF(N->getValueType(1));
12957 : LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
12958 : dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
12959 : dbgs() << " and 2 other values\n");
12960 : WorklistRemover DeadNodes(*this);
12961 6 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
12962 6 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
12963 6 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
12964 3 : deleteAndRecombine(N);
12965 3 : return SDValue(N, 0); // Return N so it doesn't get rechecked!
12966 : }
12967 : }
12968 : }
12969 :
12970 : // If this load is directly stored, replace the load value with the stored
12971 : // value.
12972 6353867 : if (auto V = ForwardStoreValueToDirectLoad(LD))
12973 4465 : return V;
12974 :
12975 : // Try to infer better alignment information than the load already has.
12976 6349402 : if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
12977 2504543 : if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12978 1207066 : if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
12979 27799 : SDValue NewLoad = DAG.getExtLoad(
12980 27799 : LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
12981 : LD->getPointerInfo(), LD->getMemoryVT(), Align,
12982 92763 : LD->getMemOperand()->getFlags(), LD->getAAInfo());
12983 : // NewLoad will always be N as we are only refining the alignment
12984 : assert(NewLoad.getNode() == N);
12985 : (void)NewLoad;
12986 : }
12987 : }
12988 : }
12989 :
12990 6349402 : if (LD->isUnindexed()) {
12991 : // Walk up chain skipping non-aliasing memory nodes.
12992 6348994 : SDValue BetterChain = FindBetterChain(N, Chain);
12993 :
12994 : // If there is a better chain.
12995 : if (Chain != BetterChain) {
12996 : SDValue ReplLoad;
12997 :
12998 : // Replace the chain to avoid the dependency.
12999 194523 : if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13000 362800 : ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13001 535459 : BetterChain, Ptr, LD->getMemOperand());
13002 : } else {
13003 4929 : ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13004 : LD->getValueType(0),
13005 : BetterChain, Ptr, LD->getMemoryVT(),
13006 4929 : LD->getMemOperand());
13007 : }
13008 :
13009 : // Create token factor to keep old chain connected.
13010 194523 : SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13011 389046 : MVT::Other, Chain, ReplLoad.getValue(1));
13012 :
13013 : // Replace uses with load result and token factor
13014 : return CombineTo(N, ReplLoad.getValue(0), Token);
13015 : }
13016 : }
13017 :
13018 : // Try transforming N to an indexed load.
13019 6154879 : if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13020 563 : return SDValue(N, 0);
13021 :
13022 : // Try to slice up N to more direct loads if the slices are mapped to
13023 : // different register banks or pairing can take place.
13024 6154316 : if (SliceUpLoad(N))
13025 5 : return SDValue(N, 0);
13026 :
13027 6154311 : return SDValue();
13028 : }
13029 :
13030 : namespace {
13031 :
13032 : /// Helper structure used to slice a load into smaller loads.
13033 : /// Basically a slice is obtained from the following sequence:
13034 : /// Origin = load Ty1, Base
13035 : /// Shift = srl Ty1 Origin, CstTy Amount
13036 : /// Inst = trunc Shift to Ty2
13037 : ///
13038 : /// Then, it will be rewritten into:
13039 : /// Slice = load SliceTy, Base + SliceOffset
13040 : /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13041 : ///
13042 : /// SliceTy is deduced from the number of bits that are actually used to
13043 : /// build Inst.
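      : ///
      : /// For example (little endian, illustrative):
      : ///   Origin = load i64, Base
      : ///   Shift = srl i64 Origin, 32
      : ///   Inst = trunc Shift to i32
      : /// is rewritten as: Slice = load i32, Base + 4.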
13044 : struct LoadedSlice {
13045 : /// Helper structure used to compute the cost of a slice.
13046 : struct Cost {
13047 : /// Are we optimizing for code size?
13048 : bool ForCodeSize;
13049 :
13050 : /// Various costs.
13051 : unsigned Loads = 0;
13052 : unsigned Truncates = 0;
13053 : unsigned CrossRegisterBanksCopies = 0;
13054 : unsigned ZExts = 0;
13055 : unsigned Shift = 0;
13056 :
13057 20 : Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13058 :
13059 : /// Get the cost of one isolated slice.
13060 40 : Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13061 40 : : ForCodeSize(ForCodeSize), Loads(1) {
13062 40 : EVT TruncType = LS.Inst->getValueType(0);
13063 40 : EVT LoadedType = LS.getLoadedType();
13064 40 : if (TruncType != LoadedType &&
13065 0 : !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13066 0 : ZExts = 1;
13067 40 : }
13068 :
13069 : /// Account for slicing gain in the current cost.
13070 : /// Slicing provide a few gains like removing a shift or a
13071 : /// truncate. This method allows to grow the cost of the original
13072 : /// load with the gain from this slice.
13073 40 : void addSliceGain(const LoadedSlice &LS) {
13074 : // Each slice saves a truncate.
13075 40 : const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13076 80 : if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13077 40 : LS.Inst->getValueType(0)))
13078 12 : ++Truncates;
13079 : // If there is a shift amount, this slice gets rid of it.
13080 40 : if (LS.Shift)
13081 20 : ++Shift;
13082 : // If this slice can merge a cross register bank copy, account for it.
13083 40 : if (LS.canMergeExpensiveCrossRegisterBankCopy())
13084 4 : ++CrossRegisterBanksCopies;
13085 40 : }
13086 :
13087 : Cost &operator+=(const Cost &RHS) {
13088 40 : Loads += RHS.Loads;
13089 40 : Truncates += RHS.Truncates;
13090 40 : CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13091 40 : ZExts += RHS.ZExts;
13092 40 : Shift += RHS.Shift;
13093 : return *this;
13094 : }
13095 :
13096 : bool operator==(const Cost &RHS) const {
13097 : return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13098 : CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13099 : ZExts == RHS.ZExts && Shift == RHS.Shift;
13100 : }
13101 :
13102 : bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13103 :
13104 20 : bool operator<(const Cost &RHS) const {
13105 : // Assume cross register banks copies are as expensive as loads.
13106 : // FIXME: Do we want some more target hooks?
13107 20 : unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13108 20 : unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13109 : // Unless we are optimizing for code size, consider the
13110 : // expensive operation first.
13111 20 : if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13112 19 : return ExpensiveOpsLHS < ExpensiveOpsRHS;
13113 1 : return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13114 1 : (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13115 : }
13116 :
13117 20 : bool operator>(const Cost &RHS) const { return RHS < *this; }
13118 :
13119 : bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13120 :
13121 : bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13122 : };
13123 :
13124 : // The last instruction that represents the slice. This should be a
13125 : // truncate instruction.
13126 : SDNode *Inst;
13127 :
13128 : // The original load instruction.
13129 : LoadSDNode *Origin;
13130 :
13131 : // The right shift amount in bits from the original load.
13132 : unsigned Shift;
13133 :
13134 : // The DAG from which Origin came from.
13135 : // This is used to get some contextual information about legal types, etc.
13136 : SelectionDAG *DAG;
13137 :
13138 : LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13139 : unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13140 991 : : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13141 :
13142 : /// Get the bits used in a chunk of bits \p BitWidth large.
13143 : /// \return Result is \p BitWidth and has used bits set to 1 and
13144 : /// not used bits set to 0.
13145 2143 : APInt getUsedBits() const {
13146 : // Reproduce the trunc(lshr) sequence:
13147 : // - Start from the truncated value.
13148 : // - Zero extend to the desired bit width.
13149 : // - Shift left.
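      : // e.g. (illustrative) a 16-bit slice of an i64 origin with Shift = 16
      : // yields UsedBits = 0x00000000ffff0000.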
13150 : assert(Origin && "No original load to compare against.");
13151 2143 : unsigned BitWidth = Origin->getValueSizeInBits(0);
13152 : assert(Inst && "This slice is not bound to an instruction");
13153 : assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13154 : "Extracted slice is bigger than the whole type!");
13155 2143 : APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13156 2143 : UsedBits.setAllBits();
13157 2143 : UsedBits = UsedBits.zext(BitWidth);
13158 2143 : UsedBits <<= Shift;
13159 2143 : return UsedBits;
13160 : }
13161 :
13162 : /// Get the size of the slice to be loaded in bytes.
13163 1148 : unsigned getLoadedSize() const {
13164 1148 : unsigned SliceSize = getUsedBits().countPopulation();
13165 : assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13166 1148 : return SliceSize / 8;
13167 : }
13168 :
13169 : /// Get the type that will be loaded for this slice.
13170 : /// Note: This may not be the final type for the slice.
13171 1068 : EVT getLoadedType() const {
13172 : assert(DAG && "Missing context");
13173 1068 : LLVMContext &Ctxt = *DAG->getContext();
13174 1068 : return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13175 : }
13176 :
13177 : /// Get the alignment of the load used for this slice.
13178 16 : unsigned getAlignment() const {
13179 16 : unsigned Alignment = Origin->getAlignment();
13180 16 : unsigned Offset = getOffsetFromBase();
13181 16 : if (Offset != 0)
13182 14 : Alignment = MinAlign(Alignment, Alignment + Offset);
13183 16 : return Alignment;
13184 : }
13185 :
13186 : /// Check if this slice can be rewritten with legal operations.
13187 974 : bool isLegal() const {
13188 : // An invalid slice is not legal.
13189 974 : if (!Origin || !Inst || !DAG)
13190 : return false;
13191 :
13192 : // Offsets are for indexed load only, we do not handle that.
13193 1948 : if (!Origin->getOffset().isUndef())
13194 : return false;
13195 :
13196 974 : const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13197 :
13198 : // Check that the type is legal.
13199 974 : EVT SliceType = getLoadedType();
13200 : if (!TLI.isTypeLegal(SliceType))
13201 : return false;
13202 :
13203 : // Check that the load is legal for this type.
13204 : if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13205 670 : return false;
13206 :
13207 : // Check that the offset can be computed.
13208 : // 1. Check its type.
13209 570 : EVT PtrType = Origin->getBasePtr().getValueType();
13210 285 : if (PtrType == MVT::Untyped || PtrType.isExtended())
13211 0 : return false;
13212 :
13213 : // 2. Check that it fits in the immediate.
13214 285 : if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13215 : return false;
13216 :
13217 : // 3. Check that the computation is legal.
13218 : if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13219 0 : return false;
13220 :
13221 : // Check that the zext is legal if it needs one.
13222 285 : EVT TruncateType = Inst->getValueType(0);
13223 : if (TruncateType != SliceType &&
13224 : !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13225 0 : return false;
13226 :
13227 : return true;
13228 : }
13229 :
13230 : /// Get the offset in bytes of this slice in the original chunk of
13231 : /// bits.
13232 : /// \pre DAG != nullptr.
13233 377 : uint64_t getOffsetFromBase() const {
13234 : assert(DAG && "Missing context.");
13235 377 : bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13236 : assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13237 377 : uint64_t Offset = Shift / 8;
13238 377 : unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13239 : assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13240 : "The size of the original loaded type is not a multiple of a"
13241 : " byte.");
13242 : // If Offset is bigger than TySizeInBytes, it means we are loading all
13243 : // zeros. This should have been optimized before in the process.
13244 : assert(TySizeInBytes > Offset &&
13245 : "Invalid shift amount for given loaded size");
13246 377 : if (IsBigEndian)
13247 80 : Offset = TySizeInBytes - Offset - getLoadedSize();
13248 377 : return Offset;
13249 : }
13250 :
13251 : /// Generate the sequence of instructions to load the slice
13252 : /// represented by this object and redirect the uses of this slice to
13253 : /// this new sequence of instructions.
13254 : /// \pre this->Inst && this->Origin are valid Instructions and this
13255 : /// object passed the legal check: LoadedSlice::isLegal returned true.
13256 : /// \return The last instruction of the sequence used to load the slice.
13257 10 : SDValue loadSlice() const {
13258 : assert(Inst && Origin && "Unable to replace a non-existing slice.");
13259 10 : const SDValue &OldBaseAddr = Origin->getBasePtr();
13260 10 : SDValue BaseAddr = OldBaseAddr;
13261 : // Get the offset in that chunk of bytes w.r.t. the endianness.
13262 10 : int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13263 : assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13264 10 : if (Offset) {
13265 : // BaseAddr = BaseAddr + Offset.
13266 5 : EVT ArithType = BaseAddr.getValueType();
13267 5 : SDLoc DL(Origin);
13268 5 : BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13269 5 : DAG->getConstant(Offset, DL, ArithType));
13270 : }
13271 :
13272 : // Create the type of the loaded slice according to its size.
13273 10 : EVT SliceType = getLoadedType();
13274 :
13275 : // Create the load for the slice.
13276 : SDValue LastInst =
13277 20 : DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13278 10 : Origin->getPointerInfo().getWithOffset(Offset),
13279 20 : getAlignment(), Origin->getMemOperand()->getFlags());
13280 : // If the final type is not the same as the loaded type, this means that
13281 : // we have to pad with zero. Create a zero extend for that.
13282 10 : EVT FinalType = Inst->getValueType(0);
13283 10 : if (SliceType != FinalType)
13284 1 : LastInst =
13285 1 : DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13286 10 : return LastInst;
13287 : }
13288 :
13289 : /// Check if this slice can be merged with an expensive cross register
13290 : /// bank copy. E.g.,
13291 : /// i = load i32
13292 : /// f = bitcast i32 i to float
13293 40 : bool canMergeExpensiveCrossRegisterBankCopy() const {
13294 40 : if (!Inst || !Inst->hasOneUse())
13295 : return false;
13296 : SDNode *Use = *Inst->use_begin();
13297 36 : if (Use->getOpcode() != ISD::BITCAST)
13298 : return false;
13299 : assert(DAG && "Missing context");
13300 4 : const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13301 4 : EVT ResVT = Use->getValueType(0);
13302 4 : const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13303 : const TargetRegisterClass *ArgRC =
13304 8 : TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13305 4 : if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13306 0 : return false;
13307 :
13308 : // At this point, we know that we perform a cross-register-bank copy.
13309 : // Check if it is expensive.
13310 8 : const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13311 : // Assume bitcasts are cheap, unless the two register classes do not
13312 : // explicitly share a common subclass.
13313 4 : if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13314 0 : return false;
13315 :
13316 : // Check if it will be merged with the load.
13317 : // 1. Check the alignment constraint.
13318 8 : unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13319 4 : ResVT.getTypeForEVT(*DAG->getContext()));
13320 :
13321 4 : if (RequiredAlignment > getAlignment())
13322 : return false;
13323 :
13324 : // 2. Check that the load is a legal operation for that type.
13325 : if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13326 0 : return false;
13327 :
13328 : // 3. Check that we do not have a zext in the way.
13329 4 : if (Inst->getValueType(0) != getLoadedType())
13330 0 : return false;
13331 :
13332 : return true;
13333 : }
13334 : };
13335 :
13336 : } // end anonymous namespace
13337 :
13338 : /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13339 : /// \p UsedBits looks like 0..0 1..1 0..0.
13340 24 : static bool areUsedBitsDense(const APInt &UsedBits) {
13341 : // If all the bits are one, this is dense!
13342 24 : if (UsedBits.isAllOnesValue())
13343 : return true;
13344 :
13345 : // Get rid of the unused bits on the right.
13346 2 : APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13347 : // Get rid of the unused bits on the left.
13348 2 : if (NarrowedUsedBits.countLeadingZeros())
13349 2 : NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13350 : // Check that the chunk of bits is completely used.
13351 : return NarrowedUsedBits.isAllOnesValue();
13352 : }
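// As a hedged illustration of the density test above, the same check can be
// written with plain 64-bit integers (a sketch assuming the used bits fit in
// 64 bits; this helper is illustrative and not referenced by the combiner):
static bool isUsedBitsDenseExample(uint64_t UsedBits) {
  if (UsedBits == 0)
    return false; // no bits used at all
  while ((UsedBits & 1) == 0) // get rid of the unused bits on the right
    UsedBits >>= 1;
  // What remains must have the form 0..01..1, i.e. be one less than a
  // power of two.
  return (UsedBits & (UsedBits + 1)) == 0;
}
// For instance, 0x00FF00 is dense, while 0x00FF01 leaves a hole and is not.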
13353 :
13354 : /// Check whether or not \p First and \p Second are next to each other
13355 : /// in memory. This means that there is no hole between the bits loaded
13356 : /// by \p First and the bits loaded by \p Second.
13357 2 : static bool areSlicesNextToEachOther(const LoadedSlice &First,
13358 : const LoadedSlice &Second) {
13359 : assert(First.Origin == Second.Origin && First.Origin &&
13360 : "Unable to match different memory origins.");
13361 2 : APInt UsedBits = First.getUsedBits();
13362 : assert((UsedBits & Second.getUsedBits()) == 0 &&
13363 : "Slices are not supposed to overlap.");
13364 2 : UsedBits |= Second.getUsedBits();
13365 2 : return areUsedBitsDense(UsedBits);
13366 : }
13367 :
13368 : /// Adjust the \p GlobalLSCost according to the target
13369 : /// pairing capabilities and the layout of the slices.
13370 : /// \pre \p GlobalLSCost should account for at least as many loads as
13371 : /// there are slices in \p LoadedSlices.
13372 0 : static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13373 : LoadedSlice::Cost &GlobalLSCost) {
13374 0 : unsigned NumberOfSlices = LoadedSlices.size();
13375 : // If there are fewer than 2 elements, no pairing is possible.
13376 0 : if (NumberOfSlices < 2)
13377 0 : return;
13378 :
13379 : // Sort the slices so that elements that are likely to be next to each
13380 : // other in memory are next to each other in the list.
13381 0 : llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13382 : assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13383 0 : return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13384 : });
13385 0 : const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13386 : // First (resp. Second) is the first (resp. second) potential candidate
13387 : // to be placed in a paired load.
13388 : const LoadedSlice *First = nullptr;
13389 : const LoadedSlice *Second = nullptr;
13390 0 : for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13391 : // Set the beginning of the pair.
13392 : First = Second) {
13393 0 : Second = &LoadedSlices[CurrSlice];
13394 :
13395 : // If First is NULL, it means we start a new pair.
13396 : // Get to the next slice.
13397 0 : if (!First)
13398 0 : continue;
13399 :
13400 0 : EVT LoadedType = First->getLoadedType();
13401 :
13402 : // If the types of the slices are different, we cannot pair them.
13403 0 : if (LoadedType != Second->getLoadedType())
13404 0 : continue;
13405 :
13406 : // Check if the target supplies paired loads for this type.
13407 0 : unsigned RequiredAlignment = 0;
13408 0 : if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13409 : // Move to the next pair; this type is hopeless.
13410 : Second = nullptr;
13411 0 : continue;
13412 : }
13413 : // Check if we meet the alignment requirement.
13414 0 : if (RequiredAlignment > First->getAlignment())
13415 0 : continue;
13416 :
13417 : // Check that both loads are next to each other in memory.
13418 0 : if (!areSlicesNextToEachOther(*First, *Second))
13419 0 : continue;
13420 :
13421 : assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13422 0 : --GlobalLSCost.Loads;
13423 : // Move to the next pair.
13424 : Second = nullptr;
13425 : }
13426 : }
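// For instance (illustrative, target-dependent): if the target reports a
// paired load for i32 with a 4-byte alignment requirement, two adjacent i32
// slices of a sufficiently aligned i64 load count as one load here, so
// GlobalLSCost.Loads drops from 2 to 1 and slicing becomes easier to justify
// in isSlicingProfitable() below.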
13427 :
13428 : /// Check the profitability of all involved LoadedSlice.
13429 : /// Currently, it is considered profitable if there are exactly two
13430 : /// involved slices (1) which are (2) next to each other in memory, and
13431 : /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13432 : ///
13433 : /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13434 : /// the elements themselves.
13435 : ///
13436 : /// FIXME: When the cost model is mature enough, we can relax
13437 : /// constraints (1) and (2).
13438 127 : static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13439 : const APInt &UsedBits, bool ForCodeSize) {
13440 127 : unsigned NumberOfSlices = LoadedSlices.size();
13441 127 : if (StressLoadSlicing)
13442 2 : return NumberOfSlices > 1;
13443 :
13444 : // Check (1).
13445 125 : if (NumberOfSlices != 2)
13446 : return false;
13447 :
13448 : // Check (2).
13449 22 : if (!areUsedBitsDense(UsedBits))
13450 : return false;
13451 :
13452 : // Check (3).
13453 20 : LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13454 : // The original code has one big load.
13455 20 : OrigCost.Loads = 1;
13456 60 : for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13457 40 : const LoadedSlice &LS = LoadedSlices[CurrSlice];
13458 : // Accumulate the cost of all the slices.
13459 40 : LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13460 : GlobalSlicingCost += SliceCost;
13461 :
13462 : // Account as cost in the original configuration the gain obtained
13463 : // with the current slices.
13464 40 : OrigCost.addSliceGain(LS);
13465 : }
13466 :
13467 : // If the target supports paired load, adjust the cost accordingly.
13468 20 : adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13469 20 : return OrigCost > GlobalSlicingCost;
13470 : }
13471 :
13472 : /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13473 : /// operations, split it into the various pieces being extracted.
13474 : ///
13475 : /// This sort of thing is introduced by SROA.
13476 : /// This slicing takes care not to insert overlapping loads.
13477 : /// \pre LI is a simple load (i.e., not an atomic or volatile load).
13478 6154316 : bool DAGCombiner::SliceUpLoad(SDNode *N) {
13479 6154316 : if (Level < AfterLegalizeDAG)
13480 : return false;
13481 :
13482 : LoadSDNode *LD = cast<LoadSDNode>(N);
13483 2591608 : if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13484 5048014 : !LD->getValueType(0).isInteger())
13485 129236 : return false;
13486 :
13487 : // Keep track of already used bits to detect overlapping values.
13488 : // In that case, we will just abort the transformation.
13489 : APInt UsedBits(LD->getValueSizeInBits(0), 0);
13490 :
13491 2462372 : SmallVector<LoadedSlice, 4> LoadedSlices;
13492 :
13493 : // Check if this load is used as several smaller chunks of bits.
13494 : // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13495 : // of computation for each trunc.
13496 2462372 : for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13497 2979559 : UI != UIEnd; ++UI) {
13498 : // Skip the uses of the chain.
13499 2979432 : if (UI.getUse().getResNo() != 0)
13500 516902 : continue;
13501 :
13502 : SDNode *User = *UI;
13503 : unsigned Shift = 0;
13504 :
13505 : // Check if this is a trunc(lshr).
13506 2462530 : if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13507 8208 : isa<ConstantSDNode>(User->getOperand(1))) {
13508 3376 : Shift = User->getConstantOperandVal(1);
13509 : User = *User->use_begin();
13510 : }
13511 :
13512 : // At this point, User is a truncate iff we encountered trunc or
13513 : // trunc(lshr).
13514 2462530 : if (User->getOpcode() != ISD::TRUNCATE)
13515 2462245 : return false;
13516 :
13517 : // The width of the type must be a power of 2 and at least 8 bits.
13518 : // Otherwise the load cannot be represented in LLVM IR.
13519 : // Moreover, if we shifted with a non-8-bits multiple, the slice
13520 : // will be across several bytes. We do not support that.
13521 : unsigned Width = User->getValueSizeInBits(0);
13522 2076 : if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13523 : return false;
13524 :
13525 : // Build the slice for this chain of computations.
13526 991 : LoadedSlice LS(User, LD, Shift, &DAG);
13527 991 : APInt CurrentUsedBits = LS.getUsedBits();
13528 :
13529 : // Check if this slice overlaps with another.
13530 991 : if ((CurrentUsedBits & UsedBits) != 0)
13531 : return false;
13532 : // Update the bits used globally.
13533 : UsedBits |= CurrentUsedBits;
13534 :
13535 : // Check if the new slice would be legal.
13536 974 : if (!LS.isLegal())
13537 : return false;
13538 :
13539 : // Record the slice.
13540 285 : LoadedSlices.push_back(LS);
13541 : }
13542 :
13543 : // Abort slicing if it does not seem to be profitable.
13544 127 : if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13545 : return false;
13546 :
13547 : ++SlicedLoads;
13548 :
13549 : // Rewrite each chain to use an independent load.
13550 : // By construction, each chain can be represented by a unique load.
13551 :
13552 : // Prepare the argument for the new token factor for all the slices.
13553 : SmallVector<SDValue, 8> ArgChains;
13554 10 : for (SmallVectorImpl<LoadedSlice>::const_iterator
13555 : LSIt = LoadedSlices.begin(),
13556 : LSItEnd = LoadedSlices.end();
13557 15 : LSIt != LSItEnd; ++LSIt) {
13558 10 : SDValue SliceInst = LSIt->loadSlice();
13559 10 : CombineTo(LSIt->Inst, SliceInst, true);
13560 10 : if (SliceInst.getOpcode() != ISD::LOAD)
13561 1 : SliceInst = SliceInst.getOperand(0);
13562 : assert(SliceInst->getOpcode() == ISD::LOAD &&
13563 : "It takes more than a zext to get to the loaded slice!!");
13564 10 : ArgChains.push_back(SliceInst.getValue(1));
13565 : }
13566 :
13567 5 : SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13568 5 : ArgChains);
13569 10 : DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13570 5 : AddToWorklist(Chain.getNode());
13571 : return true;
13572 : }
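// To make the shape of the rewrite concrete, a sketch (not taken from a
// test) of what SliceUpLoad does on a little-endian target:
//
//   %wide = load i64, i64* %p
//   %lo   = trunc i64 %wide to i32
//   %sh   = lshr i64 %wide, 32
//   %hi   = trunc i64 %sh to i32
//
// becomes two independent i32 loads from %p and %p + 4, and the users of the
// original load's chain are redirected to a TokenFactor of the two new load
// chains, so memory ordering is preserved while the wide load goes dead.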
13573 :
13574 : /// Check to see if V is (and (load ptr), imm), where the load has
13575 : /// specific bytes cleared out. If so, return the byte size being masked out
13576 : /// and the shift amount.
13577 : static std::pair<unsigned, unsigned>
13578 0 : CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13579 : std::pair<unsigned, unsigned> Result(0, 0);
13580 :
13581 : // Check for the structure we're looking for.
13582 0 : if (V->getOpcode() != ISD::AND ||
13583 0 : !isa<ConstantSDNode>(V->getOperand(1)) ||
13584 0 : !ISD::isNormalLoad(V->getOperand(0).getNode()))
13585 0 : return Result;
13586 :
13587 : // Check the chain and pointer.
13588 : LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13589 0 : if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
13590 :
13591 : // This only handles simple types.
13592 0 : if (V.getValueType() != MVT::i16 &&
13593 0 : V.getValueType() != MVT::i32 &&
13594 0 : V.getValueType() != MVT::i64)
13595 0 : return Result;
13596 :
13597 : // Check the constant mask. Invert it so that the bits being masked out are
13598 : // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
13599 : // follow the sign bit for uniformity.
13600 0 : uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13601 0 : unsigned NotMaskLZ = countLeadingZeros(NotMask);
13602 0 : if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
13603 0 : unsigned NotMaskTZ = countTrailingZeros(NotMask);
13604 0 : if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
13605 0 : if (NotMaskLZ == 64) return Result; // All zero mask.
13606 :
13607 : // See if we have a contiguous run of bits. If so, we have 0*1+0*
13608 0 : if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13609 0 : return Result;
13610 :
13611 : // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13612 0 : if (V.getValueType() != MVT::i64 && NotMaskLZ)
13613 0 : NotMaskLZ -= 64-V.getValueSizeInBits();
13614 :
13615 0 : unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
13616 0 : switch (MaskedBytes) {
13617 : case 1:
13618 : case 2:
13619 : case 4: break;
13620 0 : default: return Result; // All-one mask, or an unhandled size (e.g. 3 or 5 bytes).
13621 : }
13622 :
13623 : // Verify that the run of bits starts at a multiple of the mask width so
13624 : // that the access is aligned the same as the access width.
13625 0 : if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
13626 :
13627 : // For narrowing to be valid, the load must be the memory operation
13628 : // immediately preceding the store.
13629 0 : if (LD == Chain.getNode())
13630 : ; // ok.
13631 0 : else if (Chain->getOpcode() == ISD::TokenFactor &&
13632 0 : SDValue(LD, 1).hasOneUse()) {
13633 : // LD has only 1 chain use, so there are no indirect dependencies.
13634 : bool isOk = false;
13635 0 : for (const SDValue &ChainOp : Chain->op_values())
13636 0 : if (ChainOp.getNode() == LD) {
13637 : isOk = true;
13638 : break;
13639 : }
13640 0 : if (!isOk)
13641 0 : return Result;
13642 : } else
13643 0 : return Result; // Fail.
13644 :
13645 : Result.first = MaskedBytes;
13646 0 : Result.second = NotMaskTZ/8;
13647 0 : return Result;
13648 : }
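// A standalone sketch of the mask analysis above, restricted to i32 and
// using the GCC/Clang builtins in place of the count-zeros utilities (a
// hypothetical helper, not used by the combiner). It returns the pair
// {bytes masked out, byte shift}, or {0, 0} on failure:
static std::pair<unsigned, unsigned> analyzeMask32Example(uint32_t Mask) {
  uint32_t NotMask = ~Mask;
  if (NotMask == 0)
    return {0, 0};                    // all-one mask clears nothing
  unsigned TZ = __builtin_ctz(NotMask);
  unsigned LZ = __builtin_clz(NotMask);
  if ((TZ & 7) || (LZ & 7))
    return {0, 0};                    // run must be byte-aligned
  uint32_t Run = NotMask >> TZ;
  if ((Run & (Run + 1)) != 0)
    return {0, 0};                    // cleared bits are not one dense run
  unsigned MaskedBytes = (32 - LZ - TZ) / 8;
  if (MaskedBytes != 1 && MaskedBytes != 2 && MaskedBytes != 4)
    return {0, 0};                    // same 1/2/4-byte restriction
  if (TZ && (TZ / 8) % MaskedBytes)
    return {0, 0};                    // access would be misaligned
  return {MaskedBytes, TZ / 8};
}
// analyzeMask32Example(0xFFFF00FF) yields {1, 1}: one byte, at byte 1.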
13649 :
13650 : /// Check to see if IVal is something that provides a value as specified by
13651 : /// MaskInfo. If so, replace the specified store with a narrower store of
13652 : /// truncated IVal.
13653 : static SDNode *
13654 0 : ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
13655 : SDValue IVal, StoreSDNode *St,
13656 : DAGCombiner *DC) {
13657 0 : unsigned NumBytes = MaskInfo.first;
13658 0 : unsigned ByteShift = MaskInfo.second;
13659 0 : SelectionDAG &DAG = DC->getDAG();
13660 :
13661 : // Check to see if IVal is all zeros in the part being masked in by the 'or'
13662 : // that uses this. If not, this is not a replacement.
13663 0 : APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
13664 0 : ByteShift*8, (ByteShift+NumBytes)*8);
13665 0 : if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
13666 :
13667 : // Check that it is legal on the target to do this. It is legal if the new
13668 : // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
13669 : // legalization.
13670 0 : MVT VT = MVT::getIntegerVT(NumBytes*8);
13671 0 : if (!DC->isTypeLegal(VT))
13672 0 : return nullptr;
13673 :
13674 : // Okay, we can do this! Replace the 'St' store with a store of IVal that is
13675 : // shifted by ByteShift and truncated down to NumBytes.
13676 0 : if (ByteShift) {
13677 0 : SDLoc DL(IVal);
13678 0 : IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
13679 : DAG.getConstant(ByteShift*8, DL,
13680 0 : DC->getShiftAmountTy(IVal.getValueType())));
13681 : }
13682 :
13683 : // Figure out the offset for the store and the alignment of the access.
13684 : unsigned StOffset;
13685 0 : unsigned NewAlign = St->getAlignment();
13686 :
13687 0 : if (DAG.getDataLayout().isLittleEndian())
13688 : StOffset = ByteShift;
13689 : else
13690 0 : StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
13691 :
13692 0 : SDValue Ptr = St->getBasePtr();
13693 0 : if (StOffset) {
13694 0 : SDLoc DL(IVal);
13695 0 : Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
13696 0 : Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
13697 0 : NewAlign = MinAlign(NewAlign, StOffset);
13698 : }
13699 :
13700 : // Truncate down to the new size.
13701 0 : IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
13702 :
13703 : ++OpsNarrowed;
13704 : return DAG
13705 0 : .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
13706 0 : St->getPointerInfo().getWithOffset(StOffset), NewAlign)
13707 0 : .getNode();
13708 : }
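// Continuing the example from CheckForMaskedLoad (a sketch, little-endian):
// for
//   store (or (and (load p), 0xFFFF00FF), %X), p
// with MaskInfo = {1 byte, byte shift 1} and %X known to be zero outside
// bits 8..15, the code above produces
//   store i8 (trunc (srl %X, 8)), p + 1
// and the wide load, the 'and', and the 'or' all become dead.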
13709 :
13710 : /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
13711 : /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
13712 : /// narrowing the load and store if it would end up being a win for performance
13713 : /// or code size.
13714 7415489 : SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
13715 : StoreSDNode *ST = cast<StoreSDNode>(N);
13716 7415489 : if (ST->isVolatile())
13717 40346 : return SDValue();
13718 :
13719 7375143 : SDValue Chain = ST->getChain();
13720 7375143 : SDValue Value = ST->getValue();
13721 7375143 : SDValue Ptr = ST->getBasePtr();
13722 7375143 : EVT VT = Value.getValueType();
13723 :
13724 20867955 : if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
13725 2579393 : return SDValue();
13726 :
13727 : unsigned Opc = Value.getOpcode();
13728 :
13729 : // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
13730 : // is a byte mask indicating a consecutive number of bytes, check to see if
13731 : // Y is known to provide just those bytes. If so, we try to replace the
13732 : // whole load / or / store sequence with a single (narrower) store, which
13733 : // makes the load dead.
13734 4795750 : if (Opc == ISD::OR) {
13735 : std::pair<unsigned, unsigned> MaskedLoad;
13736 16638 : MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
13737 16638 : if (MaskedLoad.first)
13738 1020 : if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13739 : Value.getOperand(1), ST,this))
13740 20 : return SDValue(NewST, 0);
13741 :
13742 : // Or is commutative, so try swapping X and Y.
13743 16618 : MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
13744 16618 : if (MaskedLoad.first)
13745 4 : if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13746 : Value.getOperand(0), ST,this))
13747 4 : return SDValue(NewST, 0);
13748 : }
13749 :
13750 4795726 : if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
13751 33595 : Value.getOperand(1).getOpcode() != ISD::Constant)
13752 4770879 : return SDValue();
13753 :
13754 24847 : SDValue N0 = Value.getOperand(0);
13755 9068 : if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13756 : Chain == SDValue(N0.getNode(), 1)) {
13757 : LoadSDNode *LD = cast<LoadSDNode>(N0);
13758 798 : if (LD->getBasePtr() != Ptr ||
13759 1596 : LD->getPointerInfo().getAddrSpace() !=
13760 1596 : ST->getPointerInfo().getAddrSpace())
13761 2620 : return SDValue();
13762 :
13763 : // Find the type to narrow the load / op / store to.
13764 798 : SDValue N1 = Value.getOperand(1);
13765 798 : unsigned BitWidth = N1.getValueSizeInBits();
13766 798 : APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
13767 798 : if (Opc == ISD::AND)
13768 344 : Imm ^= APInt::getAllOnesValue(BitWidth);
13769 1596 : if (Imm == 0 || Imm.isAllOnesValue())
13770 11 : return SDValue();
13771 787 : unsigned ShAmt = Imm.countTrailingZeros();
13772 787 : unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
13773 787 : unsigned NewBW = NextPowerOf2(MSB - ShAmt);
13774 787 : EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13775 : // The narrowing should be profitable, the load/store operation should be
13776 : // legal (or custom) and the store size should be equal to the NewVT width.
13777 2659 : while (NewBW < BitWidth &&
13778 242 : (NewVT.getStoreSizeInBits() != NewBW ||
13779 376 : !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
13780 134 : !TLI.isNarrowingProfitable(VT, NewVT))) {
13781 1872 : NewBW = NextPowerOf2(NewBW);
13782 1872 : NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13783 : }
13784 787 : if (NewBW >= BitWidth)
13785 662 : return SDValue();
13786 :
13787 : // If the changed lsb does not start at a NewBW bitwidth boundary,
13788 : // start at the previous one.
13789 125 : if (ShAmt % NewBW)
13790 105 : ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
13791 : APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
13792 242 : std::min(BitWidth, ShAmt + NewBW));
13793 125 : if ((Imm & Mask) == Imm) {
13794 233 : APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
13795 116 : if (Opc == ISD::AND)
13796 4 : NewImm ^= APInt::getAllOnesValue(NewBW);
13797 116 : uint64_t PtrOff = ShAmt / 8;
13798 : // For big endian targets, we need to adjust the offset to the pointer to
13799 : // load the correct bytes.
13800 116 : if (DAG.getDataLayout().isBigEndian())
13801 0 : PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
13802 :
13803 116 : unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
13804 116 : Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
13805 116 : if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
13806 0 : return SDValue();
13807 :
13808 232 : SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
13809 : Ptr.getValueType(), Ptr,
13810 116 : DAG.getConstant(PtrOff, SDLoc(LD),
13811 232 : Ptr.getValueType()));
13812 : SDValue NewLD =
13813 232 : DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
13814 : LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
13815 464 : LD->getMemOperand()->getFlags(), LD->getAAInfo());
13816 232 : SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
13817 116 : DAG.getConstant(NewImm, SDLoc(Value),
13818 232 : NewVT));
13819 : SDValue NewST =
13820 116 : DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
13821 348 : ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
13822 :
13823 116 : AddToWorklist(NewPtr.getNode());
13824 116 : AddToWorklist(NewLD.getNode());
13825 116 : AddToWorklist(NewVal.getNode());
13826 : WorklistRemover DeadNodes(*this);
13827 116 : DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
13828 : ++OpsNarrowed;
13829 116 : return NewST;
13830 : }
13831 : }
13832 :
13833 21438 : return SDValue();
13834 : }
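// A concrete sketch (not from a test) of the narrowing above:
//   %v = load i32, i32* %p
//   %o = or i32 %v, 0xFF0000
//   store i32 %o, i32* %p
// Here Imm = 0xFF0000, so ShAmt = 16, MSB = 23, and NewBW = NextPowerOf2(7)
// = 8. If an i8 'or' is legal and narrowing is profitable on the target, the
// sequence becomes an i8 load / or 0xFF / store at %p + 2 (little endian),
// and users of the wide load's chain are moved to the narrow load.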
13835 :
13836 : /// For a given floating point load / store pair, if the load value isn't used
13837 : /// by any other operations, then consider transforming the pair to integer
13838 : /// load / store operations if the target deems the transformation profitable.
13839 7832750 : SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
13840 : StoreSDNode *ST = cast<StoreSDNode>(N);
13841 7832750 : SDValue Chain = ST->getChain();
13842 7832750 : SDValue Value = ST->getValue();
13843 2018023 : if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
13844 : Value.hasOneUse() &&
13845 : Chain == SDValue(Value.getNode(), 1)) {
13846 : LoadSDNode *LD = cast<LoadSDNode>(Value);
13847 640615 : EVT VT = LD->getMemoryVT();
13848 36 : if (!VT.isFloatingPoint() ||
13849 6941 : VT != ST->getMemoryVT() ||
13850 6941 : LD->isNonTemporal() ||
13851 13882 : ST->isNonTemporal() ||
13852 660748 : LD->getPointerInfo().getAddrSpace() != 0 ||
13853 12502 : ST->getPointerInfo().getAddrSpace() != 0)
13854 634364 : return SDValue();
13855 :
13856 6251 : EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
13857 6251 : if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
13858 2308 : !TLI.isOperationLegal(ISD::STORE, IntVT) ||
13859 2311 : !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
13860 3 : !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
13861 6248 : return SDValue();
13862 :
13863 3 : unsigned LDAlign = LD->getAlignment();
13864 3 : unsigned STAlign = ST->getAlignment();
13865 3 : Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
13866 3 : unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
13867 3 : if (LDAlign < ABIAlign || STAlign < ABIAlign)
13868 0 : return SDValue();
13869 :
13870 : SDValue NewLD =
13871 3 : DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
13872 3 : LD->getPointerInfo(), LDAlign);
13873 :
13874 : SDValue NewST =
13875 3 : DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
13876 3 : ST->getPointerInfo(), STAlign);
13877 :
13878 3 : AddToWorklist(NewLD.getNode());
13879 3 : AddToWorklist(NewST.getNode());
13880 : WorklistRemover DeadNodes(*this);
13881 3 : DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
13882 : ++LdStFP2Int;
13883 3 : return NewST;
13884 : }
13885 :
13886 7192135 : return SDValue();
13887 : }
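// Illustrative sketch of the pattern this matches (hypothetical IR):
//   %f = load float, float* %src
//   store float %f, float* %dst
// When %f has no other users and the target prefers integer memory ops for
// f32 (e.g. to avoid a round trip through the FP register bank), this
// becomes an i32 load from %src feeding an i32 store to %dst, provided i32
// load/store are legal and both accesses meet i32's ABI alignment.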
13888 :
13889 : // This is a helper function for visitMUL to check the profitability
13890 : // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
13891 : // MulNode is the original multiply, AddNode is (add x, c1),
13892 : // and ConstNode is c2.
13893 : //
13894 : // If the (add x, c1) has multiple uses, we could increase
13895 : // the number of adds if we make this transformation.
13896 : // It would only be worth doing this if we can remove a
13897 : // multiply in the process. Check for that here.
13898 : // To illustrate:
13899 : // (A + c1) * c3
13900 : // (A + c2) * c3
13901 : // We're checking for cases where we have common "c3 * A" expressions.
13902 0 : bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
13903 : SDValue &AddNode,
13904 : SDValue &ConstNode) {
13905 : APInt Val;
13906 :
13907 : // If the add only has one use, this would be OK to do.
13908 0 : if (AddNode.getNode()->hasOneUse())
13909 0 : return true;
13910 :
13911 : // Walk all the users of the constant with which we're multiplying.
13912 0 : for (SDNode *Use : ConstNode->uses()) {
13913 0 : if (Use == MulNode) // This use is the one we're on right now. Skip it.
13914 0 : continue;
13915 :
13916 0 : if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
13917 : SDNode *OtherOp;
13918 0 : SDNode *MulVar = AddNode.getOperand(0).getNode();
13919 :
13920 : // OtherOp is what we're multiplying against the constant.
13921 0 : if (Use->getOperand(0) == ConstNode)
13922 0 : OtherOp = Use->getOperand(1).getNode();
13923 : else
13924 : OtherOp = Use->getOperand(0).getNode();
13925 :
13926 : // Check to see if multiply is with the same operand of our "add".
13927 : //
13928 : // ConstNode = CONST
13929 : // Use = ConstNode * A <-- visiting Use. OtherOp is A.
13930 : // ...
13931 : // AddNode = (A + c1) <-- MulVar is A.
13932 : // = AddNode * ConstNode <-- current visiting instruction.
13933 : //
13934 : // If we make this transformation, we will have a common
13935 : // multiply (ConstNode * A) that we can save.
13936 0 : if (OtherOp == MulVar)
13937 0 : return true;
13938 :
13939 : // Now check to see if a future expansion will give us a common
13940 : // multiply.
13941 : //
13942 : // ConstNode = CONST
13943 : // AddNode = (A + c1)
13944 : // ... = AddNode * ConstNode <-- current visiting instruction.
13945 : // ...
13946 : // OtherOp = (A + c2)
13947 : // Use = OtherOp * ConstNode <-- visiting Use.
13948 : //
13949 : // If we make this transformation, we will have a common
13950 : // multiply (CONST * A) after we also do the same transformation
13951 : // to the "Use" instruction.
13952 0 : if (OtherOp->getOpcode() == ISD::ADD &&
13953 0 : DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
13954 0 : OtherOp->getOperand(0).getNode() == MulVar)
13955 0 : return true;
13956 : }
13957 : }
13958 :
13959 : // Didn't find a case where this would be profitable.
13960 : return false;
13961 : }
13962 :
13963 0 : SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
13964 : unsigned NumStores) {
13965 : SmallVector<SDValue, 8> Chains;
13966 : SmallPtrSet<const SDNode *, 8> Visited;
13967 0 : SDLoc StoreDL(StoreNodes[0].MemNode);
13968 :
13969 0 : for (unsigned i = 0; i < NumStores; ++i) {
13970 0 : Visited.insert(StoreNodes[i].MemNode);
13971 : }
13972 :
13973 : // Don't add a store's chain if it comes from another store in the set.
13974 0 : for (unsigned i = 0; i < NumStores; ++i) {
13975 0 : if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
13976 0 : Chains.push_back(StoreNodes[i].MemNode->getChain());
13977 : }
13978 :
13979 : assert(Chains.size() > 0 && "Chain should have generated a chain");
13980 0 : return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
13981 : }
13982 :
13983 1087 : bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
13984 : SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
13985 : bool IsConstantSrc, bool UseVector, bool UseTrunc) {
13986 : // Make sure we have something to merge.
13987 1087 : if (NumStores < 2)
13988 : return false;
13989 :
13990 : // The latest Node in the DAG.
13991 1087 : SDLoc DL(StoreNodes[0].MemNode);
13992 :
13993 : int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
13994 1087 : unsigned SizeInBits = NumStores * ElementSizeBits;
13995 1087 : unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13996 :
13997 : EVT StoreTy;
13998 1087 : if (UseVector) {
13999 309 : unsigned Elts = NumStores * NumMemElts;
14000 : // Get the type for the merged vector store.
14001 309 : StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14002 : } else
14003 778 : StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14004 :
14005 : SDValue StoredVal;
14006 1087 : if (UseVector) {
14007 309 : if (IsConstantSrc) {
14008 : SmallVector<SDValue, 8> BuildVector;
14009 655 : for (unsigned I = 0; I != NumStores; ++I) {
14010 948 : StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14011 474 : SDValue Val = St->getValue();
14012 : // If constant is of the wrong type, convert it now.
14013 948 : if (MemVT != Val.getValueType()) {
14014 2 : Val = peekThroughBitcasts(Val);
14015 : // Deal with constants of wrong size.
14016 2 : if (ElementSizeBits != Val.getValueSizeInBits()) {
14017 : EVT IntMemVT =
14018 0 : EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14019 : if (isa<ConstantFPSDNode>(Val)) {
14020 : // Not clear how to truncate FP values.
14021 0 : return false;
14022 : } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14023 0 : Val = DAG.getConstant(C->getAPIntValue()
14024 0 : .zextOrTrunc(Val.getValueSizeInBits())
14025 0 : .zextOrTrunc(ElementSizeBits),
14026 0 : SDLoc(C), IntMemVT);
14027 : }
14028 : // Bitcast to make sure the value has the correctly sized type.
14029 2 : Val = DAG.getBitcast(MemVT, Val);
14030 : }
14031 474 : BuildVector.push_back(Val);
14032 : }
14033 181 : StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14034 : : ISD::BUILD_VECTOR,
14035 362 : DL, StoreTy, BuildVector);
14036 : } else {
14037 : SmallVector<SDValue, 8> Ops;
14038 446 : for (unsigned i = 0; i < NumStores; ++i) {
14039 636 : StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14040 318 : SDValue Val = peekThroughBitcasts(St->getValue());
14041 : // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
14042 : // type MemVT. If the underlying value is not the correct
14043 : // type, but it is an extraction of an appropriate vector we
14044 : // can recast Val to be of the correct type. This may require
14045 : // converting between EXTRACT_VECTOR_ELT and
14046 : // EXTRACT_SUBVECTOR.
14047 636 : if ((MemVT != Val.getValueType()) &&
14048 8 : (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14049 : Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14050 10 : EVT MemVTScalarTy = MemVT.getScalarType();
14051 : // We may need to add a bitcast here to get types to line up.
14052 20 : if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14053 10 : Val = DAG.getBitcast(MemVT, Val);
14054 : } else {
14055 0 : unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14056 : : ISD::EXTRACT_VECTOR_ELT;
14057 0 : SDValue Vec = Val.getOperand(0);
14058 0 : SDValue Idx = Val.getOperand(1);
14059 0 : Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14060 : }
14061 : }
14062 318 : Ops.push_back(Val);
14063 : }
14064 :
14065 : // Build the extracted vector elements back into a vector.
14066 128 : StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14067 : : ISD::BUILD_VECTOR,
14068 240 : DL, StoreTy, Ops);
14069 : }
14070 : } else {
14071 : // We should always use a vector store when merging extracted vector
14072 : // elements, so this path implies a store of constants.
14073 : assert(IsConstantSrc && "Merged vector elements should use vector store");
14074 :
14075 : APInt StoreInt(SizeInBits, 0);
14076 :
14077 : // Construct a single integer constant which is made of the smaller
14078 : // constant inputs.
14079 778 : bool IsLE = DAG.getDataLayout().isLittleEndian();
14080 2978 : for (unsigned i = 0; i < NumStores; ++i) {
14081 2200 : unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14082 4400 : StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14083 :
14084 2200 : SDValue Val = St->getValue();
14085 2200 : Val = peekThroughBitcasts(Val);
14086 2200 : StoreInt <<= ElementSizeBits;
14087 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14088 2164 : StoreInt |= C->getAPIntValue()
14089 4328 : .zextOrTrunc(ElementSizeBits)
14090 4328 : .zextOrTrunc(SizeInBits);
14091 : } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14092 36 : StoreInt |= C->getValueAPF()
14093 36 : .bitcastToAPInt()
14094 72 : .zextOrTrunc(ElementSizeBits)
14095 72 : .zextOrTrunc(SizeInBits);
14096 : // If fp truncation is necessary give up for now.
14097 36 : if (MemVT.getSizeInBits() != ElementSizeBits)
14098 0 : return false;
14099 : } else {
14100 0 : llvm_unreachable("Invalid constant element type");
14101 : }
14102 : }
14103 :
14104 : // Create the new Load and Store operations.
14105 778 : StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14106 : }
14107 :
14108 1087 : LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14109 1087 : SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14110 :
14111 : // Make sure we use a trunc store if it's necessary to be legal.
14112 : SDValue NewStore;
14113 1087 : if (!UseTrunc) {
14114 1048 : NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14115 1048 : FirstInChain->getPointerInfo(),
14116 2096 : FirstInChain->getAlignment());
14117 : } else { // Must be realized as a trunc store
14118 : EVT LegalizedStoredValTy =
14119 78 : TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14120 39 : unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14121 : ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14122 : SDValue ExtendedStoreVal =
14123 78 : DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14124 39 : LegalizedStoredValTy);
14125 39 : NewStore = DAG.getTruncStore(
14126 : NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14127 39 : FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14128 : FirstInChain->getAlignment(),
14129 78 : FirstInChain->getMemOperand()->getFlags());
14130 : }
14131 :
14132 : // Replace all merged stores with the new store.
14133 4079 : for (unsigned i = 0; i < NumStores; ++i)
14134 5984 : CombineTo(StoreNodes[i].MemNode, NewStore);
14135 :
14136 1087 : AddToWorklist(NewChain.getNode());
14137 1087 : return true;
14138 : }
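// A standalone sketch of the constant-packing loop above (hypothetical
// helper; four i8 stores merged into one i32). On a little-endian target the
// element at the highest address must end up in the most significant byte,
// which is why that loop walks the stores in reverse there:
static uint32_t packFourByteConstantsExample(const uint8_t Vals[4],
                                             bool IsLittleEndian) {
  uint32_t StoreInt = 0;
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Idx = IsLittleEndian ? (4 - 1 - i) : i;
    StoreInt <<= 8;        // make room for the next element
    StoreInt |= Vals[Idx]; // splice it into the low byte
  }
  return StoreInt;
}
// For Vals = {0x11, 0x22, 0x33, 0x44} this packs 0x44332211 (little endian)
// or 0x11223344 (big endian); either way, the single i32 store writes the
// same bytes the four i8 stores would have written.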
14139 :
14140 292861 : void DAGCombiner::getStoreMergeCandidates(
14141 : StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14142 : SDNode *&RootNode) {
14143 : // This holds the base pointer, index, and the offset in bytes from the base
14144 : // pointer.
14145 292861 : BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14146 292861 : EVT MemVT = St->getMemoryVT();
14147 :
14148 292861 : SDValue Val = peekThroughBitcasts(St->getValue());
14149 : // We must have a base and an offset.
14150 292861 : if (!BasePtr.getBase().getNode())
14151 22512 : return;
14152 :
14153 : // Do not handle stores to undef base pointers.
14154 292856 : if (BasePtr.getBase().isUndef())
14155 : return;
14156 :
14157 291565 : bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14158 291565 : bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14159 291565 : Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14160 291565 : bool IsLoadSrc = isa<LoadSDNode>(Val);
14161 291565 : BaseIndexOffset LBasePtr;
14162 : // Match on loadbaseptr if relevant.
14163 291565 : EVT LoadVT;
14164 291565 : if (IsLoadSrc) {
14165 : auto *Ld = cast<LoadSDNode>(Val);
14166 70374 : LBasePtr = BaseIndexOffset::match(Ld, DAG);
14167 70374 : LoadVT = Ld->getMemoryVT();
14168 : // Load and store should be the same type.
14169 70374 : if (MemVT != LoadVT)
14170 : return;
14171 : // Loads must only have one use.
14172 66746 : if (!Ld->hasNUsesOfValue(1, 0))
14173 : return;
14174 : // The memory operands must not be volatile.
14175 56258 : if (Ld->isVolatile() || Ld->isIndexed())
14176 : return;
14177 : }
14178 : auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14179 : int64_t &Offset) -> bool {
14180 : if (Other->isVolatile() || Other->isIndexed())
14181 : return false;
14182 : SDValue Val = peekThroughBitcasts(Other->getValue());
14183 : // Allow merging constants of different types as integers.
14184 : bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14185 : : Other->getMemoryVT() != MemVT;
14186 : if (IsLoadSrc) {
14187 : if (NoTypeMatch)
14188 : return false;
14189 : // The Load's Base Ptr must also match
14190 : if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14191 : auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14192 : if (LoadVT != OtherLd->getMemoryVT())
14193 : return false;
14194 : // Loads must only have one use.
14195 : if (!OtherLd->hasNUsesOfValue(1, 0))
14196 : return false;
14197 : // The memory operands must not be volatile.
14198 : if (OtherLd->isVolatile() || OtherLd->isIndexed())
14199 : return false;
14200 : if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14201 : return false;
14202 : } else
14203 : return false;
14204 : }
14205 : if (IsConstantSrc) {
14206 : if (NoTypeMatch)
14207 : return false;
14208 : if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14209 : return false;
14210 : }
14211 : if (IsExtractVecSrc) {
14212 : // Do not merge truncated stores here.
14213 : if (Other->isTruncatingStore())
14214 : return false;
14215 : if (!MemVT.bitsEq(Val.getValueType()))
14216 : return false;
14217 : if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14218 : Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14219 : return false;
14220 : }
14221 : Ptr = BaseIndexOffset::match(Other, DAG);
14222 : return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14223 270349 : };
14224 :
14225 : // We are looking for a root node which is an ancestor to all mergeable
14226 : // stores. We search up through a load, to our root and then down
14227 : // through all children. For instance we will find Store{1,2,3} if
14228 : // St is Store1, Store2, or Store3 where the root is not a load,
14229 : // which is always true for nonvolatile ops. TODO: Expand
14230 : // the search to find all valid candidates through multiple layers of loads.
14231 : //
14232 : // Root
14233 : // |-------|-------|
14234 : // Load Load Store3
14235 : // | |
14236 : // Store1 Store2
14237 : //
14238 : // FIXME: We should be able to climb and
14239 : // descend TokenFactors to find candidates as well.
14240 :
14241 270349 : RootNode = St->getChain().getNode();
14242 :
14243 : if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14244 16733 : RootNode = Ldn->getChain().getNode();
14245 57750 : for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14246 41017 : if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14247 157841 : for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14248 134984 : if (I2.getOperandNo() == 0)
14249 : if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14250 105519 : BaseIndexOffset Ptr;
14251 : int64_t PtrDiff;
14252 105519 : if (CandidateMatch(OtherST, Ptr, PtrDiff))
14253 82631 : StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14254 : }
14255 : } else
14256 1855035 : for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14257 1601419 : if (I.getOperandNo() == 0)
14258 : if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14259 1205878 : BaseIndexOffset Ptr;
14260 : int64_t PtrDiff;
14261 1205878 : if (CandidateMatch(OtherST, Ptr, PtrDiff))
14262 826809 : StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14263 : }
14264 : }
14265 :
14266 : // We need to check that merging these stores does not cause a loop in
14267 : // the DAG. Any store candidate may depend on another candidate
14268 : // indirectly through its operand (we already consider dependencies
14269 : // through the chain). Check in parallel by searching up from
14270 : // non-chain operands of candidates.
14271 0 : bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14272 : SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14273 : SDNode *RootNode) {
14274 : // FIXME: We should be able to truncate a full search of
14275 : // predecessors by doing a BFS and keeping tabs on the originating
14276 : // stores from which worklist nodes come, in a similar way to
14277 : // TokenFactor simplification.
14278 :
14279 : SmallPtrSet<const SDNode *, 32> Visited;
14280 : SmallVector<const SDNode *, 8> Worklist;
14281 :
14282 : // RootNode is a predecessor to all candidates so we need not search
14283 : // past it. Add RootNode (peeking through TokenFactors). Do not count
14284 : // these towards the size check.
14285 :
14286 0 : Worklist.push_back(RootNode);
14287 0 : while (!Worklist.empty()) {
14288 : auto N = Worklist.pop_back_val();
14289 0 : if (!Visited.insert(N).second)
14290 0 : continue; // Already present in Visited.
14291 0 : if (N->getOpcode() == ISD::TokenFactor) {
14292 0 : for (SDValue Op : N->ops())
14293 0 : Worklist.push_back(Op.getNode());
14294 : }
14295 : }
14296 :
14297 : // Don't count pruning nodes towards max.
14298 0 : unsigned int Max = 1024 + Visited.size();
14299 : // Search Ops of store candidates.
14300 0 : for (unsigned i = 0; i < NumStores; ++i) {
14301 0 : SDNode *N = StoreNodes[i].MemNode;
14302 : // Of the 4 Store Operands:
14303 : // * Chain (Op 0) -> We have already considered these
14304 : // in candidate selection and can be
14305 : // safely ignored
14306 : // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14307 : // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
14308 : // and so no cycles are possible.
14309 : // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
14310 : //
14311 : // Thus we need only check predecessors of the value operands.
14312 0 : auto *Op = N->getOperand(1).getNode();
14313 0 : if (Visited.insert(Op).second)
14314 0 : Worklist.push_back(Op);
14315 : }
14316 : // Search through DAG. We can stop early if we find a store node.
14317 0 : for (unsigned i = 0; i < NumStores; ++i)
14318 0 : if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14319 : Max))
14320 0 : return false;
14321 : return true;
14322 : }
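// For instance (illustrative): if candidate store S1 stores a value computed
// from a load whose chain passes through candidate store S2, merging S1 and
// S2 into one node would make the merged store a predecessor of its own
// value operand. The search above therefore starts from the candidates'
// value operands and rejects the set as soon as it reaches any candidate.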
14323 :
14324 7161723 : bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14325 7161723 : if (OptLevel == CodeGenOpt::None)
14326 : return false;
14327 :
14328 2948982 : EVT MemVT = St->getMemoryVT();
14329 2948982 : int64_t ElementSizeBytes = MemVT.getStoreSize();
14330 2948982 : unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14331 :
14332 2948982 : if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14333 : return false;
14334 :
14335 2173596 : bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14336 : Attribute::NoImplicitFloat);
14337 :
14338 : // This function cannot currently deal with non-byte-sized memory sizes.
14339 2173596 : if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14340 : return false;
14341 :
14342 2169832 : if (!MemVT.isSimple())
14343 : return false;
14344 :
14345 : // Perform an early exit check. Do not bother looking at stored values that
14346 : // are not constants, loads, or extracted vector elements.
14347 2168096 : SDValue StoredVal = peekThroughBitcasts(St->getValue());
14348 : bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14349 : bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14350 : isa<ConstantFPSDNode>(StoredVal);
14351 2168096 : bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14352 : StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14353 :
14354 2168096 : if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14355 : return false;
14356 :
14357 292861 : SmallVector<MemOpLink, 8> StoreNodes;
14358 : SDNode *RootNode;
14359 : // Find potential store merge candidates by searching through chain sub-DAG
14360 292861 : getStoreMergeCandidates(St, StoreNodes, RootNode);
14361 :
14362 : // Check if there is anything to merge.
14363 292861 : if (StoreNodes.size() < 2)
14364 : return false;
14365 :
14366 : // Sort the memory operands according to their distance from the
14367 : // base pointer.
14368 128311 : llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14369 0 : return LHS.OffsetFromBase < RHS.OffsetFromBase;
14370 : });
14371 :
14372 : // Store Merge attempts to merge the lowest stores. This generally
14373 : // works out well, as the remaining stores are checked
14374 : // after the first collection of stores is merged. However, in the
14375 : // case that a non-mergeable store is found first, e.g., {p[-2],
14376 : // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14377 : // mergeable cases. To prevent this, we prune such stores from the
14378 : // front of StoreNodes here.
14379 :
14380 : bool RV = false;
14381 484864 : while (StoreNodes.size() > 1) {
14382 : unsigned StartIdx = 0;
14383 185704 : while ((StartIdx + 1 < StoreNodes.size()) &&
14384 335462 : StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14385 167731 : StoreNodes[StartIdx + 1].OffsetFromBase)
14386 : ++StartIdx;
14387 :
14388 : // Bail if we don't have enough candidates to merge.
14389 132094 : if (StartIdx + 1 >= StoreNodes.size())
14390 17973 : return RV;
14391 :
14392 114121 : if (StartIdx)
14393 11397 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14394 :
14395 : // Scan the memory operations on the chain and find the first
14396 : // non-consecutive store memory address.
14397 114121 : unsigned NumConsecutiveStores = 1;
14398 114121 : int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14399 : // Check that the addresses are consecutive starting from the second
14400 : // element in the list of stores.
14401 702071 : for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14402 595111 : int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14403 595111 : if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14404 : break;
14405 587950 : NumConsecutiveStores = i + 1;
14406 : }
14407 :
14408 114121 : if (NumConsecutiveStores < 2) {
14409 : StoreNodes.erase(StoreNodes.begin(),
14410 0 : StoreNodes.begin() + NumConsecutiveStores);
14411 98454 : continue;
14412 : }
14413 :
14414 : // The node with the lowest store address.
14415 114121 : LLVMContext &Context = *DAG.getContext();
14416 114121 : const DataLayout &DL = DAG.getDataLayout();
14417 :
14418 : // Store the constants into memory as one consecutive store.
14419 114121 : if (IsConstantSrc) {
14420 196515 : while (NumConsecutiveStores >= 2) {
14421 100025 : LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14422 : unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14423 100025 : unsigned FirstStoreAlign = FirstInChain->getAlignment();
14424 : unsigned LastLegalType = 1;
14425 : unsigned LastLegalVectorType = 1;
14426 : bool LastIntegerTrunc = false;
14427 : bool NonZero = false;
14428 : unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14429 430666 : for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14430 702772 : StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14431 351386 : SDValue StoredVal = ST->getValue();
14432 : bool IsElementZero = false;
14433 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14434 346375 : IsElementZero = C->isNullValue();
14435 : else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14436 5007 : IsElementZero = C->getConstantFPValue()->isNullValue();
14437 351382 : if (IsElementZero) {
14438 315458 : if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14439 : FirstZeroAfterNonZero = i;
14440 : }
14441 351386 : NonZero |= !IsElementZero;
14442 :
14443 : // Find a legal type for the constant store.
14444 351386 : unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14445 351386 : EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14446 351386 : bool IsFast = false;
14447 :
14448 : // Break early when size is too large to be legal.
14449 351386 : if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14450 : break;
14451 :
14452 431059 : if (TLI.isTypeLegal(StoreTy) &&
14453 200119 : TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14454 99701 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14455 97520 : FirstStoreAlign, &IsFast) &&
14456 : IsFast) {
14457 : LastIntegerTrunc = false;
14458 : LastLegalType = i + 1;
14459 : // Or check whether a truncstore is legal.
14460 233121 : } else if (TLI.getTypeAction(Context, StoreTy) ==
14461 : TargetLowering::TypePromoteInteger) {
14462 : EVT LegalizedStoredValTy =
14463 71783 : TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14464 73639 : if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14465 3712 : TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14466 1856 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14467 1185 : FirstStoreAlign, &IsFast) &&
14468 : IsFast) {
14469 : LastIntegerTrunc = true;
14470 : LastLegalType = i + 1;
14471 : }
14472 : }
14473 :
14474 : // We only use vectors if the constant is known to be zero or the
14475 : // target allows it and the function is not marked with the
14476 : // noimplicitfloat attribute.
14477 40595 : if ((!NonZero ||
14478 341654 : TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14479 : !NoVectors) {
14480 : // Find a legal type for the vector store.
14481 300787 : unsigned Elts = (i + 1) * NumMemElts;
14482 300787 : EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14483 423925 : if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14484 121582 : TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14485 60061 : TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14486 58942 : FirstStoreAlign, &IsFast) &&
14487 : IsFast)
14488 : LastLegalVectorType = i + 1;
14489 : }
14490 : }
14491 :
14492 100025 : bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14493 100025 : unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14494 :
14495 : // Check if we found a legal integer type that creates a meaningful
14496 : // merge.
14497 100025 : if (NumElem < 2) {
14498 : // We know that candidate stores are in order and of correct
14499 : // shape. While there is no mergeable sequence from the
14500 : // beginning, one may start later in the sequence. The only
14501 : // reason a merge of size N could have failed where another of
14502 : // the same size would not have, is if the alignment has
14503 : // improved or we've dropped a non-zero value. Drop as many
14504 : // candidates as we can here.
14505 : unsigned NumSkip = 1;
14506 483495 : while (
14507 492419 : (NumSkip < NumConsecutiveStores) &&
14508 1068510 : (NumSkip < FirstZeroAfterNonZero) &&
14509 971898 : (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14510 483495 : NumSkip++;
14511 :
14512 99066 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14513 99066 : NumConsecutiveStores -= NumSkip;
14514 99066 : continue;
14515 : }
14516 :
14517 : // Check that we can merge these candidates without causing a cycle.
14518 959 : if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14519 : RootNode)) {
14520 0 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14521 0 : NumConsecutiveStores -= NumElem;
14522 0 : continue;
14523 : }
14524 :
14525 959 : RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14526 : UseVector, LastIntegerTrunc);
14527 :
14528 : // Remove merged stores for next iteration.
14529 959 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14530 959 : NumConsecutiveStores -= NumElem;
14531 : }
14532 96490 : continue;
14533 : }
14534 :
14535 : // When extracting multiple vector elements, try to store them
14536 : // in one vector store rather than a sequence of scalar stores.
14537 17631 : if (IsExtractVecSrc) {
14538 : // Loop over the consecutive stores as long as merges succeed.
14539 3978 : while (NumConsecutiveStores >= 2) {
14540 2014 : LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14541 : unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14542 2014 : unsigned FirstStoreAlign = FirstInChain->getAlignment();
14543 : unsigned NumStoresToMerge = 1;
14544 11213 : for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14545 : // Find a legal type for the vector store.
14546 9612 : unsigned Elts = (i + 1) * NumMemElts;
14547 : EVT Ty =
14548 9612 : EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14549 : bool IsFast;
14550 :
14551 : // Break early when size is too large to be legal.
14552 9612 : if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14553 : break;
14554 :
14555 10466 : if (TLI.isTypeLegal(Ty) &&
14556 2220 : TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14557 953 : TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14558 357 : FirstStoreAlign, &IsFast) &&
14559 : IsFast)
14560 : NumStoresToMerge = i + 1;
14561 : }
14562 :
14563 : // Check if we found a legal vector type creating a meaningful
14564 : // merge.
14565 2014 : if (NumStoresToMerge < 2) {
14566 : // We know that candidate stores are in order and of correct
14567 : // shape. While there is no mergeable sequence from the
14568 : // beginning, one may start later in the sequence. The only
14569 : // reason a merge of size N could have failed where another of
14570 : // the same size would not have, is if the alignment has
14571 : // improved. Drop as many candidates as we can here.
14572 : unsigned NumSkip = 1;
14573 14323 : while (
14574 30535 : (NumSkip < NumConsecutiveStores) &&
14575 28660 : (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14576 14323 : NumSkip++;
14577 :
14578 1882 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14579 1882 : NumConsecutiveStores -= NumSkip;
14580 1882 : continue;
14581 : }
14582 :
14583 : // Check that we can merge these candidates without causing a cycle.
14584 132 : if (!checkMergeStoreCandidatesForDependencies(
14585 : StoreNodes, NumStoresToMerge, RootNode)) {
14586 : StoreNodes.erase(StoreNodes.begin(),
14587 4 : StoreNodes.begin() + NumStoresToMerge);
14588 4 : NumConsecutiveStores -= NumStoresToMerge;
14589 4 : continue;
14590 : }
14591 :
14592 128 : RV |= MergeStoresOfConstantsOrVecElts(
14593 : StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14594 :
14595 : StoreNodes.erase(StoreNodes.begin(),
14596 128 : StoreNodes.begin() + NumStoresToMerge);
14597 128 : NumConsecutiveStores -= NumStoresToMerge;
14598 : }
14599 1964 : continue;
14600 : }
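// Illustrative sketch: the skip-ahead heuristic used by both merge loops
// above, with plain C++ stand-ins for the DAG types (hypothetical helper,
// not the SelectionDAG API). When no mergeable prefix of size >= 2 exists,
// any leading candidate whose alignment does not exceed the first store's
// alignment may be dropped, because only an alignment improvement could make
// a later attempt of the same size succeed. (The constant-store path above
// additionally bounds the scan by FirstZeroAfterNonZero.)
#include <cstddef>
#include <vector>

struct CandidateStoreSketch { unsigned Align; };

static size_t countSkippableCandidates(
    const std::vector<CandidateStoreSketch> &Stores) {
  if (Stores.empty())
    return 0;
  unsigned FirstAlign = Stores[0].Align;
  size_t NumSkip = 1;
  // Mirrors the loops above: advance while alignment has not improved.
  while (NumSkip < Stores.size() && Stores[NumSkip].Align <= FirstAlign)
    ++NumSkip;
  return NumSkip;
}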
14601 :
14602 : // Below we handle the case of multiple consecutive stores that
14603 : // come from multiple consecutive loads. We merge them into a single
14604 : // wide load and a single wide store.
14605 :
14606 : // Look for load nodes which are used by the stored values.
14607 15667 : SmallVector<MemOpLink, 8> LoadNodes;
14608 :
14609 : // Find acceptable loads. Loads need to have the same chain (token factor),
14610 : // must not be zext, volatile, or indexed, and they must be consecutive.
14611 15667 : BaseIndexOffset LdBasePtr;
14612 :
14613 110375 : for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14614 189416 : StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14615 94708 : SDValue Val = peekThroughBitcasts(St->getValue());
14616 : LoadSDNode *Ld = cast<LoadSDNode>(Val);
14617 :
14618 94708 : BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14619 : // If this is not the first ptr that we check.
14620 94708 : int64_t LdOffset = 0;
14621 94708 : if (LdBasePtr.getBase().getNode()) {
14622 : // The base ptr must be the same.
14623 79041 : if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14624 : break;
14625 : } else {
14626 : // Check that all other base pointers are the same as this one.
14627 15667 : LdBasePtr = LdPtr;
14628 : }
14629 :
14630 : // We found a potential memory operand to merge.
14631 94708 : LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14632 : }
14633 :
14634 30956 : while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14635 : // If we have load/store pair instructions and we only have two values,
14636 : // don't bother merging.
14637 : unsigned RequiredAlignment;
14638 8239 : if (LoadNodes.size() == 2 &&
14639 15909 : TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14640 620 : StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14641 620 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14642 620 : LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14643 620 : break;
14644 : }
14645 15289 : LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14646 : unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14647 15289 : unsigned FirstStoreAlign = FirstInChain->getAlignment();
14648 15289 : LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14649 : unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14650 15289 : unsigned FirstLoadAlign = FirstLoad->getAlignment();
14651 :
14652 : // Scan the memory operations on the chain and find the first
14653 : // non-consecutive load memory address. These variables hold the index
14654 : // into the load node array.
14655 :
14656 : unsigned LastConsecutiveLoad = 1;
14657 :
14658 : // These variables refer to sizes, not indices into the array.
14659 15289 : unsigned LastLegalVectorType = 1;
14660 15289 : unsigned LastLegalIntegerType = 1;
14661 : bool isDereferenceable = true;
14662 : bool DoIntegerTruncate = false;
14663 15289 : StartAddress = LoadNodes[0].OffsetFromBase;
14664 15289 : SDValue FirstChain = FirstLoad->getChain();
14665 75719 : for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14666 : // All loads must share the same chain.
14667 65721 : if (LoadNodes[i].MemNode->getChain() != FirstChain)
14668 : break;
14669 :
14670 64552 : int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14671 64552 : if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14672 : break;
14673 : LastConsecutiveLoad = i;
14674 :
14675 64160 : if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14676 : isDereferenceable = false;
14677 :
14678 : // Find a legal type for the vector store.
14679 64160 : unsigned Elts = (i + 1) * NumMemElts;
14680 64160 : EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14681 :
14682 : // Break early when size is too large to be legal.
14683 64160 : if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14684 : break;
14685 :
14686 : bool IsFastSt, IsFastLd;
14687 65713 : if (TLI.isTypeLegal(StoreTy) &&
14688 8163 : TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14689 2880 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14690 1948 : FirstStoreAlign, &IsFastSt) &&
14691 131 : IsFastSt &&
14692 131 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14693 131 : FirstLoadAlign, &IsFastLd) &&
14694 : IsFastLd) {
14695 125 : LastLegalVectorType = i + 1;
14696 : }
14697 :
14698 : // Find a legal type for the integer store.
14699 60430 : unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14700 60430 : StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14701 67080 : if (TLI.isTypeLegal(StoreTy) &&
14702 13282 : TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14703 6632 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14704 174 : FirstStoreAlign, &IsFastSt) &&
14705 174 : IsFastSt &&
14706 174 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14707 174 : FirstLoadAlign, &IsFastLd) &&
14708 : IsFastLd) {
14709 174 : LastLegalIntegerType = i + 1;
14710 : DoIntegerTruncate = false;
14711 : // Or check whether a truncstore and extload is legal.
14712 60256 : } else if (TLI.getTypeAction(Context, StoreTy) ==
14713 : TargetLowering::TypePromoteInteger) {
14714 39602 : EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14715 40551 : if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14716 1898 : TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14717 949 : TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14718 949 : StoreTy) &&
14719 : TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14720 933 : StoreTy) &&
14721 933 : TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14722 933 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14723 25 : FirstStoreAlign, &IsFastSt) &&
14724 25 : IsFastSt &&
14725 25 : TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14726 25 : FirstLoadAlign, &IsFastLd) &&
14727 : IsFastLd) {
14728 25 : LastLegalIntegerType = i + 1;
14729 : DoIntegerTruncate = true;
14730 : }
14731 : }
14732 : }
14733 :
14734 : // Only use vector types if the vector type is larger than the integer
14735 : // type. If they are the same, use integers.
14736 15289 : bool UseVectorTy =
14737 15289 : LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14738 : unsigned LastLegalType =
14739 15289 : std::max(LastLegalVectorType, LastLegalIntegerType);
14740 :
14741 : // We add +1 here because the LastXXX variables refer to a position
14742 : // (index) while NumElem refers to a count (size).
14743 : unsigned NumElem =
14744 18581 : std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14745 15289 : NumElem = std::min(LastLegalType, NumElem);
14746 :
14747 15289 : if (NumElem < 2) {
14748 : // We know that candidate stores are in order and of correct
14749 : // shape. While there is no mergeable sequence from the
14750 : // beginning, one may start later in the sequence. The only
14751 : // reason a merge of size N could have failed where another of
14752 : // the same size would not have is if the alignment of either
14753 : // the load or store has improved. Drop as many candidates as we
14754 : // can here.
14755 : unsigned NumSkip = 1;
14756 77890 : while ((NumSkip < LoadNodes.size()) &&
14757 170256 : (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14758 77632 : (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14759 77582 : NumSkip++;
14760 15042 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14761 15042 : LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14762 15042 : NumConsecutiveStores -= NumSkip;
14763 15042 : continue;
14764 : }
14765 :
14766 : // Check that we can merge these candidates without causing a cycle.
14767 247 : if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14768 : RootNode)) {
14769 0 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14770 0 : LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14771 0 : NumConsecutiveStores -= NumElem;
14772 0 : continue;
14773 : }
14774 :
14775 : // Find if it is better to use vectors or integers to load and store
14776 : // to memory.
14777 : EVT JointMemOpVT;
14778 247 : if (UseVectorTy) {
14779 : // Find a legal type for the vector store.
14780 110 : unsigned Elts = NumElem * NumMemElts;
14781 110 : JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14782 : } else {
14783 137 : unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14784 137 : JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14785 : }
14786 :
14787 247 : SDLoc LoadDL(LoadNodes[0].MemNode);
14788 247 : SDLoc StoreDL(StoreNodes[0].MemNode);
14789 :
14790 : // The merged loads are required to have the same incoming chain, so
14791 : // using the first's chain is acceptable.
14792 :
14793 247 : SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14794 247 : AddToWorklist(NewStoreChain.getNode());
14795 :
14796 : MachineMemOperand::Flags MMOFlags =
14797 247 : isDereferenceable ? MachineMemOperand::MODereferenceable
14798 : : MachineMemOperand::MONone;
14799 :
14800 247 : SDValue NewLoad, NewStore;
14801 247 : if (UseVectorTy || !DoIntegerTruncate) {
14802 238 : NewLoad =
14803 238 : DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
14804 238 : FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14805 476 : FirstLoadAlign, MMOFlags);
14806 238 : NewStore = DAG.getStore(
14807 : NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
14808 238 : FirstInChain->getPointerInfo(), FirstStoreAlign);
14809 : } else { // This must be the truncstore/extload case
14810 : EVT ExtendedTy =
14811 9 : TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
14812 9 : NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
14813 : FirstLoad->getChain(), FirstLoad->getBasePtr(),
14814 9 : FirstLoad->getPointerInfo(), JointMemOpVT,
14815 18 : FirstLoadAlign, MMOFlags);
14816 9 : NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
14817 : FirstInChain->getBasePtr(),
14818 9 : FirstInChain->getPointerInfo(),
14819 : JointMemOpVT, FirstInChain->getAlignment(),
14820 18 : FirstInChain->getMemOperand()->getFlags());
14821 : }
14822 :
14823 : // Transfer chain users from old loads to the new load.
14824 923 : for (unsigned i = 0; i < NumElem; ++i) {
14825 676 : LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
14826 1352 : DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
14827 : SDValue(NewLoad.getNode(), 1));
14828 : }
14829 :
14830 : // Replace all of the stores with the new store. Recursively remove the
14831 : // corresponding value if it is no longer used.
14832 923 : for (unsigned i = 0; i < NumElem; ++i) {
14833 1352 : SDValue Val = StoreNodes[i].MemNode->getOperand(1);
14834 676 : CombineTo(StoreNodes[i].MemNode, NewStore);
14835 676 : if (Val.getNode()->use_empty())
14836 676 : recursivelyDeleteUnusedNodes(Val.getNode());
14837 : }
14838 :
14839 : RV = true;
14840 247 : StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14841 247 : LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14842 247 : NumConsecutiveStores -= NumElem;
14843 : }
14844 : }
14845 : return RV;
14846 : }
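// Illustrative sketch: the consecutiveness test the load/store merge above
// relies on, with plain int64_t offsets standing in for BaseIndexOffset
// results (hypothetical helper, not the SelectionDAG API). Offsets must grow
// from the first load's offset in exact ElementSizeBytes steps; the returned
// run length corresponds to LastConsecutiveLoad + 1 above.
#include <cstdint>
#include <vector>

static unsigned countConsecutiveRun(const std::vector<int64_t> &Offsets,
                                    int64_t ElementSizeBytes) {
  unsigned Run = Offsets.empty() ? 0 : 1;
  for (unsigned i = 1; i < Offsets.size(); ++i) {
    if (Offsets[i] - Offsets[0] != ElementSizeBytes * (int64_t)i)
      break; // first gap ends the mergeable run
    Run = i + 1;
  }
  return Run;
}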
14847 :
14848 420021 : SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
14849 : SDLoc SL(ST);
14850 420021 : SDValue ReplStore;
14851 :
14852 : // Replace the chain to avoid dependency.
14853 420021 : if (ST->isTruncatingStore()) {
14854 1839 : ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
14855 : ST->getBasePtr(), ST->getMemoryVT(),
14856 3678 : ST->getMemOperand());
14857 : } else {
14858 418182 : ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
14859 836364 : ST->getMemOperand());
14860 : }
14861 :
14862 : // Create token to keep both nodes around.
14863 420021 : SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
14864 420021 : MVT::Other, ST->getChain(), ReplStore);
14865 :
14866 : // Make sure the new and old chains are cleaned up.
14867 420021 : AddToWorklist(Token.getNode());
14868 :
14869 : // Don't add users to work list.
14870 420021 : return CombineTo(ST, Token, false);
14871 : }
14872 :
14873 9477 : SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
14874 9477 : SDValue Value = ST->getValue();
14875 9477 : if (Value.getOpcode() == ISD::TargetConstantFP)
14876 0 : return SDValue();
14877 :
14878 : SDLoc DL(ST);
14879 :
14880 9477 : SDValue Chain = ST->getChain();
14881 9477 : SDValue Ptr = ST->getBasePtr();
14882 :
14883 : const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
14884 :
14885 : // NOTE: If the original store is volatile, this transform must not increase
14886 : // the number of stores. For example, on x86-32 an f64 can be stored in one
14887 : // processor operation but an i64 (which is not legal) requires two. So the
14888 : // transform should not be done in this case.
14889 :
14890 9477 : SDValue Tmp;
14891 9477 : switch (CFP->getSimpleValueType(0).SimpleTy) {
14892 0 : default:
14893 0 : llvm_unreachable("Unknown FP type");
14894 1930 : case MVT::f16: // We don't do this for these yet.
14895 : case MVT::f80:
14896 : case MVT::f128:
14897 : case MVT::ppcf128:
14898 1930 : return SDValue();
14899 : case MVT::f32:
14900 1425 : if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
14901 : TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14902 :
14903 2788 : Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
14904 2788 : bitcastToAPInt().getZExtValue(), SDLoc(CFP),
14905 1394 : MVT::i32);
14906 1394 : return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
14907 : }
14908 :
14909 0 : return SDValue();
14910 6153 : case MVT::f64:
14911 6153 : if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
14912 1232 : !ST->isVolatile()) ||
14913 : TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
14914 :
14915 2464 : Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
14916 1232 : getZExtValue(), SDLoc(CFP), MVT::i64);
14917 1232 : return DAG.getStore(Chain, DL, Tmp,
14918 1232 : Ptr, ST->getMemOperand());
14919 : }
14920 :
14921 4921 : if (!ST->isVolatile() &&
14922 : TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14923 : // Many FP stores are not made apparent until after legalize, e.g. for
14924 : // argument passing. Since this is so common, custom legalize the
14925 : // 64-bit integer store into two 32-bit stores.
14926 9818 : uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
14927 4909 : SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
14928 4909 : SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
14929 4909 : if (DAG.getDataLayout().isBigEndian())
14930 : std::swap(Lo, Hi);
14931 :
14932 4909 : unsigned Alignment = ST->getAlignment();
14933 4909 : MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14934 : AAMDNodes AAInfo = ST->getAAInfo();
14935 :
14936 4909 : SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14937 4909 : ST->getAlignment(), MMOFlags, AAInfo);
14938 4909 : Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14939 4909 : DAG.getConstant(4, DL, Ptr.getValueType()));
14940 4909 : Alignment = MinAlign(Alignment, 4U);
14941 4909 : SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
14942 4909 : ST->getPointerInfo().getWithOffset(4),
14943 4909 : Alignment, MMOFlags, AAInfo);
14944 4909 : return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14945 4909 : St0, St1);
14946 : }
14947 :
14948 12 : return SDValue();
14949 : }
14950 : }
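// Illustrative sketch: the bit split performed by the MVT::f64 case above,
// which rewrites one f64 constant store as two i32 stores. memcpy stands in
// for the f64 -> i64 bitcast; a little-endian layout is assumed, and the
// code above swaps the halves for big-endian targets (hypothetical helper).
#include <cstdint>
#include <cstring>

static void splitF64BitsSketch(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // bitcast f64 -> i64
  Lo = (uint32_t)(Bits & 0xFFFFFFFFu);  // stored at Ptr
  Hi = (uint32_t)(Bits >> 32);          // stored at Ptr + 4
}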
14951 :
14952 8005537 : SDValue DAGCombiner::visitSTORE(SDNode *N) {
14953 : StoreSDNode *ST = cast<StoreSDNode>(N);
14954 8005537 : SDValue Chain = ST->getChain();
14955 8005537 : SDValue Value = ST->getValue();
14956 8005537 : SDValue Ptr = ST->getBasePtr();
14957 :
14958 : // If this is a store of a bit convert, store the input value if the
14959 : // resultant store does not need a higher alignment than the original.
14960 16011074 : if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
14961 : ST->isUnindexed()) {
14962 214911 : EVT SVT = Value.getOperand(0).getValueType();
14963 : // If the store is volatile, we only want to change the store type if the
14964 : // resulting store is legal. Otherwise we might increase the number of
14965 : // memory accesses. We don't care if the original type was legal or not
14966 : // as we assume software couldn't rely on the number of accesses of an
14967 : // illegal type.
14968 12530 : if (((!LegalOperations && !ST->isVolatile()) ||
14969 416733 : TLI.isOperationLegal(ISD::STORE, SVT)) &&
14970 402462 : TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
14971 170915 : unsigned OrigAlign = ST->getAlignment();
14972 170915 : bool Fast = false;
14973 170915 : if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
14974 170915 : ST->getAddressSpace(), OrigAlign, &Fast) &&
14975 : Fast) {
14976 170675 : return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
14977 : ST->getPointerInfo(), OrigAlign,
14978 513526 : ST->getMemOperand()->getFlags(), ST->getAAInfo());
14979 : }
14980 : }
14981 : }
14982 :
14983 : // Turn 'store undef, Ptr' -> nothing.
14984 15669724 : if (Value.isUndef() && ST->isUnindexed())
14985 2112 : return Chain;
14986 :
14987 : // Try to infer better alignment information than the store already has.
14988 7832750 : if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
14989 3619290 : if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14990 2123509 : if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
14991 : SDValue NewStore =
14992 9135 : DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
14993 : ST->getMemoryVT(), Align,
14994 23593 : ST->getMemOperand()->getFlags(), ST->getAAInfo());
14995 : // NewStore will always be N as we are only refining the alignment
14996 : assert(NewStore.getNode() == N);
14997 : (void)NewStore;
14998 : }
14999 : }
15000 : }
15001 :
15002 : // Try transforming a pair floating point load / store ops to integer
15003 : // load / store ops.
15004 7832750 : if (SDValue NewST = TransformFPLoadStorePair(N))
15005 3 : return NewST;
15006 :
15007 7832747 : if (ST->isUnindexed()) {
15008 : // Walk up chain skipping non-aliasing memory nodes, on this store and any
15009 : // adjacent stores.
15010 7832510 : if (findBetterNeighborChains(ST)) {
15011 : // replaceStoreChain uses CombineTo, which handled all of the worklist
15012 : // manipulation. Return the original node to not do anything else.
15013 401881 : return SDValue(ST, 0);
15014 : }
15015 7430629 : Chain = ST->getChain();
15016 : }
15017 :
15018 : // FIXME: is there such a thing as a truncating indexed store?
15019 7430866 : if (ST->isTruncatingStore() && ST->isUnindexed() &&
15020 7484374 : Value.getValueType().isInteger()) {
15021 : // See if we can simplify the input to this truncstore with knowledge that
15022 : // only the low bits are being used. For example:
15023 : // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
15024 53272 : SDValue Shorter = DAG.GetDemandedBits(
15025 53272 : Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15026 106544 : ST->getMemoryVT().getScalarSizeInBits()));
15027 53272 : AddToWorklist(Value.getNode());
15028 53272 : if (Shorter.getNode())
15029 1489 : return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
15030 1489 : Ptr, ST->getMemoryVT(), ST->getMemOperand());
15031 :
15032 : // Otherwise, see if we can simplify the operation with
15033 : // SimplifyDemandedBits, which only works if the value has a single use.
15034 51783 : if (SimplifyDemandedBits(
15035 : Value,
15036 51783 : APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15037 103566 : ST->getMemoryVT().getScalarSizeInBits()))) {
15038 : // Re-visit the store if anything changed and the store hasn't been merged
15039 : // with another node (in which case N is deleted). SimplifyDemandedBits will
15040 : // add Value's node back to the worklist if necessary, but we also need to
15041 : // re-visit the Store node itself.
15042 2270 : if (N->getOpcode() != ISD::DELETED_NODE)
15043 2270 : AddToWorklist(N);
15044 2270 : return SDValue(N, 0);
15045 : }
15046 : }
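// Illustrative sketch: why the demanded-bits fold above is sound. An i8
// truncating store keeps only the low 8 bits of its value, and
// (or (shl x, 8), y) agrees with y in exactly those bits because the shift
// moves x out of the stored range (standalone example with fixed widths
// matching the comment's i8 case).
#include <cassert>
#include <cstdint>

static void truncStoreDemandedBitsExample() {
  uint32_t X = 0xABCD, Y = 0x5A;
  uint32_t Merged = (X << 8) | Y;
  assert((uint8_t)Merged == (uint8_t)Y); // the low byte comes from y alone
}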
15047 :
15048 : // If this is a load followed by a store to the same location, then the store
15049 : // is dead/noop.
15050 : if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15051 2037692 : if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15052 6784 : ST->isUnindexed() && !ST->isVolatile() &&
15053 : // There can't be any side effects between the load and store, such as
15054 : // a call or store.
15055 2032605 : Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15056 : // The store is dead, remove it.
15057 107 : return Chain;
15058 : }
15059 : }
15060 :
15061 : if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15062 1136710 : if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15063 2258925 : !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
15064 : ST->getMemoryVT() == ST1->getMemoryVT()) {
15065 : // If this is a store followed by a store with the same value to the same
15066 : // location, then the store is dead/noop.
15067 1602 : if (ST1->getValue() == Value) {
15068 : // The store is dead, remove it.
15069 67 : return Chain;
15070 : }
15071 :
15072 : // If this store is preceded by a store to the same location and no other
15073 : // node is chained to that preceding store, the preceding store can
15074 : // effectively be dropped. Do not remove stores to undef as they may be
15075 : // used as data sinks.
15076 2048 : if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15077 : !ST1->getBasePtr().isUndef()) {
15078 : // ST1 is fully overwritten and can be elided. Combine with its chain
15079 : // value.
15080 382 : CombineTo(ST1, ST1->getChain());
15081 382 : return SDValue();
15082 : }
15083 : }
15084 : }
15085 :
15086 : // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15087 : // truncating store. We can do this even if this is already a truncstore.
15088 7426551 : if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15089 7477833 : && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15090 102564 : TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15091 : ST->getMemoryVT())) {
15092 2218 : return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15093 2227 : Ptr, ST->getMemoryVT(), ST->getMemOperand());
15094 : }
15095 :
15096 : // Always perform this optimization before types are legal. If the target
15097 : // prefers, also try this after legalization to catch stores that were created
15098 : // by intrinsics or other nodes.
15099 7424333 : if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15100 : while (true) {
15101 : // There can be multiple store sequences on the same chain.
15102 : // Keep trying to merge store sequences until we are unable to do so
15103 : // or until we merge the last store on the chain.
15104 7161723 : bool Changed = MergeConsecutiveStores(ST);
15105 7161723 : if (!Changed) break;
15106 : // Return N, as the merge only uses CombineTo and no worklist
15107 : // cleanup is necessary.
15108 1123 : if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15109 999 : return SDValue(N, 0);
15110 : }
15111 : }
15112 :
15113 : // Try transforming N to an indexed store.
15114 7423334 : if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15115 310 : return SDValue(N, 0);
15116 :
15117 : // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15118 : //
15119 : // Make sure to do this only after attempting to merge stores in order to
15120 : // avoid changing the types of some subset of stores due to visit order,
15121 : // preventing their merging.
15122 : if (isa<ConstantFPSDNode>(ST->getValue())) {
15123 9477 : if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15124 7535 : return NewSt;
15125 : }
15126 :
15127 7415489 : if (SDValue NewSt = splitMergedValStore(ST))
15128 0 : return NewSt;
15129 :
15130 7415489 : return ReduceLoadOpStoreWidth(N);
15131 : }
15132 :
15133 : /// For the store instruction sequence below, the F and I values
15134 : /// are bundled together as an i64 value before being stored into memory.
15135 : /// Sometimes it is more efficient to generate separate stores for F and I,
15136 : /// which can remove the bitwise instructions or sink them to colder places.
15137 : ///
15138 : /// (store (or (zext (bitcast F to i32) to i64),
15139 : /// (shl (zext I to i64), 32)), addr) -->
15140 : /// (store F, addr) and (store I, addr+4)
15141 : ///
15142 : /// Similarly, splitting other merged stores can also be beneficial, like:
15143 : /// For pair of {i32, i32}, i64 store --> two i32 stores.
15144 : /// For pair of {i32, i16}, i64 store --> two i32 stores.
15145 : /// For pair of {i16, i16}, i32 store --> two i16 stores.
15146 : /// For pair of {i16, i8}, i32 store --> two i16 stores.
15147 : /// For pair of {i8, i8}, i16 store --> two i8 stores.
15148 : ///
15149 : /// We allow each target to determine specifically which kind of splitting is
15150 : /// supported.
15151 : ///
15152 : /// These store patterns are commonly seen when, in the snippet below,
15153 : /// only std::make_pair(...) is SROA-transformed before being inlined into hoo().
15154 : /// void goo(const std::pair<int, float> &);
15155 : /// hoo() {
15156 : /// ...
15157 : /// goo(std::make_pair(tmp, ftmp));
15158 : /// ...
15159 : /// }
15160 : ///
15161 7415489 : SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15162 7415489 : if (OptLevel == CodeGenOpt::None)
15163 4211383 : return SDValue();
15164 :
15165 3204106 : SDValue Val = ST->getValue();
15166 : SDLoc DL(ST);
15167 :
15168 : // Match OR operand.
15169 9612318 : if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15170 3185740 : return SDValue();
15171 :
15172 : // Match SHL operand and get Lower and Higher parts of Val.
15173 18366 : SDValue Op1 = Val.getOperand(0);
15174 18366 : SDValue Op2 = Val.getOperand(1);
15175 : SDValue Lo, Hi;
15176 18366 : if (Op1.getOpcode() != ISD::SHL) {
15177 : std::swap(Op1, Op2);
15178 16763 : if (Op1.getOpcode() != ISD::SHL)
15179 13643 : return SDValue();
15180 : }
15181 4723 : Lo = Op2;
15182 4723 : Hi = Op1.getOperand(0);
15183 4723 : if (!Op1.hasOneUse())
15184 9 : return SDValue();
15185 :
15186 : // Match shift amount to HalfValBitSize.
15187 4714 : unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15188 : ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15189 6748 : if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15190 3135 : return SDValue();
15191 :
15192 : // Lo and Hi are zero-extended to i64 from integers whose size is at
15193 : // most 32 bits.
15194 300 : if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15195 450 : !Lo.getOperand(0).getValueType().isScalarInteger() ||
15196 300 : Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15197 12 : Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15198 1597 : !Hi.getOperand(0).getValueType().isScalarInteger() ||
15199 6 : Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15200 1573 : return SDValue();
15201 :
15202 : // Use the EVT of low and high parts before bitcast as the input
15203 : // of target query.
15204 6 : EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15205 0 : ? Lo.getOperand(0).getValueType()
15206 6 : : Lo.getValueType();
15207 6 : EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15208 0 : ? Hi.getOperand(0).getValueType()
15209 6 : : Hi.getValueType();
15210 6 : if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15211 6 : return SDValue();
15212 :
15213 : // Start to split store.
15214 0 : unsigned Alignment = ST->getAlignment();
15215 0 : MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15216 : AAMDNodes AAInfo = ST->getAAInfo();
15217 :
15218 : // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15219 0 : EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15220 0 : Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15221 0 : Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15222 :
15223 0 : SDValue Chain = ST->getChain();
15224 0 : SDValue Ptr = ST->getBasePtr();
15225 : // Lower value store.
15226 0 : SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15227 0 : ST->getAlignment(), MMOFlags, AAInfo);
15228 0 : Ptr =
15229 0 : DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15230 0 : DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15231 : // Higher value store.
15232 : SDValue St1 =
15233 0 : DAG.getStore(St0, DL, Hi, Ptr,
15234 0 : ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15235 0 : Alignment / 2, MMOFlags, AAInfo);
15236 0 : return St1;
15237 : }
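// Illustrative sketch: the split documented above on a concrete {float, int}
// pair. Rather than bundling both values into one i64 store, emit two 32-bit
// stores at Addr and Addr + 4. memcpy stands in for the bitcast and a
// little-endian layout is assumed (hypothetical helper).
#include <cstdint>
#include <cstring>

static void storePairSplitSketch(unsigned char *Addr, float F, int32_t I) {
  // Merged form: store (or (zext (bitcast F to i32) to i64),
  //                        (shl (zext I to i64), 32)), addr
  // Split form produced by splitMergedValStore:
  std::memcpy(Addr, &F, sizeof(F));     // (store F, addr)
  std::memcpy(Addr + 4, &I, sizeof(I)); // (store I, addr+4)
}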
15238 :
15239 : /// Convert a disguised subvector insertion into a shuffle:
15240 : /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15241 : /// bitcast(shuffle (bitcast V), (extended X), Mask)
15242 : /// Note: We do not use an insert_subvector node because that requires a legal
15243 : /// subvector type.
15244 50985 : SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15245 50985 : SDValue InsertVal = N->getOperand(1);
15246 53745 : if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15247 2730 : !InsertVal.getOperand(0).getValueType().isVector())
15248 49911 : return SDValue();
15249 :
15250 1074 : SDValue SubVec = InsertVal.getOperand(0);
15251 1074 : SDValue DestVec = N->getOperand(0);
15252 1074 : EVT SubVecVT = SubVec.getValueType();
15253 1074 : EVT VT = DestVec.getValueType();
15254 : unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15255 1074 : unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15256 1074 : unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15257 :
15258 : // Step 1: Create a shuffle mask that implements this insert operation. The
15259 : // vector that we are inserting into will be operand 0 of the shuffle, so
15260 : // those elements are just 'i'. The inserted subvector is in the first
15261 : // positions of operand 1 of the shuffle. Example:
15262 : // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15263 1074 : SmallVector<int, 16> Mask(NumMaskVals);
15264 106808 : for (unsigned i = 0; i != NumMaskVals; ++i) {
15265 105734 : if (i / NumSrcElts == InsIndex)
15266 27190 : Mask[i] = (i % NumSrcElts) + NumMaskVals;
15267 : else
15268 184278 : Mask[i] = i;
15269 : }
15270 :
15271 : // Bail out if the target cannot handle the shuffle we want to create.
15272 1074 : EVT SubVecEltVT = SubVecVT.getVectorElementType();
15273 1074 : EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15274 2148 : if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15275 1048 : return SDValue();
15276 :
15277 : // Step 2: Create a wide vector from the inserted source vector by appending
15278 : // undefined elements. This is the same size as our destination vector.
15279 : SDLoc DL(N);
15280 26 : SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15281 26 : ConcatOps[0] = SubVec;
15282 52 : SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15283 :
15284 : // Step 3: Shuffle in the padded subvector.
15285 26 : SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15286 52 : SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15287 26 : AddToWorklist(PaddedSubV.getNode());
15288 26 : AddToWorklist(DestVecBC.getNode());
15289 26 : AddToWorklist(Shuf.getNode());
15290 26 : return DAG.getBitcast(VT, Shuf);
15291 : }
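// Illustrative sketch: the mask construction from Step 1 above, checked on
// the documented example "insert v4i32 V, (v2i16 X), 2", where NumMaskVals=8,
// NumSrcElts=2, and InsIndex=2 yield {0,1,2,3,8,9,6,7} (standalone example
// using std::vector in place of SmallVector).
#include <cassert>
#include <vector>

static std::vector<int> buildInsertShuffleMask(unsigned NumMaskVals,
                                               unsigned NumSrcElts,
                                               unsigned InsIndex) {
  std::vector<int> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i)
    Mask[i] = (i / NumSrcElts == InsIndex)
                  ? (int)(i % NumSrcElts + NumMaskVals) // from inserted vector
                  : (int)i;                             // from destination
  return Mask;
}

static void insertShuffleMaskExample() {
  const std::vector<int> Expected = {0, 1, 2, 3, 8, 9, 6, 7};
  assert(buildInsertShuffleMask(8, 2, 2) == Expected);
}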
15292 :
15293 52048 : SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15294 52048 : SDValue InVec = N->getOperand(0);
15295 52048 : SDValue InVal = N->getOperand(1);
15296 52048 : SDValue EltNo = N->getOperand(2);
15297 : SDLoc DL(N);
15298 :
15299 : // If the inserted element is an UNDEF, just use the input vector.
15300 104096 : if (InVal.isUndef())
15301 323 : return InVec;
15302 :
15303 51725 : EVT VT = InVec.getValueType();
15304 :
15305 : // Remove redundant insertions:
15306 : // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
15307 : if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15308 51758 : InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
15309 11 : return InVec;
15310 :
15311 : auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15312 : if (!IndexC) {
15313 : // If this is a variable insert into an undef vector, it might be better to splat:
15314 : // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
15315 729 : if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
15316 60 : SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
15317 120 : return DAG.getBuildVector(VT, DL, Ops);
15318 : }
15319 669 : return SDValue();
15320 : }
15321 :
15322 : // We must know which element is being inserted for folds below here.
15323 50985 : unsigned Elt = IndexC->getZExtValue();
15324 50985 : if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
15325 26 : return Shuf;
15326 :
15327 : // Canonicalize insert_vector_elt dag nodes.
15328 : // Example:
15329 : // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
15330 : // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15331 : //
15332 : // Do this only if the child insert_vector node has one use; also
15333 : // do this only if indices are both constants and Idx1 < Idx0.
15334 18210 : if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15335 50959 : && isa<ConstantSDNode>(InVec.getOperand(2))) {
15336 18179 : unsigned OtherElt = InVec.getConstantOperandVal(2);
15337 18179 : if (Elt < OtherElt) {
15338 : // Swap nodes.
15339 290 : SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15340 290 : InVec.getOperand(0), InVal, EltNo);
15341 290 : AddToWorklist(NewOp.getNode());
15342 290 : return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15343 580 : VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15344 : }
15345 : }
15346 :
15347 : // If we can't generate a legal BUILD_VECTOR, exit
15348 50669 : if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15349 3597 : return SDValue();
15350 :
15351 : // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15352 : // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
15353 : // vector elements.
15354 : SmallVector<SDValue, 8> Ops;
15355 : // Do not combine these two vectors if the output vector will not replace
15356 : // the input vector.
15357 64199 : if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15358 34030 : Ops.append(InVec.getNode()->op_begin(),
15359 : InVec.getNode()->op_end());
15360 30057 : } else if (InVec.isUndef()) {
15361 : unsigned NElts = VT.getVectorNumElements();
15362 15268 : Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
15363 : } else {
15364 22423 : return SDValue();
15365 : }
15366 :
15367 : // Insert the element
15368 49298 : if (Elt < Ops.size()) {
15369 : // All the operands of BUILD_VECTOR must have the same type;
15370 : // we enforce that here.
15371 24649 : EVT OpVT = Ops[0].getValueType();
15372 24649 : Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15373 : }
15374 :
15375 : // Return the new vector
15376 49298 : return DAG.getBuildVector(VT, DL, Ops);
15377 : }
15378 :
15379 744 : SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
15380 : SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
15381 : assert(!OriginalLoad->isVolatile());
15382 :
15383 744 : EVT ResultVT = EVE->getValueType(0);
15384 744 : EVT VecEltVT = InVecVT.getVectorElementType();
15385 744 : unsigned Align = OriginalLoad->getAlignment();
15386 1488 : unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15387 744 : VecEltVT.getTypeForEVT(*DAG.getContext()));
15388 :
15389 744 : if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15390 46 : return SDValue();
15391 :
15392 698 : ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15393 : ISD::NON_EXTLOAD : ISD::EXTLOAD;
15394 698 : if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15395 12 : return SDValue();
15396 :
15397 : Align = NewAlign;
15398 :
15399 686 : SDValue NewPtr = OriginalLoad->getBasePtr();
15400 686 : SDValue Offset;
15401 686 : EVT PtrType = NewPtr.getValueType();
15402 : MachinePointerInfo MPI;
15403 : SDLoc DL(EVE);
15404 : if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15405 675 : int Elt = ConstEltNo->getZExtValue();
15406 675 : unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15407 675 : Offset = DAG.getConstant(PtrOff, DL, PtrType);
15408 1350 : MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15409 : } else {
15410 11 : Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15411 11 : Offset = DAG.getNode(
15412 : ISD::MUL, DL, PtrType, Offset,
15413 11 : DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15414 11 : MPI = OriginalLoad->getPointerInfo();
15415 : }
15416 1372 : NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15417 :
15418 : // The replacement we need to do here is a little tricky: we need to
15419 : // replace an extractelement of a load with a load.
15420 : // Use ReplaceAllUsesOfValuesWith to do the replacement.
15421 : // Note that this replacement assumes that the extractelement is the only
15422 : // use of the load; that's okay because we don't want to perform this
15423 : // transformation in other cases anyway.
15424 : SDValue Load;
15425 : SDValue Chain;
15426 686 : if (ResultVT.bitsGT(VecEltVT)) {
15427 : // If the result type of vextract is wider than the load, then issue an
15428 : // extending load instead.
15429 0 : ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15430 : VecEltVT)
15431 0 : ? ISD::ZEXTLOAD
15432 : : ISD::EXTLOAD;
15433 0 : Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15434 : OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15435 0 : Align, OriginalLoad->getMemOperand()->getFlags(),
15436 0 : OriginalLoad->getAAInfo());
15437 : Chain = Load.getValue(1);
15438 : } else {
15439 686 : Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15440 686 : MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15441 1372 : OriginalLoad->getAAInfo());
15442 686 : Chain = Load.getValue(1);
15443 686 : if (ResultVT.bitsLT(VecEltVT))
15444 0 : Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15445 : else
15446 686 : Load = DAG.getBitcast(ResultVT, Load);
15447 : }
15448 : WorklistRemover DeadNodes(*this);
15449 : SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15450 686 : SDValue To[] = { Load, Chain };
15451 686 : DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15452 : // Since we're explicitly calling ReplaceAllUses, add the new node to the
15453 : // worklist explicitly as well.
15454 686 : AddToWorklist(Load.getNode());
15455 : AddUsersToWorklist(Load.getNode()); // Add users too
15456 : // Make sure to revisit this node to clean it up; it will usually be dead.
15457 686 : AddToWorklist(EVE);
15458 : ++OpsNarrowed;
15459 686 : return SDValue(EVE, 0);
15460 : }
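// Illustrative sketch: the byte offset used above when the element index is
// a constant. Element Elt of the loaded vector begins this many bytes past
// the base pointer (standalone helper mirroring the PtrOff computation).
#include <cstdint>

static uint64_t narrowedLoadByteOffset(uint64_t EltSizeInBits, uint64_t Elt) {
  // e.g. element 3 of a v4f32 load: 32 * 3 / 8 = byte offset 12
  return EltSizeInBits * Elt / 8;
}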
15461 :
15462 416312 : SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15463 416312 : SDValue InVec = N->getOperand(0);
15464 416312 : EVT VT = InVec.getValueType();
15465 832624 : EVT NVT = N->getValueType(0);
15466 416312 : if (InVec.isUndef())
15467 31 : return DAG.getUNDEF(NVT);
15468 :
15469 : // (vextract (scalar_to_vector val, 0) -> val
15470 416281 : if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15471 : // Check if the result type doesn't match the inserted element type. A
15472 : // SCALAR_TO_VECTOR may truncate the inserted element and the
15473 : // EXTRACT_VECTOR_ELT may widen the extracted vector.
15474 337 : SDValue InOp = InVec.getOperand(0);
15475 0 : if (InOp.getValueType() != NVT) {
15476 : assert(InOp.getValueType().isInteger() && NVT.isInteger());
15477 10 : return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
15478 : }
15479 332 : return InOp;
15480 : }
15481 :
15482 415944 : SDValue EltNo = N->getOperand(1);
15483 : ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
15484 :
15485 : // extract_vector_elt of out-of-bounds element -> UNDEF
15486 1234011 : if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
15487 6 : return DAG.getUNDEF(NVT);
15488 :
15489 : // extract_vector_elt (build_vector x, y), 1 -> y
15490 411331 : if (ConstEltNo &&
15491 : InVec.getOpcode() == ISD::BUILD_VECTOR &&
15492 514496 : TLI.isTypeLegal(VT) &&
15493 34190 : (InVec.hasOneUse() ||
15494 34190 : TLI.aggressivelyPreferBuildVectorSources(VT))) {
15495 145236 : SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
15496 : EVT InEltVT = Elt.getValueType();
15497 :
15498 : // Sometimes build_vector's scalar input types do not match result type.
15499 0 : if (NVT == InEltVT)
15500 48412 : return Elt;
15501 :
15502 : // TODO: It may be useful to truncate if free if the build_vector implicitly
15503 : // converts.
15504 : }
15505 :
15506 367526 : if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST) {
15507 : // The vector index of the LSBs of the source depend on the endian-ness.
15508 99371 : bool IsLE = DAG.getDataLayout().isLittleEndian();
15509 :
15510 : // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
15511 100909 : unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1;
15512 99371 : SDValue BCSrc = InVec.getOperand(0);
15513 140589 : if (InVec.hasOneUse() && ConstEltNo->getZExtValue() == BCTruncElt &&
15514 126295 : VT.isInteger() && BCSrc.getValueType().isScalarInteger())
15515 2620 : return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
15516 : }
15517 :
15518 : // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15519 : //
15520 : // This only really matters if the index is non-constant since other combines
15521 : // on the constant elements already work.
15522 732432 : if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15523 1188 : EltNo == InVec.getOperand(2)) {
15524 30 : SDValue Elt = InVec.getOperand(1);
15525 49 : return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
15526 : }
15527 :
15528 : // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
15529 : // We only perform this optimization before the op legalization phase because
15530 : // we may introduce new vector instructions which are not backed by TD
15531 : // patterns. For example, on AVX we might end up extracting elements from a
15532 : // wide vector without using extract_subvector. However, if we can find an
15533 : // underlying scalar value, then we can always use that.
15534 366186 : if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
15535 917 : int NumElem = VT.getVectorNumElements();
15536 : ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
15537 : // Find the new index to extract from.
15538 1834 : int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
15539 :
15540 : // Extracting an undef index is undef.
15541 917 : if (OrigElt == -1)
15542 387 : return DAG.getUNDEF(NVT);
15543 :
15544 : // Select the right vector half to extract from.
15545 : SDValue SVInVec;
15546 890 : if (OrigElt < NumElem) {
15547 554 : SVInVec = InVec->getOperand(0);
15548 : } else {
15549 336 : SVInVec = InVec->getOperand(1);
15550 336 : OrigElt -= NumElem;
15551 : }
15552 :
15553 890 : if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
15554 260 : SDValue InOp = SVInVec.getOperand(OrigElt);
15555 130 : if (InOp.getValueType() != NVT) {
15556 : assert(InOp.getValueType().isInteger() && NVT.isInteger());
15557 0 : InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
15558 : }
15559 :
15560 130 : return InOp;
15561 : }
15562 :
15563 : // FIXME: We should handle recursing on other vector shuffles and
15564 : // scalar_to_vector here as well.
15565 :
15566 760 : if (!LegalOperations ||
15567 : // FIXME: Should really be just isOperationLegalOrCustom.
15568 760 : TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
15569 : TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
15570 230 : EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15571 460 : return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
15572 690 : DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
15573 : }
15574 : }
15575 :
15576 : // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
15577 : // simplify it based on the (valid) extraction indices.
15578 731598 : if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
15579 1588386 : return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15580 1588386 : Use->getOperand(0) == InVec &&
15581 : isa<ConstantSDNode>(Use->getOperand(1));
15582 : })) {
15583 271577 : APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
15584 1581452 : for (SDNode *Use : InVec->uses()) {
15585 1309875 : auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
15586 2619750 : if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
15587 1309875 : DemandedElts.setBit(CstElt->getZExtValue());
15588 : }
15589 271577 : if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
15590 575 : return SDValue(N, 0);
15591 : }
15592 :
15593 : bool BCNumEltsChanged = false;
15594 365224 : EVT ExtVT = VT.getVectorElementType();
15595 365224 : EVT LVT = ExtVT;
15596 :
15597 : // If the result of the load has to be truncated, then it's not necessarily
15598 : // profitable.
15599 365224 : if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
15600 0 : return SDValue();
15601 :
15602 730448 : if (InVec.getOpcode() == ISD::BITCAST) {
15603 : // Don't duplicate a load with other uses.
15604 97820 : if (!InVec.hasOneUse())
15605 78733 : return SDValue();
15606 :
15607 57261 : EVT BCVT = InVec.getOperand(0).getValueType();
15608 19087 : if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
15609 12598 : return SDValue();
15610 6489 : if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
15611 : BCNumEltsChanged = true;
15612 6489 : InVec = InVec.getOperand(0);
15613 6489 : ExtVT = BCVT.getVectorElementType();
15614 : }
15615 :
15616 : // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
15617 114780 : if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
15618 274717 : ISD::isNormalLoad(InVec.getNode()) &&
15619 67 : !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
15620 67 : SDValue Index = N->getOperand(1);
15621 : if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
15622 67 : if (!OrigLoad->isVolatile()) {
15623 : return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
15624 54 : OrigLoad);
15625 : }
15626 : }
15627 : }
15628 :
15629 : // Perform only after legalization to ensure build_vector / vector_shuffle
15630 : // optimizations have already been done.
15631 273839 : if (!LegalOperations) return SDValue();
15632 :
15633 : // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
15634 : // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
15635 : // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
15636 :
15637 159113 : if (ConstEltNo) {
15638 158926 : int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
15639 :
15640 : LoadSDNode *LN0 = nullptr;
15641 : const ShuffleVectorSDNode *SVN = nullptr;
15642 158926 : if (ISD::isNormalLoad(InVec.getNode())) {
15643 : LN0 = cast<LoadSDNode>(InVec);
15644 0 : } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15645 38347 : InVec.getOperand(0).getValueType() == ExtVT &&
15646 : ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
15647 : // Don't duplicate a load with other uses.
15648 17 : if (!InVec.hasOneUse())
15649 15 : return SDValue();
15650 :
15651 2 : LN0 = cast<LoadSDNode>(InVec.getOperand(0));
15652 : } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
15653 : // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
15654 : // =>
15655 : // (load $addr+1*size)
15656 :
15657 : // Don't duplicate a load with other uses.
15658 505 : if (!InVec.hasOneUse())
15659 443 : return SDValue();
15660 :
15661 : // If the bit convert changed the number of elements, it is unsafe
15662 : // to examine the mask.
15663 62 : if (BCNumEltsChanged)
15664 0 : return SDValue();
15665 :
15666 : // Select the input vector, guarding against out of range extract vector.
15667 : unsigned NumElems = VT.getVectorNumElements();
15668 62 : int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
15669 62 : InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
15670 :
15671 124 : if (InVec.getOpcode() == ISD::BITCAST) {
15672 : // Don't duplicate a load with other uses.
15673 51 : if (!InVec.hasOneUse())
15674 42 : return SDValue();
15675 :
15676 18 : InVec = InVec.getOperand(0);
15677 : }
15678 20 : if (ISD::isNormalLoad(InVec.getNode())) {
15679 : LN0 = cast<LoadSDNode>(InVec);
15680 13 : Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
15681 13 : EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
15682 : }
15683 : }
15684 :
15685 : // Make sure we found a non-volatile load and the extractelement is
15686 : // the only use.
15687 120594 : if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
15688 157736 : return SDValue();
15689 :
15690 : // If Idx was -1 above, Elt is going to be -1, so just return undef.
15691 690 : if (Elt == -1)
15692 0 : return DAG.getUNDEF(LVT);
15693 :
15694 690 : return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
15695 : }
15696 :
15697 187 : return SDValue();
15698 : }
15699 :
15700 : // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15701 697780 : SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15702 : // We perform this optimization post type-legalization because
15703 : // the type-legalizer often scalarizes integer-promoted vectors.
15704 : // Performing this optimization earlier may create bit-casts which
15705 : // will be type-legalized to complex code sequences.
15706 : // We perform this optimization only before the operation legalizer because we
15707 : // may introduce illegal operations.
15708 697780 : if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15709 572087 : return SDValue();
15710 :
15711 125693 : unsigned NumInScalars = N->getNumOperands();
15712 : SDLoc DL(N);
15713 125693 : EVT VT = N->getValueType(0);
15714 :
15715 : // Check to see if this is a BUILD_VECTOR of a bunch of values
15716 : // which come from any_extend or zero_extend nodes. If so, we can create
15717 : // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15718 : // optimizations. We do not handle sign-extend because we can't fill the sign
15719 : // using shuffles.
15720 125693 : EVT SourceType = MVT::Other;
15721 : bool AllAnyExt = true;
15722 :
15723 136494 : for (unsigned i = 0; i != NumInScalars; ++i) {
15724 267554 : SDValue In = N->getOperand(i);
15725 : // Ignore undef inputs.
15726 133777 : if (In.isUndef()) continue;
15727 :
15728 132453 : bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
15729 : bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15730 :
15731 : // Abort if the element is not an extension.
15732 132453 : if (!ZeroExt && !AnyExt) {
15733 122974 : SourceType = MVT::Other;
15734 122974 : break;
15735 : }
15736 :
15737 : // The input is a ZeroExt or AnyExt. Check the original type.
15738 9479 : EVT InTy = In.getOperand(0).getValueType();
15739 :
15740 : // Check that all of the widened source types are the same.
15741 : if (SourceType == MVT::Other)
15742 : // First time.
15743 3342 : SourceType = InTy;
15744 6137 : else if (InTy != SourceType) {
15745 : // Multiple input types. Abort.
15746 2 : SourceType = MVT::Other;
15747 2 : break;
15748 : }
15749 :
15750 : // Check if all of the extends are ANY_EXTENDs.
15751 : AllAnyExt &= AnyExt;
15752 : }
15753 :
15754 : // In order to have valid types, all of the inputs must be extended from the
15755 : // same source type and all of the inputs must be any or zero extend.
15756 : // Scalar sizes must be a power of two.
15757 125693 : EVT OutScalarTy = VT.getScalarType();
15758 125693 : bool ValidTypes = SourceType != MVT::Other &&
15759 2717 : isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
15760 2717 : isPowerOf2_32(SourceType.getSizeInBits());
15761 :
15762 : // Create a new simpler BUILD_VECTOR sequence which other optimizations can
15763 : // turn into a single shuffle instruction.
15764 : if (!ValidTypes)
15765 122976 : return SDValue();
15766 :
15767 2717 : bool isLE = DAG.getDataLayout().isLittleEndian();
15768 2717 : unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
15769 : assert(ElemRatio > 1 && "Invalid element size ratio");
15770 982 : SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
15771 2717 : DAG.getConstant(0, DL, SourceType);
15772 :
15773 2717 : unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
15774 2717 : SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
15775 :
15776 : // Populate the new build_vector
15777 11469 : for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15778 17504 : SDValue Cast = N->getOperand(i);
15779 : assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
15780 : Cast.getOpcode() == ISD::ZERO_EXTEND ||
15781 : Cast.isUndef()) && "Invalid cast opcode");
15782 : SDValue In;
15783 8752 : if (Cast.isUndef())
15784 74 : In = DAG.getUNDEF(SourceType);
15785 : else
15786 8678 : In = Cast->getOperand(0);
15787 8752 : unsigned Index = isLE ? (i * ElemRatio) :
15788 22 : (i * ElemRatio + (ElemRatio - 1));
15789 :
15790 : assert(Index < Ops.size() && "Invalid index");
15791 17504 : Ops[Index] = In;
15792 : }
15793 :
15794 : // The type of the new BUILD_VECTOR node.
15795 2717 : EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
15796 : assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
15797 : "Invalid vector size");
15798 : // Check if the new vector type is legal.
15799 2717 : if (!isTypeLegal(VecVT) ||
15800 : (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
15801 : TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
15802 1304 : return SDValue();
15803 :
15804 : // Make the new BUILD_VECTOR.
15805 2826 : SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
15806 :
15807 : // The new BUILD_VECTOR node has the potential to be further optimized.
15808 1413 : AddToWorklist(BV.getNode());
15809 : // Bitcast to the desired type.
15810 1413 : return DAG.getBitcast(VT, BV);
15811 : }
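// Illustrative sketch: the slot placement used when populating the new
// BUILD_VECTOR above. Each extended element covers ElemRatio slots of the
// source type; its payload lands in the first slot on little-endian targets
// and the last slot on big-endian ones (standalone example).
#include <cassert>

static unsigned extSourceSlot(bool IsLE, unsigned i, unsigned ElemRatio) {
  return IsLE ? i * ElemRatio : i * ElemRatio + (ElemRatio - 1);
}

static void extSourceSlotExample() {
  // v2i64 built from zext'd i32 values: ElemRatio = 2.
  assert(extSourceSlot(true, 0, 2) == 0 && extSourceSlot(true, 1, 2) == 2);
  assert(extSourceSlot(false, 0, 2) == 1 && extSourceSlot(false, 1, 2) == 3);
}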
15812 :
15813 696367 : SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
15814 696367 : EVT VT = N->getValueType(0);
15815 :
15816 696367 : unsigned NumInScalars = N->getNumOperands();
15817 : SDLoc DL(N);
15818 :
15819 : EVT SrcVT = MVT::Other;
15820 : unsigned Opcode = ISD::DELETED_NODE;
15821 : unsigned NumDefs = 0;
15822 :
15823 703634 : for (unsigned i = 0; i != NumInScalars; ++i) {
15824 1406202 : SDValue In = N->getOperand(i);
15825 : unsigned Opc = In.getOpcode();
15826 :
15827 703101 : if (Opc == ISD::UNDEF)
15828 : continue;
15829 :
15830 : // Check whether all scalar values are floats converted from integers.
15831 697929 : if (Opcode == ISD::DELETED_NODE &&
15832 696367 : (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
15833 : Opcode = Opc;
15834 : }
15835 :
15836 697929 : if (Opc != Opcode)
15837 695834 : return SDValue();
15838 :
15839 2095 : EVT InVT = In.getOperand(0).getValueType();
15840 :
15841 : // If the scalar values are typed differently, bail out. This restriction
15842 : // is chosen to simplify handling BUILD_VECTOR of integer types.
15843 : if (SrcVT == MVT::Other)
15844 543 : SrcVT = InVT;
15845 0 : if (SrcVT != InVT)
15846 0 : return SDValue();
15847 2095 : NumDefs++;
15848 : }
15849 :
15850 : // If the vector has just one element defined, it's not worth folding it
15851 : // into a vectorized one.
15852 533 : if (NumDefs < 2)
15853 5 : return SDValue();
15854 :
15855 : assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
15856 : && "Should only handle conversion from integer to float.");
15857 : assert(SrcVT != MVT::Other && "Cannot determine source type!");
15858 :
15859 528 : EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
15860 :
15861 528 : if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
15862 510 : return SDValue();
15863 :
15864 : // Just because the floating-point vector type is legal does not necessarily
15865 : // mean that the corresponding integer vector type is.
15866 18 : if (!isTypeLegal(NVT))
15867 0 : return SDValue();
15868 :
15869 : SmallVector<SDValue, 8> Opnds;
15870 78 : for (unsigned i = 0; i != NumInScalars; ++i) {
15871 120 : SDValue In = N->getOperand(i);
15872 :
15873 60 : if (In.isUndef())
15874 12 : Opnds.push_back(DAG.getUNDEF(SrcVT));
15875 : else
15876 48 : Opnds.push_back(In.getOperand(0));
15877 : }
15878 36 : SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
15879 18 : AddToWorklist(BV.getNode());
15880 :
15881 36 : return DAG.getNode(Opcode, DL, VT, BV);
15882 : }
15883 :
15884 0 : SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
15885 : ArrayRef<int> VectorMask,
15886 : SDValue VecIn1, SDValue VecIn2,
15887 : unsigned LeftIdx) {
15888 0 : MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15889 0 : SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
15890 :
15891 0 : EVT VT = N->getValueType(0);
15892 0 : EVT InVT1 = VecIn1.getValueType();
15893 0 : EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
15894 :
15895 : unsigned Vec2Offset = 0;
15896 : unsigned NumElems = VT.getVectorNumElements();
15897 : unsigned ShuffleNumElems = NumElems;
15898 :
15899 : // If both input vectors are extracted from the same base vector, we do
15900 : // not need the extra addend (Vec2Offset) when computing the shuffle
15901 : // mask.
15902 0 : if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15903 0 : !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15904 0 : !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
15905 : Vec2Offset = InVT1.getVectorNumElements();
15906 :
15907 : // We can't generate a shuffle node with mismatched input and output types.
15908 : // Try to make the types match the type of the output.
15909 0 : if (InVT1 != VT || InVT2 != VT) {
15910 0 : if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
15911 : // If the output vector length is a multiple of both input lengths,
15912 : // we can concatenate them and pad the rest with undefs.
15913 0 : unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
15914 : assert(NumConcats >= 2 && "Concat needs at least two inputs!");
15915 0 : SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
15916 0 : ConcatOps[0] = VecIn1;
15917 0 : ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
15918 0 : VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15919 0 : VecIn2 = SDValue();
15920 0 : } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
15921 0 : if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
15922 0 : return SDValue();
15923 :
15924 0 : if (!VecIn2.getNode()) {
15925 : // If we only have one input vector, and it's twice the size of the
15926 : // output, split it in two.
15927 0 : VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
15928 0 : DAG.getConstant(NumElems, DL, IdxTy));
15929 0 : VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
15930 : // Since we now have shorter input vectors, adjust the offset of the
15931 : // second vector's start.
15932 : Vec2Offset = NumElems;
15933 0 : } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
15934 : // VecIn1 is wider than the output, and we have another, possibly
15935 : // smaller input. Pad the smaller input with undefs, shuffle at the
15936 : // input vector width, and extract the output.
15937 : // The shuffle type is different than VT, so check legality again.
15938 0 : if (LegalOperations &&
15939 0 : !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
15940 0 : return SDValue();
15941 :
15942 : // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
15943 : // lower it back into a BUILD_VECTOR. So if the inserted type is
15944 : // illegal, don't even try.
15945 0 : if (InVT1 != InVT2) {
15946 0 : if (!TLI.isTypeLegal(InVT2))
15947 0 : return SDValue();
15948 0 : VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
15949 0 : DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
15950 : }
15951 0 : ShuffleNumElems = NumElems * 2;
15952 : } else {
15953 : // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
15954 : // than VecIn1. We can't handle this for now - this case will disappear
15955 : // when we start sorting the vectors by type.
15956 0 : return SDValue();
15957 : }
15958 0 : } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
15959 : InVT1.getSizeInBits() == VT.getSizeInBits()) {
15960 0 : SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
15961 0 : ConcatOps[0] = VecIn2;
15962 0 : VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15963 : } else {
15964 : // TODO: Support cases where the length mismatch isn't exactly by a
15965 : // factor of 2.
15966 : // TODO: Move this check upwards, so that if we have bad type
15967 : // mismatches, we don't create any DAG nodes.
15968 0 : return SDValue();
15969 : }
15970 : }
15971 :
15972 : // Initialize mask to undef.
15973 0 : SmallVector<int, 8> Mask(ShuffleNumElems, -1);
15974 :
15975 : // Only need to run up to the number of elements actually used, not the
15976 : // total number of elements in the shuffle - if we are shuffling a wider
15977 : // vector, the high lanes should be set to undef.
15978 0 : for (unsigned i = 0; i != NumElems; ++i) {
15979 0 : if (VectorMask[i] <= 0)
15980 0 : continue;
15981 :
15982 0 : unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
15983 0 : if (VectorMask[i] == (int)LeftIdx) {
15984 0 : Mask[i] = ExtIndex;
15985 0 : } else if (VectorMask[i] == (int)LeftIdx + 1) {
15986 0 : Mask[i] = Vec2Offset + ExtIndex;
15987 : }
15988 : }
15989 :
15990 : // The type of the input vectors may have changed above.
15991 0 : InVT1 = VecIn1.getValueType();
15992 :
15993 : // If we already have a VecIn2, it should have the same type as VecIn1.
15994 : // If we don't, get an undef/zero vector of the appropriate type.
15995 0 : VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
15996 : assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
15997 :
15998 0 : SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
15999 0 : if (ShuffleNumElems > NumElems)
16000 0 : Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16001 :
16002 0 : return Shuffle;
16003 : }
16004 :
16005 : // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16006 : // operations. If the types of the vectors we're extracting from allow it,
16007 : // turn this into a vector_shuffle node.
16008 696349 : SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16009 : SDLoc DL(N);
16010 696349 : EVT VT = N->getValueType(0);
16011 :
16012 : // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16013 696349 : if (!isTypeLegal(VT))
16014 0 : return SDValue();
16015 :
16016 : // May only combine to shuffle after legalize if shuffle is legal.
16017 696349 : if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16018 369950 : return SDValue();
16019 :
16020 : bool UsesZeroVector = false;
16021 326399 : unsigned NumElems = N->getNumOperands();
16022 :
16023 : // Record, for each element of the newly built vector, which input vector
16024 : // that element comes from. -1 stands for undef, 0 for the zero vector,
16025 : // and positive values for the input vectors.
16026 : // VectorMask maps each element to its vector number, and VecIn maps vector
16027 : // numbers to their initial SDValues.
16028 :
16029 326399 : SmallVector<int, 8> VectorMask(NumElems, -1);
16030 : SmallVector<SDValue, 8> VecIn;
16031 326399 : VecIn.push_back(SDValue());
16032 :
16033 2040534 : for (unsigned i = 0; i != NumElems; ++i) {
16034 3845394 : SDValue Op = N->getOperand(i);
16035 :
16036 1922697 : if (Op.isUndef())
16037 1685484 : continue;
16038 :
16039 : // See if we can use a blend with a zero vector.
16040 : // TODO: Should we generalize this to a blend with an arbitrary constant
16041 : // vector?
16042 1888797 : if (isNullConstant(Op) || isNullFPConstant(Op)) {
16043 : UsesZeroVector = true;
16044 1651584 : VectorMask[i] = 0;
16045 1651584 : continue;
16046 : }
16047 :
16048 : // Not an undef or zero. If the input is something other than an
16049 : // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16050 237213 : if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16051 : !isa<ConstantSDNode>(Op.getOperand(1)))
16052 208457 : return SDValue();
16053 28756 : SDValue ExtractedFromVec = Op.getOperand(0);
16054 :
16055 28756 : APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
16056 115024 : if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16057 2 : return SDValue();
16058 :
16059 : // All inputs must have the same element type as the output.
16060 28754 : if (VT.getVectorElementType() !=
16061 57508 : ExtractedFromVec.getValueType().getVectorElementType())
16062 103 : return SDValue();
16063 :
16064 : // Have we seen this input vector before?
16065 : // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16066 : // a map back from SDValues to numbers isn't worth it.
16067 : unsigned Idx = std::distance(
16068 : VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16069 28651 : if (Idx == VecIn.size())
16070 7880 : VecIn.push_back(ExtractedFromVec);
16071 :
16072 28651 : VectorMask[i] = Idx;
16073 : }
16074 :
16075 : // If we didn't find at least one input vector, bail out.
16076 235674 : if (VecIn.size() < 2)
16077 111226 : return SDValue();
16078 :
16079 : // If all the operands of the BUILD_VECTOR extract from the same
16080 : // vector, then split that vector based on the maximum
16081 : // vector access index and adjust the VectorMask and
16082 : // VecIn accordingly.
16083 6611 : if (VecIn.size() == 2) {
16084 5817 : unsigned MaxIndex = 0;
16085 : unsigned NearestPow2 = 0;
16086 5817 : SDValue Vec = VecIn.back();
16087 5817 : EVT InVT = Vec.getValueType();
16088 5817 : MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16089 5817 : SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16090 :
16091 56436 : for (unsigned i = 0; i < NumElems; i++) {
16092 101238 : if (VectorMask[i] <= 0)
16093 27627 : continue;
16094 68976 : unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16095 22992 : IndexVec[i] = Index;
16096 22992 : MaxIndex = std::max(MaxIndex, Index);
16097 : }
16098 :
16099 5817 : NearestPow2 = PowerOf2Ceil(MaxIndex);
16100 5817 : if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16101 2981 : NumElems * 2 < NearestPow2) {
16102 462 : unsigned SplitSize = NearestPow2 / 2;
16103 462 : EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16104 462 : InVT.getVectorElementType(), SplitSize);
16105 462 : if (TLI.isTypeLegal(SplitVT)) {
16106 324 : SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16107 324 : DAG.getConstant(SplitSize, DL, IdxTy));
16108 324 : SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16109 324 : DAG.getConstant(0, DL, IdxTy));
16110 : VecIn.pop_back();
16111 324 : VecIn.push_back(VecIn1);
16112 324 : VecIn.push_back(VecIn2);
16113 :
16114 2850 : for (unsigned i = 0; i < NumElems; i++) {
16115 5052 : if (VectorMask[i] <= 0)
16116 : continue;
16117 3538 : VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16118 : }
16119 : }
16120 : }
16121 : }
16122 :
16123 : // TODO: We want to sort the vectors by descending length, so that adjacent
16124 : // pairs have similar length, and the longer vector is always first in the
16125 : // pair.
16126 :
16127 : // TODO: Should this fire if some of the input vectors has illegal type (like
16128 : // it does now), or should we let legalization run its course first?
16129 :
16130 : // Shuffle phase:
16131 : // Take pairs of vectors, and shuffle them so that the result has elements
16132 : // from these vectors in the correct places.
16133 : // For example, given:
16134 : // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16135 : // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16136 : // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16137 : // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16138 : // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16139 : // We will generate:
16140 : // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16141 : // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16142 : SmallVector<SDValue, 4> Shuffles;
16143 19172 : for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16144 6782 : unsigned LeftIdx = 2 * In + 1;
16145 6782 : SDValue VecLeft = VecIn[LeftIdx];
16146 : SDValue VecRight =
16147 6782 : (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16148 :
16149 6782 : if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16150 6782 : VecRight, LeftIdx))
16151 5950 : Shuffles.push_back(Shuffle);
16152 : else
16153 832 : return SDValue();
16154 : }
16155 :
16156 : // If we need the zero vector as an "ingredient" in the blend tree, add it
16157 : // to the list of shuffles.
16158 5779 : if (UsesZeroVector)
16159 737 : Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16160 158 : : DAG.getConstantFP(0.0, DL, VT));
16161 :
16162 : // If we only have one shuffle, we're done.
16163 5779 : if (Shuffles.size() == 1)
16164 5061 : return Shuffles[0];
16165 :
16166 : // Update the vector mask to point to the post-shuffle vectors.
16167 6310 : for (int &Vec : VectorMask)
16168 5592 : if (Vec == 0)
16169 2210 : Vec = Shuffles.size() - 1;
16170 : else
16171 3382 : Vec = (Vec - 1) / 2;
16172 :
16173 : // More than one shuffle. Generate a binary tree of blends, e.g. if from
16174 : // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16175 : // generate:
16176 : // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16177 : // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16178 : // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16179 : // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16180 : // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16181 : // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16182 : // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16183 :
16184 : // Make sure the initial size of the shuffle list is even.
16185 718 : if (Shuffles.size() % 2)
16186 2 : Shuffles.push_back(DAG.getUNDEF(VT));
16187 :
16188 1451 : for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16189 733 : if (CurSize % 2) {
16190 0 : Shuffles[CurSize] = DAG.getUNDEF(VT);
16191 0 : CurSize++;
16192 : }
16193 1485 : for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16194 752 : int Left = 2 * In;
16195 752 : int Right = 2 * In + 1;
16196 752 : SmallVector<int, 8> Mask(NumElems, -1);
16197 6872 : for (unsigned i = 0; i != NumElems; ++i) {
16198 12240 : if (VectorMask[i] == Left) {
16199 2792 : Mask[i] = i;
16200 2792 : VectorMask[i] = In;
16201 3328 : } else if (VectorMask[i] == Right) {
16202 2912 : Mask[i] = i + NumElems;
16203 2912 : VectorMask[i] = In;
16204 : }
16205 : }
16206 :
16207 752 : Shuffles[In] =
16208 3008 : DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16209 : }
16210 : }
16211 718 : return Shuffles[0];
16212 : }
16213 :
16214 : // Try to turn a build vector of zero extends of extract vector elts into
16215 : // a vector zero extend and possibly an extract subvector.
16216 : // TODO: Support sign extend or any extend?
16217 : // TODO: Allow undef elements?
16218 : // TODO: Don't require the extracts to start at element 0.
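        : // An illustrative sketch of the intended fold (X is a placeholder
        : // v8i16 value):
        : //   (v4i32 build_vector (zext (extract_vector_elt X, 0)),
        : //                       (zext (extract_vector_elt X, 1)),
        : //                       (zext (extract_vector_elt X, 2)),
        : //                       (zext (extract_vector_elt X, 3)))
        : //     --> (v4i32 zero_extend (v4i16 extract_subvector X, 0))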
16219 697793 : SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16220 697793 : if (LegalOperations)
16221 370717 : return SDValue();
16222 :
16223 327076 : EVT VT = N->getValueType(0);
16224 :
16225 327076 : SDValue Op0 = N->getOperand(0);
16226 : auto checkElem = [&](SDValue Op) -> int64_t {
16227 : if (Op.getOpcode() == ISD::ZERO_EXTEND &&
16228 : Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16229 : Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
16230 : if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
16231 : return C->getZExtValue();
16232 : return -1;
16233 327076 : };
16234 :
16235 : // Make sure the first element matches
16236 : // (zext (extract_vector_elt X, C))
16237 327076 : int64_t Offset = checkElem(Op0);
16238 327076 : if (Offset < 0)
16239 327057 : return SDValue();
16240 :
16241 19 : unsigned NumElems = N->getNumOperands();
16242 38 : SDValue In = Op0.getOperand(0).getOperand(0);
16243 19 : EVT InSVT = In.getValueType().getScalarType();
16244 19 : EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
16245 :
16246 : // Don't create an illegal input type after type legalization.
16247 19 : if (LegalTypes && !TLI.isTypeLegal(InVT))
16248 0 : return SDValue();
16249 :
16250 : // Ensure all the elements come from the same vector and are adjacent.
16251 53 : for (unsigned i = 1; i != NumElems; ++i) {
16252 80 : if ((Offset + i) != checkElem(N->getOperand(i)))
16253 6 : return SDValue();
16254 : }
16255 :
16256 : SDLoc DL(N);
16257 13 : In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
16258 26 : Op0.getOperand(0).getOperand(1));
16259 26 : return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
16260 : }
16261 :
16262 698113 : SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
16263 698113 : EVT VT = N->getValueType(0);
16264 :
16265 : // A vector built entirely of undefs is undef.
16266 698113 : if (ISD::allOperandsUndef(N))
16267 154 : return DAG.getUNDEF(VT);
16268 :
16269 : // If this is a splat of a bitcast from another vector, change to a
16270 : // concat_vector.
16271 : // For example:
16272 : // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
16273 : // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
16274 : //
16275 : // If X is a build_vector itself, the concat can become a larger build_vector.
16276 : // TODO: Maybe this is useful for non-splat too?
16277 697959 : if (!LegalOperations) {
16278 327242 : if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
16279 253623 : Splat = peekThroughBitcasts(Splat);
16280 507246 : EVT SrcVT = Splat.getValueType();
16281 253623 : if (SrcVT.isVector()) {
16282 9 : unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
16283 9 : EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
16284 9 : SrcVT.getVectorElementType(), NumElts);
16285 9 : if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
16286 8 : SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
16287 8 : SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
16288 8 : NewVT, Ops);
16289 8 : return DAG.getBitcast(VT, Concat);
16290 : }
16291 : }
16292 : }
16293 : }
16294 :
16295 : // Check if we can express the BUILD_VECTOR via a subvector extract.
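        : // An illustrative sketch (X is a placeholder v8i32 value):
        : //   (v4i32 build_vector (extract_vector_elt X, 4),
        : //                       (extract_vector_elt X, 5),
        : //                       (extract_vector_elt X, 6),
        : //                       (extract_vector_elt X, 7))
        : //     --> (v4i32 extract_subvector X, 4)
        : // The first index must be a multiple of the result's element count.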
16296 697951 : if (!LegalTypes && (N->getNumOperands() > 1)) {
16297 230896 : SDValue Op0 = N->getOperand(0);
16298 : auto checkElem = [&](SDValue Op) -> uint64_t {
16299 : if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
16300 : (Op0.getOperand(0) == Op.getOperand(0)))
16301 : if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
16302 : return CNode->getZExtValue();
16303 : return -1;
16304 230896 : };
16305 :
16306 230896 : int Offset = checkElem(Op0);
16307 234797 : for (unsigned i = 0; i < N->getNumOperands(); ++i) {
16308 469254 : if (Offset + i != checkElem(N->getOperand(i))) {
16309 : Offset = -1;
16310 : break;
16311 : }
16312 : }
16313 :
16314 230896 : if ((Offset == 0) &&
16315 274 : (Op0.getOperand(0).getValueType() == N->getValueType(0)))
16316 158 : return Op0.getOperand(0);
16317 230805 : if ((Offset != -1) &&
16318 230963 : ((Offset % N->getValueType(0).getVectorNumElements()) ==
16319 : 0)) // IDX must be multiple of output size.
16320 67 : return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
16321 134 : Op0.getOperand(0), Op0.getOperand(1));
16322 : }
16323 :
16324 697793 : if (SDValue V = convertBuildVecZextToZext(N))
16325 13 : return V;
16326 :
16327 697780 : if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
16328 1413 : return V;
16329 :
16330 696367 : if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
16331 18 : return V;
16332 :
16333 696349 : if (SDValue V = reduceBuildVecToShuffle(N))
16334 5779 : return V;
16335 :
16336 690570 : return SDValue();
16337 : }
16338 :
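        : // Fold a CONCAT_VECTORS of scalars bitcast to illegal vector types (or
        : // undef) into a bitcast of a BUILD_VECTOR. An illustrative sketch,
        : // assuming v2i32 is not a legal type for the target (a and b are
        : // placeholder scalars):
        : //   (v4i32 concat_vectors (v2i32 bitcast (i64 a)),
        : //                         (v2i32 bitcast (i64 b)))
        : //     --> (v4i32 bitcast (v2i64 build_vector a, b))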
16339 26693 : static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16340 26693 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16341 53386 : EVT OpVT = N->getOperand(0).getValueType();
16342 :
16343 : // If the operands are legal vectors, leave them alone.
16344 : if (TLI.isTypeLegal(OpVT))
16345 22718 : return SDValue();
16346 :
16347 : SDLoc DL(N);
16348 7950 : EVT VT = N->getValueType(0);
16349 : SmallVector<SDValue, 8> Ops;
16350 :
16351 3975 : EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16352 3975 : SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16353 :
16354 : // Keep track of what we encounter.
16355 : bool AnyInteger = false;
16356 : bool AnyFP = false;
16357 4055 : for (const SDValue &Op : N->ops()) {
16358 8256 : if (ISD::BITCAST == Op.getOpcode() &&
16359 462 : !Op.getOperand(0).getValueType().isVector())
16360 66 : Ops.push_back(Op.getOperand(0));
16361 3963 : else if (ISD::UNDEF == Op.getOpcode())
16362 17 : Ops.push_back(ScalarUndef);
16363 : else
16364 3946 : return SDValue();
16365 :
16366 : // Note whether we encounter an integer or floating point scalar.
16367 : // If it's neither, bail out, it could be something weird like x86mmx.
16368 83 : EVT LastOpVT = Ops.back().getValueType();
16369 83 : if (LastOpVT.isFloatingPoint())
16370 : AnyFP = true;
16371 74 : else if (LastOpVT.isInteger())
16372 : AnyInteger = true;
16373 : else
16374 3 : return SDValue();
16375 : }
16376 :
16377 : // If any of the operands is a floating point scalar bitcast to a vector,
16378 : // use floating point types throughout, and bitcast everything.
16379 : // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16380 26 : if (AnyFP) {
16381 3 : SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16382 3 : ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16383 3 : if (AnyInteger) {
16384 10 : for (SDValue &Op : Ops) {
16385 16 : if (Op.getValueType() == SVT)
16386 5 : continue;
16387 3 : if (Op.isUndef())
16388 1 : Op = ScalarUndef;
16389 : else
16390 2 : Op = DAG.getBitcast(SVT, Op);
16391 : }
16392 : }
16393 : }
16394 :
16395 26 : EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16396 26 : VT.getSizeInBits() / SVT.getSizeInBits());
16397 26 : return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16398 : }
16399 :
16400 : // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16401 : // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16402 : // most two distinct vectors of the same size as the result, attempt to turn this
16403 : // into a legal shuffle.
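        : // An illustrative sketch (X and Y are placeholder v4i32 values):
        : //   (v4i32 concat_vectors (v2i32 extract_subvector X, 0),
        : //                         (v2i32 extract_subvector Y, 2))
        : //     --> (v4i32 vector_shuffle<0,1,6,7> X, Y)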
16404 12000 : static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16405 12000 : EVT VT = N->getValueType(0);
16406 24000 : EVT OpVT = N->getOperand(0).getValueType();
16407 12000 : int NumElts = VT.getVectorNumElements();
16408 12000 : int NumOpElts = OpVT.getVectorNumElements();
16409 :
16410 12000 : SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16411 : SmallVector<int, 8> Mask;
16412 :
16413 16285 : for (SDValue Op : N->ops()) {
16414 14900 : Op = peekThroughBitcasts(Op);
16415 :
16416 : // UNDEF nodes convert to UNDEF shuffle mask values.
16417 14900 : if (Op.isUndef()) {
16418 2775 : Mask.append((unsigned)NumOpElts, -1);
16419 2775 : continue;
16420 : }
16421 :
16422 12125 : if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16423 10144 : return SDValue();
16424 :
16425 : // What vector are we extracting the subvector from and at what index?
16426 1981 : SDValue ExtVec = Op.getOperand(0);
16427 :
16428 : // We want the EVT of the original extraction to correctly scale the
16429 : // extraction index.
16430 1981 : EVT ExtVT = ExtVec.getValueType();
16431 1981 : ExtVec = peekThroughBitcasts(ExtVec);
16432 :
16433 : // UNDEF nodes convert to UNDEF shuffle mask values.
16434 1981 : if (ExtVec.isUndef()) {
16435 0 : Mask.append((unsigned)NumOpElts, -1);
16436 0 : continue;
16437 : }
16438 :
16439 : if (!isa<ConstantSDNode>(Op.getOperand(1)))
16440 0 : return SDValue();
16441 1981 : int ExtIdx = Op.getConstantOperandVal(1);
16442 :
16443 : // Ensure that we are extracting a subvector from a vector of the same
16444 : // size as the result.
16445 1981 : if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16446 471 : return SDValue();
16447 :
16448 : // Scale the subvector index to account for any bitcast.
16449 1510 : int NumExtElts = ExtVT.getVectorNumElements();
16450 1510 : if (0 == (NumExtElts % NumElts))
16451 1489 : ExtIdx /= (NumExtElts / NumElts);
16452 21 : else if (0 == (NumElts % NumExtElts))
16453 21 : ExtIdx *= (NumElts / NumExtElts);
16454 : else
16455 0 : return SDValue();
16456 :
16457 : // At most we can reference 2 inputs in the final shuffle.
16458 1510 : if (SV0.isUndef() || SV0 == ExtVec) {
16459 1428 : SV0 = ExtVec;
16460 7083 : for (int i = 0; i != NumOpElts; ++i)
16461 5655 : Mask.push_back(i + ExtIdx);
16462 82 : } else if (SV1.isUndef() || SV1 == ExtVec) {
16463 82 : SV1 = ExtVec;
16464 619 : for (int i = 0; i != NumOpElts; ++i)
16465 537 : Mask.push_back(i + ExtIdx + NumElts);
16466 : } else {
16467 0 : return SDValue();
16468 : }
16469 : }
16470 :
16471 2770 : if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16472 14 : return SDValue();
16473 :
16474 1371 : return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16475 4113 : DAG.getBitcast(VT, SV1), Mask);
16476 : }
16477 :
16478 30650 : SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16479 : // If we only have one input vector, we don't need to do any concatenation.
16480 30650 : if (N->getNumOperands() == 1)
16481 0 : return N->getOperand(0);
16482 :
16483 : // Check if all of the operands are undefs.
16484 30650 : EVT VT = N->getValueType(0);
16485 30650 : if (ISD::allOperandsUndef(N))
16486 2 : return DAG.getUNDEF(VT);
16487 :
16488 : // Optimize concat_vectors where all but the first of the vectors are undef.
16489 61296 : if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16490 0 : return Op.isUndef();
16491 : })) {
16492 7728 : SDValue In = N->getOperand(0);
16493 : assert(In.getValueType().isVector() && "Must concat vectors");
16494 :
16495 : // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
16496 8184 : if (In->getOpcode() == ISD::BITCAST &&
16497 8543 : !In->getOperand(0).getValueType().isVector()) {
16498 97 : SDValue Scalar = In->getOperand(0);
16499 :
16500 : // If the bitcast type isn't legal, it might be a trunc of a legal type;
16501 : // look through the trunc so we can still do the transform:
16502 : // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16503 97 : if (Scalar->getOpcode() == ISD::TRUNCATE &&
16504 97 : !TLI.isTypeLegal(Scalar.getValueType()) &&
16505 4 : TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16506 4 : Scalar = Scalar->getOperand(0);
16507 :
16508 97 : EVT SclTy = Scalar->getValueType(0);
16509 :
16510 97 : if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
16511 17 : return SDValue();
16512 :
16513 : // Bail out if the vector size is not a multiple of the scalar size.
16514 80 : if (VT.getSizeInBits() % SclTy.getSizeInBits())
16515 3 : return SDValue();
16516 :
16517 77 : unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
16518 77 : if (VNTNumElms < 2)
16519 0 : return SDValue();
16520 :
16521 77 : EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
16522 77 : if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
16523 5 : return SDValue();
16524 :
16525 72 : SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
16526 72 : return DAG.getBitcast(VT, Res);
16527 : }
16528 : }
16529 :
16530 : // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
16531 : // We have already tested above for an UNDEF only concatenation.
16532 : // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
16533 : // -> (BUILD_VECTOR A, B, ..., C, D, ...)
16534 : auto IsBuildVectorOrUndef = [](const SDValue &Op) {
16535 0 : return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
16536 : };
16537 30551 : if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
16538 : SmallVector<SDValue, 8> Opnds;
16539 3858 : EVT SVT = VT.getScalarType();
16540 :
16541 3858 : EVT MinVT = SVT;
16542 3858 : if (!SVT.isFloatingPoint()) {
16543 : // If the BUILD_VECTORs are built from integers, they may have different
16544 : // operand types. Get the smallest type and truncate all operands to it.
16545 : bool FoundMinVT = false;
16546 12226 : for (const SDValue &Op : N->ops())
16547 17408 : if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16548 6747 : EVT OpSVT = Op.getOperand(0).getValueType();
16549 9972 : MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
16550 : FoundMinVT = true;
16551 : }
16552 : assert(FoundMinVT && "Concat vector type mismatch");
16553 : }
16554 :
16555 13414 : for (const SDValue &Op : N->ops()) {
16556 19112 : EVT OpVT = Op.getValueType();
16557 : unsigned NumElts = OpVT.getVectorNumElements();
16558 :
16559 9556 : if (ISD::UNDEF == Op.getOpcode())
16560 2243 : Opnds.append(NumElts, DAG.getUNDEF(MinVT));
16561 :
16562 19112 : if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16563 7313 : if (SVT.isFloatingPoint()) {
16564 : assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
16565 566 : Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
16566 : } else {
16567 29584 : for (unsigned i = 0; i != NumElts; ++i)
16568 22837 : Opnds.push_back(
16569 68511 : DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
16570 : }
16571 : }
16572 : }
16573 :
16574 : assert(VT.getVectorNumElements() == Opnds.size() &&
16575 : "Concat vector type mismatch");
16576 7716 : return DAG.getBuildVector(VT, SDLoc(N), Opnds);
16577 : }
16578 :
16579 : // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
16580 26693 : if (SDValue V = combineConcatVectorOfScalars(N, DAG))
16581 26 : return V;
16582 :
16583 : // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
16584 26667 : if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16585 12000 : if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
16586 1371 : return V;
16587 :
16588 : // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
16589 : // nodes often generate nop CONCAT_VECTOR nodes.
16590 : // Scan the CONCAT_VECTOR operands and look for a CONCAT operation that
16591 : // places the incoming vectors at the exact same location.
16592 : SDValue SingleSource = SDValue();
16593 50592 : unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
16594 :
16595 25616 : for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16596 25589 : SDValue Op = N->getOperand(i);
16597 :
16598 25589 : if (Op.isUndef())
16599 : continue;
16600 :
16601 : // Check if this is the identity extract:
16602 25484 : if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16603 24554 : return SDValue();
16604 :
16605 : // Find the single incoming vector for the extract_subvector.
16606 930 : if (SingleSource.getNode()) {
16607 : if (Op.getOperand(0) != SingleSource)
16608 140 : return SDValue();
16609 : } else {
16610 777 : SingleSource = Op.getOperand(0);
16611 :
16612 : // Check that the source type is the same as the type of the result.
16613 : // If not, this concat may extend the vector, so we cannot
16614 : // optimize it away.
16615 777 : if (SingleSource.getValueType() != N->getValueType(0))
16616 573 : return SDValue();
16617 : }
16618 :
16619 217 : unsigned IdentityIndex = i * PartNumElem;
16620 : ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16621 : // The extract index must be constant.
16622 : if (!CS)
16623 0 : return SDValue();
16624 :
16625 : // Check that we are reading from the identity index.
16626 434 : if (CS->getZExtValue() != IdentityIndex)
16627 2 : return SDValue();
16628 : }
16629 :
16630 27 : if (SingleSource.getNode())
16631 27 : return SingleSource;
16632 :
16633 0 : return SDValue();
16634 : }
16635 :
16636 : /// If we are extracting a subvector produced by a wide binary operator with
16637 : /// at least one operand that was the result of a vector concatenation, then try
16638 : /// to use the narrow vector operands directly to avoid the concatenation and
16639 : /// extraction.
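        : /// An illustrative sketch (A, B, C and D are placeholder v4i32 values),
        : /// extracting the high half of a wide AND of two concatenations:
        : ///   (v4i32 extract_subvector (and (v8i32 concat_vectors A, B),
        : ///                                 (v8i32 concat_vectors C, D)), 4)
        : ///     --> (v4i32 and B, D)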
16640 67764 : static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
16641 : // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
16642 : // some of these bailouts with other transforms.
16643 :
16644 : // The extract index must be a constant, so we can map it to a concat operand.
16645 67764 : auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16646 : if (!ExtractIndex)
16647 0 : return SDValue();
16648 :
16649 : // Only handle the case where we are doubling and then halving. A larger ratio
16650 : // may require more than two narrow binops to replace the wide binop.
16651 135528 : EVT VT = Extract->getValueType(0);
16652 : unsigned NumElems = VT.getVectorNumElements();
16653 : assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
16654 : "Extract index is not a multiple of the vector length.");
16655 67764 : if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
16656 9117 : return SDValue();
16657 :
16658 : // We are looking for an optionally bitcasted wide vector binary operator
16659 : // feeding an extract subvector.
16660 117294 : SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
16661 :
16662 : // TODO: The motivating case for this transform is an x86 AVX1 target. That
16663 : // target has temptingly almost legal versions of bitwise logic ops in 256-bit
16664 : // flavors, but no other 256-bit integer support. This could be extended to
16665 : // handle any binop, but that may require fixing/adding other folds to avoid
16666 : // codegen regressions.
16667 : unsigned BOpcode = BinOp.getOpcode();
16668 58647 : if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
16669 56085 : return SDValue();
16670 :
16671 : // The binop must be a vector type, so we can chop it in half.
16672 5124 : EVT WideBVT = BinOp.getValueType();
16673 2562 : if (!WideBVT.isVector())
16674 26 : return SDValue();
16675 :
16676 : // Bail out if the target does not support a narrower version of the binop.
16677 2536 : EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
16678 2536 : WideBVT.getVectorNumElements() / 2);
16679 2536 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16680 2536 : if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
16681 14 : return SDValue();
16682 :
16683 2522 : SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
16684 2522 : SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
16685 :
16686 : // We need at least one concatenation operation of a binop operand to make
16687 : // this transform worthwhile. The concat must double the input vector sizes.
16688 : // TODO: Should we also handle INSERT_SUBVECTOR patterns?
16689 : bool ConcatL =
16690 2522 : LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
16691 : bool ConcatR =
16692 2522 : RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
16693 2522 : if (!ConcatL && !ConcatR)
16694 2377 : return SDValue();
16695 :
16696 : // If one of the binop operands was not the result of a concat, we must
16697 : // extract a half-sized operand for our new narrow binop. We can't just reuse
16698 : // the original extract index operand because we may have bitcasted.
16699 290 : unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
16700 145 : unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
16701 290 : EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
16702 : SDLoc DL(Extract);
16703 :
16704 : // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
16705 : // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
16706 : // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
16707 103 : SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
16708 : : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16709 : BinOp.getOperand(0),
16710 145 : DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16711 :
16712 74 : SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
16713 : : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16714 : BinOp.getOperand(1),
16715 219 : DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16716 :
16717 145 : SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
16718 145 : return DAG.getBitcast(VT, NarrowBinOp);
16719 : }
16720 :
16721 : /// If we are extracting a subvector from a wide vector load, convert to a
16722 : /// narrow load to eliminate the extraction:
16723 : /// (extract_subvector (load wide vector)) --> (load narrow vector)
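        : /// An illustrative sketch, assuming a little-endian target:
        : ///   (v2i64 extract_subvector (v4i64 load %p), 2)
        : ///     --> (v2i64 load %p+16)
        : /// where the 16-byte offset is the extract index (2) times the 8-byte
        : /// element store size.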
16724 70710 : static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
16725 : // TODO: Add support for big-endian. The offset calculation must be adjusted.
16726 70710 : if (DAG.getDataLayout().isBigEndian())
16727 525 : return SDValue();
16728 :
16729 : // TODO: The one-use check is overly conservative. Check the cost of the
16730 : // extract instead or remove that condition entirely.
16731 70185 : auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
16732 : auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16733 70406 : if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
16734 : !ExtIdx)
16735 69975 : return SDValue();
16736 :
16737 : // The narrow load will be offset from the base address of the old load if
16738 : // we are extracting from something besides index 0 (little-endian).
16739 420 : EVT VT = Extract->getValueType(0);
16740 : SDLoc DL(Extract);
16741 210 : SDValue BaseAddr = Ld->getOperand(1);
16742 420 : unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
16743 :
16744 : // TODO: Use "BaseIndexOffset" to make this more effective.
16745 210 : SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
16746 210 : MachineFunction &MF = DAG.getMachineFunction();
16747 210 : MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
16748 : VT.getStoreSize());
16749 210 : SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
16750 210 : DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
16751 210 : return NewLd;
16752 : }
16753 :
16754 74221 : SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
16755 74221 : EVT NVT = N->getValueType(0);
16756 74221 : SDValue V = N->getOperand(0);
16757 :
16758 : // Extract from UNDEF is UNDEF.
16759 74221 : if (V.isUndef())
16760 3 : return DAG.getUNDEF(NVT);
16761 :
16762 74218 : if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
16763 70710 : if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
16764 210 : return NarrowLoad;
16765 :
16766 : // Combine:
16767 : // (extract_subvec (concat V1, V2, ...), i)
16768 : // Into:
16769 : // Vi if possible
16770 : // Only operand 0 is checked, as 'concat' assumes all of its inputs have
16771 : // the same type.
16772 74008 : if (V->getOpcode() == ISD::CONCAT_VECTORS &&
16773 74008 : isa<ConstantSDNode>(N->getOperand(1)) &&
16774 5654 : V->getOperand(0).getValueType() == NVT) {
16775 1846 : unsigned Idx = N->getConstantOperandVal(1);
16776 : unsigned NumElems = NVT.getVectorNumElements();
16777 : assert((Idx % NumElems) == 0 &&
16778 : "IDX in concat is not a multiple of the result vector length.");
16779 3692 : return V->getOperand(Idx / NumElems);
16780 : }
16781 :
16782 72162 : V = peekThroughBitcasts(V);
16783 :
16784 : // If the input is a build vector, try to make a smaller build vector.
16785 72162 : if (V->getOpcode() == ISD::BUILD_VECTOR) {
16786 1892 : if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
16787 1892 : EVT InVT = V->getValueType(0);
16788 1892 : unsigned ExtractSize = NVT.getSizeInBits();
16789 : unsigned EltSize = InVT.getScalarSizeInBits();
16790 : // Only do this if we won't split any elements.
16791 1892 : if (ExtractSize % EltSize == 0) {
16792 1891 : unsigned NumElems = ExtractSize / EltSize;
16793 1891 : EVT EltVT = InVT.getVectorElementType();
16794 : EVT ExtractVT = NumElems == 1 ? EltVT :
16795 1891 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
16796 120 : if ((Level < AfterLegalizeDAG ||
16797 : (NumElems == 1 ||
16798 1894 : TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
16799 1888 : (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
16800 1888 : unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
16801 : EltSize;
16802 1888 : if (NumElems == 1) {
16803 268 : SDValue Src = V->getOperand(IdxVal);
16804 0 : if (EltVT != Src.getValueType())
16805 0 : Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
16806 :
16807 134 : return DAG.getBitcast(NVT, Src);
16808 : }
16809 :
16810 : // Extract the pieces from the original build_vector.
16811 3508 : SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
16812 1754 : makeArrayRef(V->op_begin() + IdxVal,
16813 3508 : NumElems));
16814 1754 : return DAG.getBitcast(NVT, BuildVec);
16815 : }
16816 : }
16817 : }
16818 : }
16819 :
16820 70274 : if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
16821 : // Handle only the simple case where the vector being inserted and the
16822 : // vector being extracted are of the same size.
16823 2510 : EVT SmallVT = V->getOperand(1).getValueType();
16824 2510 : if (!NVT.bitsEq(SmallVT))
16825 499 : return SDValue();
16826 :
16827 : // Only handle cases where both indexes are constants.
16828 2011 : ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
16829 : ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
16830 :
16831 2011 : if (InsIdx && ExtIdx) {
16832 : // Combine:
16833 : // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
16834 : // Into:
16835 : // indices are equal or bit offsets are equal => V1
16836 : // otherwise => (extract_subvec V1, ExtIdx)
16837 2011 : if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
16838 4022 : ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
16839 3352 : return DAG.getBitcast(NVT, V->getOperand(1));
16840 335 : return DAG.getNode(
16841 335 : ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
16842 335 : DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
16843 1675 : N->getOperand(1));
16844 : }
16845 : }
16846 :
16847 67764 : if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
16848 145 : return NarrowBOp;
16849 :
16850 67619 : if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16851 214 : return SDValue(N, 0);
16852 :
16853 67405 : return SDValue();
16854 : }
16855 :
16856 : // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
16857 : // turn a shuffle of a single concat into a simpler shuffle followed by a concat.
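        : // An illustrative sketch (A, B, C and D are placeholder v4i32 values):
        : //   (v8i32 vector_shuffle<4,5,6,7,8,9,10,11> (concat_vectors A, B),
        : //                                            (concat_vectors C, D))
        : //     --> (v8i32 concat_vectors B, C)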
16858 2336 : static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
16859 4672 : EVT VT = N->getValueType(0);
16860 : unsigned NumElts = VT.getVectorNumElements();
16861 :
16862 2336 : SDValue N0 = N->getOperand(0);
16863 2336 : SDValue N1 = N->getOperand(1);
16864 : ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16865 :
16866 : SmallVector<SDValue, 4> Ops;
16867 4672 : EVT ConcatVT = N0.getOperand(0).getValueType();
16868 : unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
16869 2336 : unsigned NumConcats = NumElts / NumElemsPerConcat;
16870 :
16871 : // Special case: shuffle(concat(A,B)) can be more efficiently represented
16872 : // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
16873 : // half vector elements.
16874 3626 : if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
16875 2580 : std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
16876 3106 : SVN->getMask().end(), [](int i) { return i == -1; })) {
16877 520 : N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
16878 520 : makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
16879 520 : N1 = DAG.getUNDEF(ConcatVT);
16880 1040 : return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
16881 : }
16882 :
16883 : // Look at every vector that's inserted. We're looking for exact
16884 : // subvector-sized copies from a concatenated vector
16885 3744 : for (unsigned I = 0; I != NumConcats; ++I) {
16886 : // Make sure we're dealing with a copy.
16887 3361 : unsigned Begin = I * NumElemsPerConcat;
16888 : bool AllUndef = true, NoUndef = true;
16889 26092 : for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
16890 45462 : if (SVN->getMaskElt(J) >= 0)
16891 : AllUndef = false;
16892 : else
16893 : NoUndef = false;
16894 : }
16895 :
16896 3361 : if (NoUndef) {
16897 4644 : if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
16898 255 : return SDValue();
16899 :
16900 8047 : for (unsigned J = 1; J != NumElemsPerConcat; ++J)
16901 21153 : if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
16902 1071 : return SDValue();
16903 :
16904 996 : unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
16905 996 : if (FirstElt < N0.getNumOperands())
16906 956 : Ops.push_back(N0.getOperand(FirstElt));
16907 : else
16908 80 : Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
16909 :
16910 1039 : } else if (AllUndef) {
16911 1864 : Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
16912 : } else { // Mixed with general masks and undefs, can't do optimization.
16913 107 : return SDValue();
16914 : }
16915 : }
16916 :
16917 766 : return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16918 : }
16919 :
16920 : // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16921 : // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16922 : //
16923 : // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
16924 : // a simplification in some sense, but it isn't appropriate in general: some
16925 : // BUILD_VECTORs are substantially cheaper than others. The general case
16926 : // of a BUILD_VECTOR requires inserting each element individually (or
16927 : // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
16928 : // all constants is a single constant pool load. A BUILD_VECTOR where each
16929 : // element is identical is a splat. A BUILD_VECTOR where most of the operands
16930 : // are undef lowers to a small number of element insertions.
16931 : //
16932 : // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
16933 : // We don't fold shuffles where one side is a non-zero constant, and we don't
16934 : // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
16935 : // non-constant operands. This seems to work out reasonably well in practice.
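        : // An illustrative sketch (a..d are placeholder scalars, and both
        : // build_vectors are assumed to have no other uses):
        : //   (v4i32 vector_shuffle<0,4,1,5> (build_vector a, b, c, d),
        : //                                  (build_vector 0, 0, 0, 0))
        : //     --> (v4i32 build_vector a, 0, b, 0)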
16936 43114 : static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
16937 : SelectionDAG &DAG,
16938 : const TargetLowering &TLI) {
16939 86228 : EVT VT = SVN->getValueType(0);
16940 : unsigned NumElts = VT.getVectorNumElements();
16941 43114 : SDValue N0 = SVN->getOperand(0);
16942 43114 : SDValue N1 = SVN->getOperand(1);
16943 :
16944 75026 : if (!N0->hasOneUse() || !N1->hasOneUse())
16945 17728 : return SDValue();
16946 :
16947 : // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
16948 : // discussed above.
16949 25386 : if (!N1.isUndef()) {
16950 12717 : bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
16951 12717 : bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
16952 12717 : if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
16953 27 : return SDValue();
16954 12690 : if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
16955 31 : return SDValue();
16956 : }
16957 :
16958 : // If both inputs are splats of the same value then we can safely merge this
16959 : // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
16960 : bool IsSplat = false;
16961 : auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
16962 : auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
16963 25328 : if (BV0 && BV1)
16964 73 : if (SDValue Splat0 = BV0->getSplatValue())
16965 110 : IsSplat = (Splat0 == BV1->getSplatValue());
16966 :
16967 : SmallVector<SDValue, 8> Ops;
16968 25328 : SmallSet<SDValue, 16> DuplicateOps;
16969 36888 : for (int M : SVN->getMask()) {
16970 36329 : SDValue Op = DAG.getUNDEF(VT.getScalarType());
16971 36329 : if (M >= 0) {
16972 29541 : int Idx = M < (int)NumElts ? M : M - NumElts;
16973 29541 : SDValue &S = (M < (int)NumElts ? N0 : N1);
16974 59082 : if (S.getOpcode() == ISD::BUILD_VECTOR) {
16975 9412 : Op = S.getOperand(Idx);
16976 24835 : } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16977 : assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
16978 184 : Op = S.getOperand(0);
16979 : } else {
16980 : // Operand can't be combined - bail out.
16981 24651 : return SDValue();
16982 : }
16983 : }
16984 :
16985 : // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
16986 : // generating a splat; semantically, this is fine, but it's likely to
16987 : // generate low-quality code if the target can't reconstruct an appropriate
16988 : // shuffle.
16989 23356 : if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
16990 2505 : if (!IsSplat && !DuplicateOps.insert(Op).second)
16991 118 : return SDValue();
16992 :
16993 11560 : Ops.push_back(Op);
16994 : }
16995 :
16996 : // BUILD_VECTOR requires all inputs to be of the same type; find the
16997 : // maximum type and extend them all.
16998 559 : EVT SVT = VT.getScalarType();
16999 559 : if (SVT.isInteger())
17000 7103 : for (SDValue &Op : Ops)
17001 13096 : SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17002 559 : if (SVT != VT.getScalarType())
17003 70 : for (SDValue &Op : Ops)
17004 128 : Op = TLI.isZExtFree(Op.getValueType(), SVT)
17005 128 : ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17006 128 : : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17007 1118 : return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17008 : }
17009 :
17010 : // Match shuffles that can be converted to any_vector_extend_in_reg.
17011 : // This is often generated during legalization.
17012 : // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
17013 : // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17014 66089 : static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17015 : SelectionDAG &DAG,
17016 : const TargetLowering &TLI,
17017 : bool LegalOperations,
17018 : bool LegalTypes) {
17019 66089 : EVT VT = SVN->getValueType(0);
17020 66089 : bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17021 :
17022 : // TODO Add support for big-endian when we have a test case.
17023 66089 : if (!VT.isInteger() || IsBigEndian)
17024 14816 : return SDValue();
17025 :
17026 51273 : unsigned NumElts = VT.getVectorNumElements();
17027 : unsigned EltSizeInBits = VT.getScalarSizeInBits();
17028 51273 : ArrayRef<int> Mask = SVN->getMask();
17029 51273 : SDValue N0 = SVN->getOperand(0);
17030 :
17031 : // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17032 : auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17033 220318 : for (unsigned i = 0; i != NumElts; ++i) {
17034 436044 : if (Mask[i] < 0)
17035 : continue;
17036 185748 : if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17037 : continue;
17038 : return false;
17039 : }
17040 : return true;
17041 : };
17042 :
17043 : // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
17044 : // power-of-2 extensions, as they are the most likely.
17045 171646 : for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17046 : // Check for non power of 2 vector sizes
17047 120659 : if (NumElts % Scale != 0)
17048 118363 : continue;
17049 120640 : if (!isAnyExtend(Scale))
17050 : continue;
17051 :
17052 2296 : EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17053 2296 : EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17054 2296 : if (!LegalTypes || TLI.isTypeLegal(OutVT))
17055 2288 : if (!LegalOperations ||
17056 : TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17057 : return DAG.getBitcast(VT,
17058 572 : DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
17059 : }
17060 :
17061 50987 : return SDValue();
17062 : }
17063 :
17064 : // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17065 : // each source element of a large type into the lowest elements of a smaller
17066 : // destination type. This is often generated during legalization.
17067 : // If the source node itself was a '*_extend_vector_inreg' node then we should
17068 : // then be able to remove it.
17069 65803 : static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17070 : SelectionDAG &DAG) {
17071 65803 : EVT VT = SVN->getValueType(0);
17072 65803 : bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17073 :
17074 : // TODO Add support for big-endian when we have a test case.
17075 65803 : if (!VT.isInteger() || IsBigEndian)
17076 14816 : return SDValue();
17077 :
17078 101974 : SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17079 :
17080 50987 : unsigned Opcode = N0.getOpcode();
17081 50987 : if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17082 50987 : Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17083 : Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17084 50878 : return SDValue();
17085 :
17086 109 : SDValue N00 = N0.getOperand(0);
17087 109 : ArrayRef<int> Mask = SVN->getMask();
17088 109 : unsigned NumElts = VT.getVectorNumElements();
17089 : unsigned EltSizeInBits = VT.getScalarSizeInBits();
17090 109 : unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17091 109 : unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17092 :
17093 109 : if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17094 0 : return SDValue();
17095 109 : unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17096 :
17097 : // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
17098 : // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17099 : // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
17100 : auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17101 39 : for (unsigned i = 0; i != NumElts; ++i) {
17102 76 : if (Mask[i] < 0)
17103 : continue;
17104 32 : if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17105 : continue;
17106 : return false;
17107 : }
17108 : return true;
17109 : };
17110 :
17111 : // At the moment we just handle the case where we've truncated back to the
17112 : // same size as before the extension.
17113 : // TODO: handle more extension/truncation cases as cases arise.
17114 109 : if (EltSizeInBits != ExtSrcSizeInBits)
17115 91 : return SDValue();
17116 :
17117 : // We can remove *extend_vector_inreg only if the truncation happens at
17118 : // the same scale as the extension.
17119 18 : if (isTruncate(ExtScale))
17120 1 : return DAG.getBitcast(VT, N00);
17121 :
17122 17 : return SDValue();
17123 : }
17124 :
17125 : // Combine shuffles of splat-shuffles of the form:
17126 : // shuffle (shuffle V, undef, splat-mask), undef, M
17127 : // If splat-mask contains undef elements, we need to be careful about
17128 : // introducing undef's in the folded mask which are not the result of composing
17129 : // the masks of the shuffles.
17130 101 : static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
17131 : ShuffleVectorSDNode *Splat,
17132 : SelectionDAG &DAG) {
17133 101 : ArrayRef<int> SplatMask = Splat->getMask();
17134 : assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
17135 :
17136 : // Prefer simplifying to the splat-shuffle, if possible. This is legal if
17137 : // every undef mask element in the splat-shuffle has a corresponding undef
17138 : // element in the user-shuffle's mask or if the composition of mask elements
17139 : // would result in undef.
17140 : // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
17141 : // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
17142 : // In this case it is not legal to simplify to the splat-shuffle because we
17143 : // may be exposing to the users of the shuffle an undef element at index 1
17144 : // which was not there before the combine.
17145 : // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
17146 : // In this case the composition of masks yields SplatMask, so it's ok to
17147 : // simplify to the splat-shuffle.
17148 : // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
17149 : // In this case the composed mask includes all undef elements of SplatMask
17150 : // and in addition sets element zero to undef. It is safe to simplify to
17151 : // the splat-shuffle.
17152 : auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
17153 : ArrayRef<int> SplatMask) {
17154 660 : for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
17155 1234 : if (UserMask[i] != -1 && SplatMask[i] == -1 &&
17156 392 : SplatMask[UserMask[i]] != -1)
17157 : return false;
17158 : return true;
17159 : };
17160 101 : if (CanSimplifyToExistingSplat(UserMask, SplatMask))
17161 43 : return SDValue(Splat, 0);
17162 :
17163 : // Create a new shuffle with a mask that is composed of the two shuffles'
17164 : // masks.
17165 : SmallVector<int, 32> NewMask;
17166 402 : for (int Idx : UserMask)
17167 344 : NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
17168 :
17169 58 : return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
17170 58 : Splat->getOperand(0), Splat->getOperand(1),
17171 174 : NewMask);
17172 : }
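// [Editorial sketch, not part of the original source] The mask composition
// used above, on plain integers, reproducing two of the commented examples.
// composeMasks is an illustrative name, not an LLVM API; -1 means undef.
#include <cassert>
#include <vector>

static std::vector<int> composeMasks(const std::vector<int> &UserMask,
                                     const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  return NewMask;
}

int main() {
  // UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]: not SplatMask, so
  // returning the splat-shuffle unchanged would expose a fresh undef lane.
  assert(composeMasks({0, 2, -1, -1}, {2, -1, 2, -1}) ==
         (std::vector<int>{2, 2, -1, -1}));
  // UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]: the composition
  // only adds undefs, so simplifying to the existing splat-shuffle is safe.
  assert(composeMasks({3, -1, 2, -1}, {2, -1, 2, -1}) ==
         (std::vector<int>{-1, -1, 2, -1}));
}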
17173 :
17174 : /// If the shuffle mask is taking exactly one element from the first vector
17175 : /// operand and passing through all other elements from the second vector
17176 : /// operand, return the index of the mask element that is choosing an element
17177 : /// from the first operand. Otherwise, return -1.
17178 : static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
17179 68101 : int MaskSize = Mask.size();
17180 : int EltFromOp0 = -1;
17181 : // TODO: This does not match if there are undef elements in the shuffle mask.
17182 : // Should we ignore undefs in the shuffle mask instead? The trade-off is
17183 : // removing an instruction (a shuffle), but losing the knowledge that some
17184 : // vector lanes are not needed.
17185 309007 : for (int i = 0; i != MaskSize; ++i) {
17186 609474 : if (Mask[i] >= 0 && Mask[i] < MaskSize) {
17187 : // We're looking for a shuffle of exactly one element from operand 0.
17188 143438 : if (EltFromOp0 != -1)
17189 : return -1;
17190 : EltFromOp0 = i;
17191 161299 : } else if (Mask[i] != i + MaskSize) {
17192 : // Nothing from operand 1 can change lanes.
17193 : return -1;
17194 : }
17195 : }
17196 : return EltFromOp0;
17197 : }
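// [Editorial sketch, not part of the original source] The detector above over
// a plain mask: exactly one lane may come from operand 0, and every other
// defined lane i must be the identity i + MaskSize from operand 1. The
// function name is illustrative only.
#include <cassert>
#include <vector>

static int indexOfOneElementFromOp0(const std::vector<int> &Mask) {
  int MaskSize = Mask.size(), EltFromOp0 = -1;
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      if (EltFromOp0 != -1)
        return -1; // More than one element from operand 0.
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      return -1; // Operand 1 elements must not change lanes (undef rejects too).
    }
  }
  return EltFromOp0;
}

int main() {
  // Lane 1 takes element 1 of op0; lanes 0/2/3 pass through op1 unchanged.
  assert(indexOfOneElementFromOp0({4, 1, 6, 7}) == 1);
  // Two lanes from op0 -> reject.
  assert(indexOfOneElementFromOp0({0, 1, 6, 7}) == -1);
  // An op1 element changes lanes -> reject.
  assert(indexOfOneElementFromOp0({4, 1, 7, 6}) == -1);
}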
17198 :
17199 : /// If a shuffle inserts exactly one element from a source vector operand into
17200 : /// another vector operand and we can access the specified element as a scalar,
17201 : /// then we can eliminate the shuffle.
17202 70737 : static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
17203 : SelectionDAG &DAG) {
17204 : // First, check if we are taking one element of a vector and shuffling that
17205 : // element into another vector.
17206 70737 : ArrayRef<int> Mask = Shuf->getMask();
17207 : SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
17208 70737 : SDValue Op0 = Shuf->getOperand(0);
17209 70737 : SDValue Op1 = Shuf->getOperand(1);
17210 : int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
17211 70737 : if (ShufOp0Index == -1) {
17212 : // Commute mask and check again.
17213 : ShuffleVectorSDNode::commuteMask(CommutedMask);
17214 : ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
17215 68101 : if (ShufOp0Index == -1)
17216 66467 : return SDValue();
17217 : // Commute operands to match the commuted shuffle mask.
17218 : std::swap(Op0, Op1);
17219 : Mask = CommutedMask;
17220 : }
17221 :
17222 : // The shuffle inserts exactly one element from operand 0 into operand 1.
17223 : // Now see if we can access that element as a scalar via a real insert element
17224 : // instruction.
17225 : // TODO: We can try harder to locate the element as a scalar. Examples: it
17226 : // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
17227 : assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
17228 : "Shuffle mask value must be from operand 0");
17229 4270 : if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
17230 3911 : return SDValue();
17231 :
17232 : auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
17233 1077 : if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
17234 7 : return SDValue();
17235 :
17236 : // There's an existing insertelement with constant insertion index, so we
17237 : // don't need to check the legality/profitability of a replacement operation
17238 : // that differs at most in the constant value. The target should be able to
17239 : // lower any of those in a similar way. If not, legalization will expand this
17240 : // to a scalar-to-vector plus shuffle.
17241 : //
17242 : // Note that the shuffle may move the scalar from the position that the insert
17243 : // element used. Therefore, our new insert element occurs at the shuffle's
17244 : // mask index value, not the insert's index value.
17245 : // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
17246 352 : SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
17247 352 : Op0.getOperand(2).getValueType());
17248 352 : return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
17249 704 : Op1, Op0.getOperand(1), NewInsIndex);
17250 : }
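// [Editorial sketch, not part of the original source] The equivalence behind
// the fold above, checked on concrete 4-lane integer "vectors". If the
// shuffle takes exactly one lane from (insertelt v1, x, C) and that lane's
// mask value is C, the whole expression is just an insert of x into v2 at
// the shuffle's lane index.
#include <array>
#include <cassert>

int main() {
  std::array<int, 4> v1{10, 11, 12, 13}, v2{20, 21, 22, 23};
  int x = 99, C = 2;

  // Left side: shuffle (insertelt v1, x, C), v2, <4,2,6,7>.
  std::array<int, 4> ins = v1;
  ins[C] = x;
  std::array<int, 4> mask{4, 2, 6, 7}; // Only lane 1 reads op0 (element C).
  std::array<int, 4> shuffled{};
  for (int i = 0; i != 4; ++i)
    shuffled[i] = mask[i] < 4 ? ins[mask[i]] : v2[mask[i] - 4];

  // Right side: insertelt v2, x, 1 -- the new index is the shuffle's lane
  // (ShufOp0Index == 1), not the original insert index C, as noted above.
  std::array<int, 4> folded = v2;
  folded[1] = x;

  assert(shuffled == folded);
}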
17251 :
17252 70837 : SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
17253 141674 : EVT VT = N->getValueType(0);
17254 : unsigned NumElts = VT.getVectorNumElements();
17255 :
17256 70837 : SDValue N0 = N->getOperand(0);
17257 70837 : SDValue N1 = N->getOperand(1);
17258 :
17259 : assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
17260 :
17261 : // Canonicalize shuffle undef, undef -> undef
17262 70837 : if (N0.isUndef() && N1.isUndef())
17263 2 : return DAG.getUNDEF(VT);
17264 :
17265 : ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17266 :
17267 : // Canonicalize shuffle v, v -> v, undef
17268 : if (N0 == N1) {
17269 : SmallVector<int, 8> NewMask;
17270 189 : for (unsigned i = 0; i != NumElts; ++i) {
17271 160 : int Idx = SVN->getMaskElt(i);
17272 160 : if (Idx >= (int)NumElts) Idx -= NumElts;
17273 160 : NewMask.push_back(Idx);
17274 : }
17275 116 : return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
17276 : }
17277 :
17278 : // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
17279 70806 : if (N0.isUndef())
17280 25 : return DAG.getCommutedVectorShuffle(*SVN);
17281 :
17282 : // Remove references to rhs if it is undef
17283 70781 : if (N1.isUndef()) {
17284 : bool Changed = false;
17285 : SmallVector<int, 8> NewMask;
17286 871280 : for (unsigned i = 0; i != NumElts; ++i) {
17287 827741 : int Idx = SVN->getMaskElt(i);
17288 827741 : if (Idx >= (int)NumElts) {
17289 127 : Idx = -1;
17290 : Changed = true;
17291 : }
17292 827741 : NewMask.push_back(Idx);
17293 : }
17294 43539 : if (Changed)
17295 88 : return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
17296 : }
17297 :
17298 70737 : if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
17299 352 : return InsElt;
17300 :
17301 : // A shuffle of a single vector that is a splat can always be folded.
17302 : if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
17303 6831 : if (N1->isUndef() && N0Shuf->isSplat())
17304 101 : return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
17305 :
17306 : // If it is a splat, check if the argument vector is another splat or a
17307 : // build_vector.
17308 70284 : if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
17309 : SDNode *V = N0.getNode();
17310 :
17311 : // If this is a bit convert that changes the element type of the vector but
17312 : // not the number of vector elements, look through it. Be careful not to
17313 : // look through conversions that change things like v4f32 to v2f64.
17314 12865 : if (V->getOpcode() == ISD::BITCAST) {
17315 698 : SDValue ConvInput = V->getOperand(0);
17316 2033 : if (ConvInput.getValueType().isVector() &&
17317 1335 : ConvInput.getValueType().getVectorNumElements() == NumElts)
17318 : V = ConvInput.getNode();
17319 : }
17320 :
17321 12865 : if (V->getOpcode() == ISD::BUILD_VECTOR) {
17322 : assert(V->getNumOperands() == NumElts &&
17323 : "BUILD_VECTOR has wrong number of operands");
17324 : SDValue Base;
17325 : bool AllSame = true;
17326 2457 : for (unsigned i = 0; i != NumElts; ++i) {
17327 7371 : if (!V->getOperand(i).isUndef()) {
17328 2411 : Base = V->getOperand(i);
17329 2411 : break;
17330 : }
17331 : }
17332 : // Splat of <u, u, u, u>, return <u, u, u, u>
17333 2411 : if (!Base.getNode())
17334 0 : return N0;
17335 4819 : for (unsigned i = 0; i != NumElts; ++i) {
17336 4816 : if (V->getOperand(i) != Base) {
17337 : AllSame = false;
17338 : break;
17339 : }
17340 : }
17341 : // Splat of <x, x, x, x>, return <x, x, x, x>
17342 2411 : if (AllSame)
17343 3 : return N0;
17344 :
17345 : // Canonicalize any other splat as a build_vector.
17346 2408 : const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17347 2408 : SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17348 2424 : SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17349 :
17350 : // We may have jumped through bitcasts, so the type of the
17351 : // BUILD_VECTOR may not match the type of the shuffle.
17352 4820 : if (V->getValueType(0) != VT)
17353 0 : NewBV = DAG.getBitcast(VT, NewBV);
17354 2408 : return NewBV;
17355 : }
17356 : }
17357 :
17358 : // Simplify source operands based on shuffle mask.
17359 67873 : if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17360 1784 : return SDValue(N, 0);
17361 :
17362 : // Match shuffles that can be converted to any_vector_extend_in_reg.
17363 66089 : if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
17364 286 : return V;
17365 :
17366 : // Combine "truncate_vector_in_reg" style shuffles.
17367 65803 : if (SDValue V = combineTruncationShuffle(SVN, DAG))
17368 1 : return V;
17369 :
17370 2988 : if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17371 65802 : Level < AfterLegalizeVectorOps &&
17372 663 : (N1.isUndef() ||
17373 0 : (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17374 720 : N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17375 2336 : if (SDValue V = partitionShuffleOfConcats(N, DAG))
17376 903 : return V;
17377 : }
17378 :
17379 : // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17380 : // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17381 64899 : if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17382 43114 : if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17383 559 : return Res;
17384 :
17385 : // If this shuffle only has a single input that is a bitcasted shuffle,
17386 : // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17387 : // back to their original types.
17388 11777 : if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17389 70209 : N1.isUndef() && Level < AfterLegalizeVectorOps &&
17390 2552 : TLI.isTypeLegal(VT)) {
17391 : auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17392 : if (Scale == 1)
17393 : return SmallVector<int, 8>(Mask.begin(), Mask.end());
17394 :
17395 : SmallVector<int, 8> NewMask;
17396 : for (int M : Mask)
17397 : for (int s = 0; s != Scale; ++s)
17398 : NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17399 : return NewMask;
17400 : };
17401 :
17402 2510 : SDValue BC0 = peekThroughOneUseBitcasts(N0);
17403 2510 : if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17404 104 : EVT SVT = VT.getScalarType();
17405 104 : EVT InnerVT = BC0->getValueType(0);
17406 104 : EVT InnerSVT = InnerVT.getScalarType();
17407 :
17408 : // Determine which shuffle works with the smaller scalar type.
17409 104 : EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17410 104 : EVT ScaleSVT = ScaleVT.getScalarType();
17411 :
17412 104 : if (TLI.isTypeLegal(ScaleVT) &&
17413 104 : 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17414 104 : 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17415 104 : int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17416 104 : int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17417 :
17418 : // Scale the shuffle masks to the smaller scalar type.
17419 : ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17420 : SmallVector<int, 8> InnerMask =
17421 104 : ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17422 : SmallVector<int, 8> OuterMask =
17423 104 : ScaleShuffleMask(SVN->getMask(), OuterScale);
17424 :
17425 : // Merge the shuffle masks.
17426 : SmallVector<int, 8> NewMask;
17427 1340 : for (int M : OuterMask)
17428 1236 : NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17429 :
17430 : // Test for shuffle mask legality over both commutations.
17431 104 : SDValue SV0 = BC0->getOperand(0);
17432 104 : SDValue SV1 = BC0->getOperand(1);
17433 208 : bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17434 104 : if (!LegalMask) {
17435 : std::swap(SV0, SV1);
17436 : ShuffleVectorSDNode::commuteMask(NewMask);
17437 4 : LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17438 : }
17439 :
17440 104 : if (LegalMask) {
17441 102 : SV0 = DAG.getBitcast(ScaleVT, SV0);
17442 102 : SV1 = DAG.getBitcast(ScaleVT, SV1);
17443 102 : return DAG.getBitcast(
17444 204 : VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17445 : }
17446 : }
17447 : }
17448 : }
17449 :
17450 : // Canonicalize shuffles according to rules:
17451 : // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17452 : // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17453 : // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17454 1951 : if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17455 65052 : N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17456 782 : TLI.isTypeLegal(VT)) {
17457 : // The incoming shuffle must be of the same type as the result of the
17458 : // current shuffle.
17459 : assert(N1->getOperand(0).getValueType() == VT &&
17460 : "Shuffle types don't match");
17461 :
17462 778 : SDValue SV0 = N1->getOperand(0);
17463 778 : SDValue SV1 = N1->getOperand(1);
17464 : bool HasSameOp0 = N0 == SV0;
17465 : bool IsSV1Undef = SV1.isUndef();
17466 778 : if (HasSameOp0 || IsSV1Undef || N0 == SV1)
17467 : // Commute the operands of this shuffle so that next rule
17468 : // will trigger.
17469 500 : return DAG.getCommutedVectorShuffle(*SVN);
17470 : }
17471 :
17472 : // Try to fold according to rules:
17473 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17474 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17475 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17476 : // Don't try to fold shuffles with illegal type.
17477 : // Only fold if this shuffle is the only user of the other shuffle.
17478 4348 : if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
17479 66888 : Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
17480 : ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
17481 :
17482 : // Don't try to fold splats; they're likely to simplify somehow, or they
17483 : // might be free.
17484 3008 : if (OtherSV->isSplat())
17485 95 : return SDValue();
17486 :
17487 : // The incoming shuffle must be of the same type as the result of the
17488 : // current shuffle.
17489 : assert(OtherSV->getOperand(0).getValueType() == VT &&
17490 : "Shuffle types don't match");
17491 :
17492 : SDValue SV0, SV1;
17493 : SmallVector<int, 4> Mask;
17494 : // Compute the combined shuffle mask for a shuffle with SV0 as the first
17495 : // operand, and SV1 as the second operand.
17496 26353 : for (unsigned i = 0; i != NumElts; ++i) {
17497 24118 : int Idx = SVN->getMaskElt(i);
17498 24118 : if (Idx < 0) {
17499 : // Propagate Undef.
17500 4026 : Mask.push_back(Idx);
17501 14593 : continue;
17502 : }
17503 :
17504 : SDValue CurrentVec;
17505 20092 : if (Idx < (int)NumElts) {
17506 : // This shuffle index refers to the inner shuffle N0. Lookup the inner
17507 : // shuffle mask to identify which vector is actually referenced.
17508 12189 : Idx = OtherSV->getMaskElt(Idx);
17509 12189 : if (Idx < 0) {
17510 : // Propagate Undef.
17511 200 : Mask.push_back(Idx);
17512 200 : continue;
17513 : }
17514 :
17515 14576 : CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
17516 2587 : : OtherSV->getOperand(1);
17517 : } else {
17518 : // This shuffle index references an element within N1.
17519 7903 : CurrentVec = N1;
17520 : }
17521 :
17522 : // Simple case where 'CurrentVec' is UNDEF.
17523 19892 : if (CurrentVec.isUndef()) {
17524 16 : Mask.push_back(-1);
17525 16 : continue;
17526 : }
17527 :
17528 : // Canonicalize the shuffle index. We don't know yet if CurrentVec
17529 : // will be the first or second operand of the combined shuffle.
17530 19876 : Idx = Idx % NumElts;
17531 19876 : if (!SV0.getNode() || SV0 == CurrentVec) {
17532 : // Ok. CurrentVec is the left hand side.
17533 : // Update the mask accordingly.
17534 10351 : SV0 = CurrentVec;
17535 10351 : Mask.push_back(Idx);
17536 10351 : continue;
17537 : }
17538 :
17539 : // Bail out if we cannot convert the shuffle pair into a single shuffle.
17540 9525 : if (SV1.getNode() && SV1 != CurrentVec)
17541 678 : return SDValue();
17542 :
17543 : // Ok. CurrentVec is the right hand side.
17544 : // Update the mask accordingly.
17545 8847 : SV1 = CurrentVec;
17546 8847 : Mask.push_back(Idx + NumElts);
17547 : }
17548 :
17549 : // Check if all indices in Mask are Undef. If so, propagate Undef.
17550 : bool isUndefMask = true;
17551 4629 : for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
17552 4788 : isUndefMask &= Mask[i] < 0;
17553 :
17554 2235 : if (isUndefMask)
17555 8 : return DAG.getUNDEF(VT);
17556 :
17557 2227 : if (!SV0.getNode())
17558 0 : SV0 = DAG.getUNDEF(VT);
17559 2227 : if (!SV1.getNode())
17560 760 : SV1 = DAG.getUNDEF(VT);
17561 :
17562 : // Avoid introducing shuffles with illegal mask.
17563 4454 : if (!TLI.isShuffleMaskLegal(Mask, VT)) {
17564 : ShuffleVectorSDNode::commuteMask(Mask);
17565 :
17566 8 : if (!TLI.isShuffleMaskLegal(Mask, VT))
17567 4 : return SDValue();
17568 :
17569 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
17570 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
17571 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
17572 : std::swap(SV0, SV1);
17573 : }
17574 :
17575 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17576 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17577 : // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17578 4458 : return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
17579 : }
17580 :
17581 60730 : return SDValue();
17582 : }
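// [Editorial sketch, not part of the original source] The mask folding at
// the end of visitVECTOR_SHUFFLE, on symbolic lanes: outer mask entries
// below NumElts are resolved through the inner mask, and the fold succeeds
// only if at most two distinct source vectors remain.
#include <cassert>
#include <string>
#include <vector>

int main() {
  const int NumElts = 4;
  std::vector<std::string> A{"A0", "A1", "A2", "A3"},
      B{"B0", "B1", "B2", "B3"}, C{"C0", "C1", "C2", "C3"};

  std::vector<int> M0{0, 4, 1, 5}; // inner = shuffle(A, B) = A0,B0,A1,B1
  std::vector<int> M1{0, 2, 4, 5}; // outer = shuffle(inner, C)

  // Resolve the nested shuffle the slow way.
  std::vector<std::string> Inner, Outer;
  for (int Idx : M0)
    Inner.push_back(Idx < NumElts ? A[Idx] : B[Idx - NumElts]);
  for (int Idx : M1)
    Outer.push_back(Idx < NumElts ? Inner[Idx] : C[Idx - NumElts]);

  // Only A and C survive, so this folds to shuffle(A, C, <0,1,4,5>).
  std::vector<std::string> FoldedSrc;
  for (int Idx : std::vector<int>{0, 1, 4, 5})
    FoldedSrc.push_back(Idx < NumElts ? A[Idx] : C[Idx - NumElts]);
  assert(Outer == FoldedSrc); // Both are A0, A1, C0, C1.
}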
17583 :
17584 23174 : SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
17585 23174 : SDValue InVal = N->getOperand(0);
17586 46348 : EVT VT = N->getValueType(0);
17587 :
17588 : // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
17589 : // with a VECTOR_SHUFFLE and possible truncate.
17590 23174 : if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17591 484 : SDValue InVec = InVal->getOperand(0);
17592 484 : SDValue EltNo = InVal->getOperand(1);
17593 484 : auto InVecT = InVec.getValueType();
17594 : if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
17595 968 : SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
17596 968 : int Elt = C0->getZExtValue();
17597 484 : NewMask[0] = Elt;
17598 : SDValue Val;
17599 : // If we have an implicit truncate, do the truncate here as long as it's
17600 : // legal; if it's not legal, fall through to the shuffle lowering below.
17601 510 : if (VT.getScalarType() != InVal.getValueType() &&
17602 26 : InVal.getValueType().isScalarInteger() &&
17603 26 : isTypeLegal(VT.getScalarType())) {
17604 0 : Val =
17605 0 : DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
17606 0 : return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
17607 : }
17608 942 : if (VT.getScalarType() == InVecT.getScalarType() &&
17609 458 : VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
17610 942 : TLI.isShuffleMaskLegal(NewMask, VT)) {
17611 916 : Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
17612 916 : DAG.getUNDEF(InVecT), NewMask);
17613 : // If the initial vector is the correct size this shuffle is a
17614 : // valid result.
17615 458 : if (VT == InVecT)
17616 309 : return Val;
17617 : // If not we must truncate the vector.
17618 149 : if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
17619 149 : MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17620 298 : SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
17621 : EVT SubVT =
17622 149 : EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
17623 149 : VT.getVectorNumElements());
17624 298 : Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
17625 149 : ZeroIdx);
17626 149 : return Val;
17627 : }
17628 : }
17629 : }
17630 : }
17631 :
17632 22716 : return SDValue();
17633 : }
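// [Editorial sketch, not part of the original source] Why
// SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, C0)) is the shuffle
// V, undef, <C0, -1, ...>: only lane 0 of the result is defined, and it must
// hold V[C0]; all remaining lanes are undef in both forms.
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V{7, 8, 9, 10};
  int C0 = 2;
  std::vector<int> Mask{C0, -1, -1, -1};
  int Lane0 = V[Mask[0]]; // Lane 0 of the shuffle result.
  int Scalar = V[C0];     // Lane 0 of scalar_to_vector(extract(V, C0)).
  assert(Lane0 == Scalar);
}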
17634 :
17635 23652 : SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
17636 23652 : EVT VT = N->getValueType(0);
17637 23652 : SDValue N0 = N->getOperand(0);
17638 23652 : SDValue N1 = N->getOperand(1);
17639 23652 : SDValue N2 = N->getOperand(2);
17640 :
17641 : // If inserting an UNDEF, just return the original vector.
17642 23652 : if (N1.isUndef())
17643 55 : return N0;
17644 :
17645 : // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
17646 : // us to pull BITCASTs from input to output.
17647 23597 : if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
17648 4330 : if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
17649 288 : return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
17650 :
17651 : // If this is an insert of an extracted vector into an undef vector, we can
17652 : // just use the input to the extract.
17653 15954 : if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17654 25728 : N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
17655 191 : return N1.getOperand(0);
17656 :
17657 : // If we are inserting a bitcast value into an undef, with the same
17658 : // number of elements, just use the bitcast input of the extract.
17659 : // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
17660 : // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
17661 15763 : if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
17662 3433 : N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17663 335 : N1.getOperand(0).getOperand(1) == N2 &&
17664 332 : N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
17665 23262 : VT.getVectorNumElements() &&
17666 23265 : N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
17667 3 : VT.getSizeInBits()) {
17668 6 : return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
17669 : }
17670 :
17671 : // If both N1 and N2 are bitcast values on which insert_subvector
17672 : // If both N0 and N1 are bitcast values on which insert_subvector
17673 : // would make sense, pull the bitcast through.
17674 : // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
17675 23259 : if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
17676 213 : SDValue CN0 = N0.getOperand(0);
17677 213 : SDValue CN1 = N1.getOperand(0);
17678 213 : EVT CN0VT = CN0.getValueType();
17679 213 : EVT CN1VT = CN1.getValueType();
17680 213 : if (CN0VT.isVector() && CN1VT.isVector() &&
17681 564 : CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
17682 : CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
17683 4 : SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
17684 4 : CN0.getValueType(), CN0, CN1, N2);
17685 4 : return DAG.getBitcast(VT, NewINSERT);
17686 : }
17687 : }
17688 :
17689 : // Combine INSERT_SUBVECTORs where we are inserting to the same index.
17690 : // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
17691 : // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
17692 0 : if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17693 23255 : N0.getOperand(1).getValueType() == N1.getValueType() &&
17694 4364 : N0.getOperand(2) == N2)
17695 46 : return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
17696 92 : N1, N2);
17697 :
17698 : if (!isa<ConstantSDNode>(N2))
17699 0 : return SDValue();
17700 :
17701 23209 : unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
17702 :
17703 : // Canonicalize insert_subvector dag nodes.
17704 : // Example:
17705 : // (insert_subvector (insert_subvector A, Idx0), Idx1)
17706 : // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
17707 4318 : if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
17708 27395 : N1.getValueType() == N0.getOperand(1).getValueType() &&
17709 : isa<ConstantSDNode>(N0.getOperand(2))) {
17710 4186 : unsigned OtherIdx = N0.getConstantOperandVal(2);
17711 4186 : if (InsIdx < OtherIdx) {
17712 : // Swap nodes.
17713 0 : SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
17714 0 : N0.getOperand(0), N1, N2);
17715 0 : AddToWorklist(NewOp.getNode());
17716 0 : return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
17717 0 : VT, NewOp, N0.getOperand(1), N0.getOperand(2));
17718 : }
17719 : }
17720 :
17721 : // If the input vector is a concatenation, and the insert replaces
17722 : // one of the pieces, we can optimize into a single concat_vectors.
17723 23209 : if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
17724 0 : N0.getOperand(0).getValueType() == N1.getValueType()) {
17725 0 : unsigned Factor = N1.getValueType().getVectorNumElements();
17726 :
17727 0 : SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
17728 0 : Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
17729 :
17730 0 : return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17731 : }
17732 :
17733 23209 : return SDValue();
17734 : }
17735 :
17736 0 : SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
17737 4584 : SDValue N0 = N->getOperand(0);
17738 :
17739 : // fold (fp_to_fp16 (fp16_to_fp op)) -> op
17740 4584 : if (N0->getOpcode() == ISD::FP16_TO_FP)
17741 1018 : return N0->getOperand(0);
17742 :
17743 0 : return SDValue();
17744 : }
17745 :
17746 0 : SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
17747 0 : SDValue N0 = N->getOperand(0);
17748 :
17749 : // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
17750 0 : if (N0->getOpcode() == ISD::AND) {
17751 0 : ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
17752 0 : if (AndConst && AndConst->getAPIntValue() == 0xffff) {
17753 0 : return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
17754 0 : N0.getOperand(0));
17755 : }
17756 : }
17757 :
17758 0 : return SDValue();
17759 : }
17760 :
17761 : /// Returns a vector_shuffle if it is able to transform an AND to a
17762 : /// vector_shuffle with the destination vector and a zero vector.
17763 : /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
17764 : /// vector_shuffle V, Zero, <0, 4, 2, 4>
17765 53297 : SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
17766 : assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
17767 :
17768 53297 : EVT VT = N->getValueType(0);
17769 53297 : SDValue LHS = N->getOperand(0);
17770 53297 : SDValue RHS = peekThroughBitcasts(N->getOperand(1));
17771 : SDLoc DL(N);
17772 :
17773 : // Make sure we're not running after operation legalization where it
17774 : // may have custom lowered the vector shuffles.
17775 53297 : if (LegalOperations)
17776 41311 : return SDValue();
17777 :
17778 23972 : if (RHS.getOpcode() != ISD::BUILD_VECTOR)
17779 6918 : return SDValue();
17780 :
17781 10136 : EVT RVT = RHS.getValueType();
17782 5068 : unsigned NumElts = RHS.getNumOperands();
17783 :
17784 : // Attempt to create a valid clear mask, splitting the mask into sub
17785 : // elements and checking to see if each is all zeros or all ones -
17786 : // suitable for shuffle masking.
17787 : auto BuildClearMask = [&](int Split) {
17788 : int NumSubElts = NumElts * Split;
17789 : int NumSubBits = RVT.getScalarSizeInBits() / Split;
17790 :
17791 : SmallVector<int, 8> Indices;
17792 : for (int i = 0; i != NumSubElts; ++i) {
17793 : int EltIdx = i / Split;
17794 : int SubIdx = i % Split;
17795 : SDValue Elt = RHS.getOperand(EltIdx);
17796 : if (Elt.isUndef()) {
17797 : Indices.push_back(-1);
17798 : continue;
17799 : }
17800 :
17801 : APInt Bits;
17802 : if (isa<ConstantSDNode>(Elt))
17803 : Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
17804 : else if (isa<ConstantFPSDNode>(Elt))
17805 : Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
17806 : else
17807 : return SDValue();
17808 :
17809 : // Extract the sub element from the constant bit mask.
17810 : if (DAG.getDataLayout().isBigEndian()) {
17811 : Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
17812 : } else {
17813 : Bits.lshrInPlace(SubIdx * NumSubBits);
17814 : }
17815 :
17816 : if (Split > 1)
17817 : Bits = Bits.trunc(NumSubBits);
17818 :
17819 : if (Bits.isAllOnesValue())
17820 : Indices.push_back(i);
17821 : else if (Bits == 0)
17822 : Indices.push_back(i + NumSubElts);
17823 : else
17824 : return SDValue();
17825 : }
17826 :
17827 : // Let's see if the target supports this vector_shuffle.
17828 : EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
17829 : EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
17830 : if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
17831 : return SDValue();
17832 :
17833 : SDValue Zero = DAG.getConstant(0, DL, ClearVT);
17834 : return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
17835 : DAG.getBitcast(ClearVT, LHS),
17836 : Zero, Indices));
17837 5068 : };
17838 :
17839 : // Determine maximum split level (byte level masking).
17840 : int MaxSplit = 1;
17841 5068 : if (RVT.getScalarSizeInBits() % 8 == 0)
17842 5056 : MaxSplit = RVT.getScalarSizeInBits() / 8;
17843 :
17844 22688 : for (int Split = 1; Split <= MaxSplit; ++Split)
17845 18709 : if (RVT.getScalarSizeInBits() % Split == 0)
17846 13296 : if (SDValue S = BuildClearMask(Split))
17847 1089 : return S;
17848 :
17849 3979 : return SDValue();
17850 : }
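// [Editorial sketch, not part of the original source] Building a "clear
// mask" from an all-ones/all-zeros constant vector, as BuildClearMask does
// at Split == 1: lane i keeps the LHS (index i) when the constant lane is
// all ones, and reads the zero vector (index i + NumElts) when it is zero.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint32_t> MaskConst{0xffffffffu, 0u, 0xffffffffu, 0u};
  int NumElts = MaskConst.size();

  std::vector<int> Indices;
  for (int i = 0; i != NumElts; ++i) {
    if (MaskConst[i] == 0xffffffffu)
      Indices.push_back(i);           // Keep the LHS lane.
    else if (MaskConst[i] == 0u)
      Indices.push_back(i + NumElts); // Take a lane of the zero vector.
    // A mixed lane would make the AND unexpressible as a shuffle.
  }
  assert((Indices == std::vector<int>{0, 5, 2, 7}));

  // Sanity check: AND V, MaskConst == shuffle(V, Zero, Indices).
  std::vector<uint32_t> V{1, 2, 3, 4}, Zero(NumElts, 0), Shuf;
  for (int Idx : Indices)
    Shuf.push_back(Idx < NumElts ? V[Idx] : Zero[Idx - NumElts]);
  for (int i = 0; i != NumElts; ++i)
    assert(Shuf[i] == (V[i] & MaskConst[i]));
}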
17851 :
17852 : /// Visit a binary vector operation, like ADD.
17853 799028 : SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
17854 : assert(N->getValueType(0).isVector() &&
17855 : "SimplifyVBinOp only works on vectors!");
17856 :
17857 799028 : SDValue LHS = N->getOperand(0);
17858 799028 : SDValue RHS = N->getOperand(1);
17859 799028 : SDValue Ops[] = {LHS, RHS};
17860 :
17861 : // See if we can constant fold the vector operation.
17862 799028 : if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
17863 2537783 : N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
17864 438 : return Fold;
17865 :
17866 : // Type legalization might introduce new shuffles in the DAG.
17867 : // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
17868 : // -> (shuffle (VBinOp (A, B)), Undef, Mask).
17869 310020 : if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
17870 2247 : isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
17871 799640 : LHS.getOperand(1).isUndef() &&
17872 938 : RHS.getOperand(1).isUndef()) {
17873 : ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
17874 : ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
17875 :
17876 934 : if (SVN0->getMask().equals(SVN1->getMask())) {
17877 50 : EVT VT = N->getValueType(0);
17878 25 : SDValue UndefVector = LHS.getOperand(1);
17879 50 : SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
17880 : LHS.getOperand(0), RHS.getOperand(0),
17881 25 : N->getFlags());
17882 : AddUsersToWorklist(N);
17883 25 : return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
17884 50 : SVN0->getMask());
17885 : }
17886 : }
17887 :
17888 798565 : return SDValue();
17889 : }
17890 :
17891 0 : SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
17892 : SDValue N2) {
17893 : assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
17894 :
17895 : SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
17896 0 : cast<CondCodeSDNode>(N0.getOperand(2))->get());
17897 :
17898 : // If we got a simplified select_cc node back from SimplifySelectCC, then
17899 : // break it down into a new SETCC node, and a new SELECT node, and then return
17900 : // the SELECT node, since we were called with a SELECT node.
17901 0 : if (SCC.getNode()) {
17902 : // Check to see if we got a select_cc back (to turn into setcc/select).
17903 : // Otherwise, just return whatever node we got back, like fabs.
17904 0 : if (SCC.getOpcode() == ISD::SELECT_CC) {
17905 0 : SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
17906 : N0.getValueType(),
17907 : SCC.getOperand(0), SCC.getOperand(1),
17908 0 : SCC.getOperand(4));
17909 0 : AddToWorklist(SETCC.getNode());
17910 0 : return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
17911 0 : SCC.getOperand(2), SCC.getOperand(3));
17912 : }
17913 :
17914 0 : return SCC;
17915 : }
17916 0 : return SDValue();
17917 : }
17918 :
17919 : /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
17920 : /// being selected between, see if we can simplify the select. Callers of this
17921 : /// should assume that TheSelect is deleted if this returns true. As such, they
17922 : /// should return the appropriate thing (e.g. the node) back to the top-level of
17923 : /// the DAG combiner loop to avoid it being looked at.
17924 0 : bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
17925 : SDValue RHS) {
17926 : // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17927 : // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
17928 0 : if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
17929 0 : if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
17930 : // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
17931 : SDValue Sqrt = RHS;
17932 : ISD::CondCode CC;
17933 : SDValue CmpLHS;
17934 : const ConstantFPSDNode *Zero = nullptr;
17935 :
17936 0 : if (TheSelect->getOpcode() == ISD::SELECT_CC) {
17937 0 : CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
17938 0 : CmpLHS = TheSelect->getOperand(0);
17939 0 : Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
17940 : } else {
17941 : // SELECT or VSELECT
17942 0 : SDValue Cmp = TheSelect->getOperand(0);
17943 0 : if (Cmp.getOpcode() == ISD::SETCC) {
17944 0 : CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
17945 0 : CmpLHS = Cmp.getOperand(0);
17946 0 : Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
17947 : }
17948 : }
17949 0 : if (Zero && Zero->isZero() &&
17950 0 : Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
17951 0 : CC == ISD::SETULT || CC == ISD::SETLT)) {
17952 : // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17953 0 : CombineTo(TheSelect, Sqrt);
17954 0 : return true;
17955 : }
17956 : }
17957 : }
17958 : // Cannot simplify select with vector condition
17959 0 : if (TheSelect->getOperand(0).getValueType().isVector()) return false;
17960 :
17961 : // If this is a select from two identical things, try to pull the operation
17962 : // through the select.
17963 0 : if (LHS.getOpcode() != RHS.getOpcode() ||
17964 0 : !LHS.hasOneUse() || !RHS.hasOneUse())
17965 0 : return false;
17966 :
17967 : // If this is a load and the token chain is identical, replace the select
17968 : // of two loads with a load through a select of the address to load from.
17969 : // This triggers in things like "select bool X, 10.0, 123.0" after the FP
17970 : // constants have been dropped into the constant pool.
17971 0 : if (LHS.getOpcode() == ISD::LOAD) {
17972 : LoadSDNode *LLD = cast<LoadSDNode>(LHS);
17973 : LoadSDNode *RLD = cast<LoadSDNode>(RHS);
17974 :
17975 : // Token chains must be identical.
17976 0 : if (LHS.getOperand(0) != RHS.getOperand(0) ||
17977 : // Do not let this transformation reduce the number of volatile loads.
17978 0 : LLD->isVolatile() || RLD->isVolatile() ||
17979 : // FIXME: If either is a pre/post inc/dec load,
17980 : // we'd need to split out the address adjustment.
17981 0 : LLD->isIndexed() || RLD->isIndexed() ||
17982 : // If this is an EXTLOAD, the VT's must match.
17983 0 : LLD->getMemoryVT() != RLD->getMemoryVT() ||
17984 : // If this is an EXTLOAD, the kind of extension must match.
17985 0 : (LLD->getExtensionType() != RLD->getExtensionType() &&
17986 : // The only exception is if one of the extensions is anyext.
17987 0 : LLD->getExtensionType() != ISD::EXTLOAD &&
17988 0 : RLD->getExtensionType() != ISD::EXTLOAD) ||
17989 : // FIXME: this discards src value information. This is
17990 : // over-conservative. It would be beneficial to be able to remember
17991 : // both potential memory locations. Since we are discarding
17992 : // src value info, don't do the transformation if the memory
17993 : // locations are not in the default address space.
17994 0 : LLD->getPointerInfo().getAddrSpace() != 0 ||
17995 0 : RLD->getPointerInfo().getAddrSpace() != 0 ||
17996 0 : !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
17997 : LLD->getBasePtr().getValueType()))
17998 0 : return false;
17999 :
18000 : // The loads must not depend on one another.
18001 0 : if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
18002 0 : return false;
18003 :
18004 : // Check that the select condition doesn't reach either load. If so,
18005 : // folding this will induce a cycle into the DAG. If not, this is safe to
18006 : // xform, so create a select of the addresses.
18007 :
18008 : SmallPtrSet<const SDNode *, 32> Visited;
18009 : SmallVector<const SDNode *, 16> Worklist;
18010 :
18011 : // Always fail if LLD and RLD are not independent. TheSelect is a
18012 : // predecessor to all Nodes in question so we need not search past it.
18013 :
18014 0 : Visited.insert(TheSelect);
18015 0 : Worklist.push_back(LLD);
18016 0 : Worklist.push_back(RLD);
18017 :
18018 0 : if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
18019 0 : SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
18020 0 : return false;
18021 :
18022 0 : SDValue Addr;
18023 0 : if (TheSelect->getOpcode() == ISD::SELECT) {
18024 : // We cannot do this optimization if any pair of {RLD, LLD} is a
18025 : // predecessor to {RLD, LLD, CondNode}. As we've already compared the
18026 : // Loads, we only need to check if CondNode is a successor to one of the
18027 : // loads. We can further avoid this if there's no use of their chain
18028 : // value.
18029 0 : SDNode *CondNode = TheSelect->getOperand(0).getNode();
18030 0 : Worklist.push_back(CondNode);
18031 :
18032 0 : if ((LLD->hasAnyUseOfValue(1) &&
18033 0 : SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18034 0 : (RLD->hasAnyUseOfValue(1) &&
18035 0 : SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18036 0 : return false;
18037 :
18038 0 : Addr = DAG.getSelect(SDLoc(TheSelect),
18039 : LLD->getBasePtr().getValueType(),
18040 0 : TheSelect->getOperand(0), LLD->getBasePtr(),
18041 0 : RLD->getBasePtr());
18042 : } else { // Otherwise SELECT_CC
18043 : // We cannot do this optimization if any pair of {RLD, LLD} is a
18044 : // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
18045 : // the Loads, we only need to check if CondLHS/CondRHS is a successor to
18046 : // one of the loads. We can further avoid this if there's no use of their
18047 : // chain value.
18048 :
18049 0 : SDNode *CondLHS = TheSelect->getOperand(0).getNode();
18050 0 : SDNode *CondRHS = TheSelect->getOperand(1).getNode();
18051 0 : Worklist.push_back(CondLHS);
18052 0 : Worklist.push_back(CondRHS);
18053 :
18054 0 : if ((LLD->hasAnyUseOfValue(1) &&
18055 0 : SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18056 0 : (RLD->hasAnyUseOfValue(1) &&
18057 0 : SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18058 0 : return false;
18059 :
18060 0 : Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
18061 : LLD->getBasePtr().getValueType(),
18062 : TheSelect->getOperand(0),
18063 : TheSelect->getOperand(1),
18064 : LLD->getBasePtr(), RLD->getBasePtr(),
18065 0 : TheSelect->getOperand(4));
18066 : }
18067 :
18068 : SDValue Load;
18069 : // It is safe to replace the two loads if they have different alignments,
18070 : // but the new load must be the minimum (most restrictive) alignment of the
18071 : // inputs.
18072 0 : unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
18073 0 : MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
18074 0 : if (!RLD->isInvariant())
18075 : MMOFlags &= ~MachineMemOperand::MOInvariant;
18076 0 : if (!RLD->isDereferenceable())
18077 : MMOFlags &= ~MachineMemOperand::MODereferenceable;
18078 0 : if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
18079 : // FIXME: Discards pointer and AA info.
18080 0 : Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
18081 : LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
18082 0 : MMOFlags);
18083 : } else {
18084 : // FIXME: Discards pointer and AA info.
18085 0 : Load = DAG.getExtLoad(
18086 : LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
18087 : : LLD->getExtensionType(),
18088 0 : SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
18089 0 : MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
18090 : }
18091 :
18092 : // Users of the select now use the result of the load.
18093 0 : CombineTo(TheSelect, Load);
18094 :
18095 : // Users of the old loads now use the new load's chain. We know the
18096 : // old-load value is dead now.
18097 : CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
18098 : CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
18099 0 : return true;
18100 : }
18101 :
18102 : return false;
18103 : }
18104 :
18105 : /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
18106 : /// bitwise 'and'.
18107 70497 : SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
18108 : SDValue N1, SDValue N2, SDValue N3,
18109 : ISD::CondCode CC) {
18110 : // If this is a select where the false operand is zero and the compare is a
18111 : // check of the sign bit, see if we can perform the "gzip trick":
18112 : // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
18113 : // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
18114 70497 : EVT XType = N0.getValueType();
18115 70497 : EVT AType = N2.getValueType();
18116 70497 : if (!isNullConstant(N3) || !XType.bitsGE(AType))
18117 33748 : return SDValue();
18118 :
18119 : // If the comparison is testing for a positive value, we have to invert
18120 : // the sign bit mask, so only do that transform if the target has a bitwise
18121 : // 'and not' instruction (the invert is free).
18122 36749 : if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
18123 : // (X > -1) ? A : 0
18124 : // (X > 0) ? X : 0 <-- This is canonical signed max.
18125 162 : if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
18126 148 : return SDValue();
18127 36587 : } else if (CC == ISD::SETLT) {
18128 : // (X < 0) ? A : 0
18129 : // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
18130 674 : if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
18131 299 : return SDValue();
18132 : } else {
18133 35913 : return SDValue();
18134 : }
18135 :
18136 : // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
18137 : // constant.
18138 389 : EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
18139 : auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18140 2156 : if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
18141 255 : unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
18142 255 : SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
18143 510 : SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
18144 255 : AddToWorklist(Shift.getNode());
18145 :
18146 255 : if (XType.bitsGT(AType)) {
18147 450 : Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18148 225 : AddToWorklist(Shift.getNode());
18149 : }
18150 :
18151 255 : if (CC == ISD::SETGT)
18152 5 : Shift = DAG.getNOT(DL, Shift, AType);
18153 :
18154 510 : return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18155 : }
18156 :
18157 134 : SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
18158 268 : SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
18159 134 : AddToWorklist(Shift.getNode());
18160 :
18161 134 : if (XType.bitsGT(AType)) {
18162 130 : Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18163 65 : AddToWorklist(Shift.getNode());
18164 : }
18165 :
18166 134 : if (CC == ISD::SETGT)
18167 9 : Shift = DAG.getNOT(DL, Shift, AType);
18168 :
18169 268 : return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18170 : }
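// [Editorial sketch, not part of the original source] The "gzip trick" on
// scalars. For a 32-bit x, x >> 31 (arithmetic shift: well-defined since
// C++20 and what mainstream targets do) is all-ones when x < 0 and zero
// otherwise, so (x < 0 ? a : 0) == ((x >> 31) & a).
#include <cassert>
#include <cstdint>

static int32_t selectLtZero(int32_t X, int32_t A) { return X < 0 ? A : 0; }
static int32_t shiftAnd(int32_t X, int32_t A) {
  int32_t SignMask = X >> 31; // 0 or -1 (all ones).
  return SignMask & A;
}

int main() {
  for (int32_t X : {-5, -1, 0, 1, 42})
    for (int32_t A : {0, 7, -123})
      assert(selectLtZero(X, A) == shiftAnd(X, A));
}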
18171 :
18172 : /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
18173 : /// where 'cond' is the comparison specified by CC.
18174 70586 : SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
18175 : SDValue N2, SDValue N3, ISD::CondCode CC,
18176 : bool NotExtCompare) {
18177 : // (x ? y : y) -> y.
18178 70586 : if (N2 == N3) return N2;
18179 :
18180 141172 : EVT VT = N2.getValueType();
18181 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
18182 : ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18183 :
18184 : // Determine if the condition we're dealing with is constant
18185 : SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
18186 70586 : N0, N1, CC, DL, false);
18187 70586 : if (SCC.getNode()) AddToWorklist(SCC.getNode());
18188 :
18189 : if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
18190 : // fold select_cc true, x, y -> x
18191 : // fold select_cc false, x, y -> y
18192 119 : return !SCCC->isNullValue() ? N2 : N3;
18193 : }
18194 :
18195 : // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
18196 : // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
18197 : // in it. This is a win when the constant is not otherwise available because
18198 : // it replaces two constant pool loads with one. We only do this if the FP
18199 : // type is known to be legal, because if it isn't, then we are before legalize
18200 : // types and we want the other legalization to happen first (e.g. to avoid
18201 : // messing with soft float) and if the ConstantFP is not legal, because if
18202 : // it is legal, we may not need to store the FP constant in a constant pool.
18203 : if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
18204 : if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
18205 1558 : if (TLI.isTypeLegal(N2.getValueType()) &&
18206 : (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
18207 461 : TargetLowering::Legal &&
18208 1475 : !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
18209 276 : !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
18210 : // If both constants have multiple uses, then we won't need to do an
18211 : // extra load, they are likely around in registers for other users.
18212 : (TV->hasOneUse() || FV->hasOneUse())) {
18213 : Constant *Elts[] = {
18214 43 : const_cast<ConstantFP*>(FV->getConstantFPValue()),
18215 43 : const_cast<ConstantFP*>(TV->getConstantFPValue())
18216 86 : };
18217 43 : Type *FPTy = Elts[0]->getType();
18218 43 : const DataLayout &TD = DAG.getDataLayout();
18219 :
18220 : // Create a ConstantArray of the two constants.
18221 43 : Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
18222 : SDValue CPIdx =
18223 86 : DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
18224 86 : TD.getPrefTypeAlignment(FPTy));
18225 43 : unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
18226 :
18227 : // Get the offsets to the 0 and 1 element of the array so that we can
18228 : // select between them.
18229 43 : SDValue Zero = DAG.getIntPtrConstant(0, DL);
18230 43 : unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
18231 43 : SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
18232 :
18233 43 : SDValue Cond = DAG.getSetCC(DL,
18234 : getSetCCResultType(N0.getValueType()),
18235 43 : N0, N1, CC);
18236 43 : AddToWorklist(Cond.getNode());
18237 43 : SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
18238 86 : Cond, One, Zero);
18239 43 : AddToWorklist(CstOffset.getNode());
18240 43 : CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
18241 43 : CstOffset);
18242 43 : AddToWorklist(CPIdx.getNode());
18243 43 : return DAG.getLoad(
18244 43 : TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
18245 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
18246 43 : Alignment);
18247 : }
18248 : }
18249 :
18250 70497 : if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
18251 389 : return V;
18252 :
18253 : // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
18254 : // where y has a single bit set.
18255 : // In plain words: we can turn the SELECT_CC into an AND
18256 : // when the condition can be materialized as an all-ones register. Any
18257 : // single bit-test can be materialized as an all-ones register with
18258 : // shift-left and shift-right-arith.
18259 37882 : if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
18260 73349 : N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
18261 33 : SDValue AndLHS = N0->getOperand(0);
18262 : ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18263 66 : if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
18264 : // Shift the tested bit over the sign bit.
18265 : const APInt &AndMask = ConstAndRHS->getAPIntValue();
18266 : SDValue ShlAmt =
18267 30 : DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
18268 60 : getShiftAmountTy(AndLHS.getValueType()));
18269 54 : SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
18270 :
18271 : // Now arithmetic right shift it all the way over, so the result is either
18272 : // all-ones, or zero.
18273 : SDValue ShrAmt =
18274 30 : DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
18275 60 : getShiftAmountTy(Shl.getValueType()));
18276 54 : SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
18277 :
18278 60 : return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
18279 : }
18280 : }
18281 :
18282 : // fold select C, 16, 0 -> shl C, 4
18283 70078 : if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
18284 60952 : TLI.getBooleanContents(N0.getValueType()) ==
18285 : TargetLowering::ZeroOrOneBooleanContent) {
18286 :
18287 : // If the caller doesn't want us to simplify this into a zext of a compare,
18288 : // don't do it.
18289 57702 : if (NotExtCompare && N2C->isOne())
18290 28810 : return SDValue();
18291 :
18292 : // Get a SetCC of the condition
18293 : // NOTE: Don't create a SETCC if it's not legal on this target.
18294 82 : if (!LegalOperations ||
18295 : TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
18296 : SDValue Temp, SCC;
18297 : // cast from setcc result type to select result type
18298 63 : if (LegalTypes) {
18299 4 : SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
18300 2 : N0, N1, CC);
18301 6 : if (N2.getValueType().bitsLT(SCC.getValueType()))
18302 0 : Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
18303 0 : N2.getValueType());
18304 : else
18305 4 : Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
18306 4 : N2.getValueType(), SCC);
18307 : } else {
18308 64 : SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
18309 122 : Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
18310 122 : N2.getValueType(), SCC);
18311 : }
18312 :
18313 63 : AddToWorklist(SCC.getNode());
18314 63 : AddToWorklist(Temp.getNode());
18315 :
18316 126 : if (N2C->isOne())
18317 15 : return Temp;
18318 :
18319 : // shl setcc result by log2 n2c
18320 48 : return DAG.getNode(
18321 : ISD::SHL, DL, N2.getValueType(), Temp,
18322 48 : DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
18323 144 : getShiftAmountTy(Temp.getValueType())));
18324 : }
18325 : }
18326 :
18327 : // Check to see if this is an integer abs.
18328 : // select_cc setg[te] X, 0, X, -X ->
18329 : // select_cc setgt X, -1, X, -X ->
18330 : // select_cc setl[te] X, 0, -X, X ->
18331 : // select_cc setlt X, 1, -X, X ->
18332 : // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
18333 41205 : if (N1C) {
18334 : ConstantSDNode *SubC = nullptr;
18335 66701 : if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
18336 891 : (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
18337 25836 : N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
18338 : SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
18339 40347 : else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
18340 443 : (N1C->isOne() && CC == ISD::SETLT)) &&
18341 26772 : N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
18342 : SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18343 :
18344 24694 : EVT XType = N0.getValueType();
18345 24805 : if (SubC && SubC->isNullValue() && XType.isInteger()) {
18346 : SDLoc DL(N0);
18347 111 : SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
18348 : N0,
18349 111 : DAG.getConstant(XType.getSizeInBits() - 1, DL,
18350 111 : getShiftAmountTy(N0.getValueType())));
18351 111 : SDValue Add = DAG.getNode(ISD::ADD, DL,
18352 111 : XType, N0, Shift);
18353 111 : AddToWorklist(Shift.getNode());
18354 111 : AddToWorklist(Add.getNode());
18355 222 : return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
18356 : }
18357 : }
18358 :
18359 : // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18360 : // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18361 : // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18362 : // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18363 : // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18364 : // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18365 : // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18366 : // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18367 65677 : if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18368 15374 : SDValue ValueOnZero = N2;
18369 15374 : SDValue Count = N3;
18370 : // If the condition is NE instead of E, swap the operands.
18371 15374 : if (CC == ISD::SETNE)
18372 : std::swap(ValueOnZero, Count);
18373 : // Check if the value on zero is a constant equal to the bits in the type.
18374 : if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18375 5234 : if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18376 : // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18377 : // legal, combine to just cttz.
18378 161 : if ((Count.getOpcode() == ISD::CTTZ ||
18379 : Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18380 185 : N0 == Count.getOperand(0) &&
18381 22 : (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18382 44 : return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18383 : // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18384 : // legal, combine to just ctlz.
18385 139 : if ((Count.getOpcode() == ISD::CTLZ ||
18386 : Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18387 154 : N0 == Count.getOperand(0) &&
18388 15 : (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18389 30 : return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18390 : }
18391 : }
18392 : }
18393 :
18394 41057 : return SDValue();
18395 : }
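// [Editorial sketch, not part of the original source] The branchless integer
// abs that the select_cc patterns above reduce to. With Y = X >> (bits-1),
// (X + Y) ^ Y equals X for X >= 0 and -X for X < 0 (modulo INT_MIN overflow,
// which the DAG form shares).
#include <cassert>
#include <cstdint>

static int32_t absViaShift(int32_t X) {
  int32_t Y = X >> 31; // 0 for non-negative X, -1 (all ones) otherwise.
  return (X + Y) ^ Y;  // For X < 0: (X - 1) ^ -1 == ~(X - 1) == -X.
}

int main() {
  for (int32_t X : {-7, -1, 0, 1, 123})
    assert(absViaShift(X) == (X < 0 ? -X : X));
}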
18396 :
18397 : /// This is a stub for TargetLowering::SimplifySetCC.
18398 : SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18399 : ISD::CondCode Cond, const SDLoc &DL,
18400 : bool foldBooleans) {
18401 : TargetLowering::DAGCombinerInfo
18402 391408 : DagCombineInfo(DAG, Level, false, this);
18403 391408 : return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18404 : }
18405 :
18406 : /// Given an ISD::SDIV node expressing a divide by constant, return
18407 : /// a DAG expression to select that will generate the same value by multiplying
18408 : /// by a magic number.
18409 : /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18410 4025 : SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18411 : // when optimising for minimum size, we don't want to expand a div to a mul
18412 :   // When optimizing for minimum size, we don't want to expand a div to a mul
18413 :   // and a shift.
18414 34 : return SDValue();
18415 :
18416 : SmallVector<SDNode *, 8> Built;
18417 3991 : if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18418 9819 : for (SDNode *N : Built)
18419 5924 : AddToWorklist(N);
18420 3895 : return S;
18421 : }
18422 :
18423 96 : return SDValue();
18424 : }
18425 :
18426 : /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18427 : /// DAG expression that will generate the same value by right shifting.
18428 595 : SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18429 1190 : ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18430 595 : if (!C)
18431 112 : return SDValue();
18432 :
18433 : // Avoid division by zero.
18434 966 : if (C->isNullValue())
18435 0 : return SDValue();
18436 :
18437 : SmallVector<SDNode *, 8> Built;
18438 483 : if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
18439 89 : for (SDNode *N : Built)
18440 57 : AddToWorklist(N);
18441 32 : return S;
18442 : }
18443 :
18444 451 : return SDValue();
18445 : }
18446 :
18447 : /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18448 : /// expression that will generate the same value by multiplying by a magic
18449 : /// number.
18450 : /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18451 957 : SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18452 : // when optimising for minimum size, we don't want to expand a div to a mul
18453 :   // When optimizing for minimum size, we don't want to expand a div to a mul
18454 :   // and a shift.
18455 32 : return SDValue();
18456 :
18457 : SmallVector<SDNode *, 8> Built;
18458 925 : if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
18459 4100 : for (SDNode *N : Built)
18460 3288 : AddToWorklist(N);
18461 812 : return S;
18462 : }
18463 :
18464 113 : return SDValue();
18465 : }
18466 :
18467 : /// Determines the LogBase2 value for a non-null input value using the
18468 : /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
18469 0 : SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18470 0 : EVT VT = V.getValueType();
18471 : unsigned EltBits = VT.getScalarSizeInBits();
18472 0 : SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18473 0 : SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18474 0 : SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18475 0 : return LogBase2;
18476 : }
18477 :
18478 : /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18479 : /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
18480 : /// F(X) = A X - 1 [which has a zero at X = 1/A]
18481 : /// =>
18482 : /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18483 : /// does not require additional intermediate precision]
18484 1147 : SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18485 1147 : if (Level >= AfterLegalizeDAG)
18486 170 : return SDValue();
18487 :
18488 : // TODO: Handle half and/or extended types?
18489 977 : EVT VT = Op.getValueType();
18490 977 : if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18491 18 : return SDValue();
18492 :
18493 : // If estimates are explicitly disabled for this function, we're done.
18494 959 : MachineFunction &MF = DAG.getMachineFunction();
18495 959 : int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18496 959 : if (Enabled == TLI.ReciprocalEstimate::Disabled)
18497 74 : return SDValue();
18498 :
18499 : // Estimates may be explicitly enabled for this type with a custom number of
18500 : // refinement steps.
18501 885 : int Iterations = TLI.getDivRefinementSteps(VT, MF);
18502 885 : if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18503 640 : AddToWorklist(Est.getNode());
18504 :
18505 640 : if (Iterations) {
18506 276 : EVT VT = Op.getValueType();
18507 : SDLoc DL(Op);
18508 276 : SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18509 :
18510 : // Newton iterations: Est = Est + Est (1 - Arg * Est)
18511 652 : for (int i = 0; i < Iterations; ++i) {
18512 376 : SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18513 376 : AddToWorklist(NewEst.getNode());
18514 :
18515 376 : NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18516 376 : AddToWorklist(NewEst.getNode());
18517 :
18518 376 : NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18519 376 : AddToWorklist(NewEst.getNode());
18520 :
18521 376 : Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18522 376 : AddToWorklist(Est.getNode());
18523 : }
18524 : }
18525 640 : return Est;
18526 : }
18527 :
18528 245 : return SDValue();
18529 : }
18530 :
18531 : /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
18532 : /// For the reciprocal sqrt, we need to find the zero of the function:
18533 : /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18534 : /// =>
18535 : /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18536 : /// As a result, we precompute A/2 prior to the iteration loop.
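/// For example, for A = 2.0 and X_0 = 0.7:
///   X_1 = 0.7 * (1.5 - 2.0 * 0.7 * 0.7 / 2) = 0.7 * 1.01 = 0.707 ~ 1/sqrt(2)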
18537 21 : SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18538 : unsigned Iterations,
18539 : SDNodeFlags Flags, bool Reciprocal) {
18540 21 : EVT VT = Arg.getValueType();
18541 : SDLoc DL(Arg);
18542 21 : SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18543 :
18544 : // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18545 : // this entire sequence requires only one FP constant.
18546 21 : SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18547 21 : AddToWorklist(HalfArg.getNode());
18548 :
18549 21 : HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18550 21 : AddToWorklist(HalfArg.getNode());
18551 :
18552 : // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18553 52 : for (unsigned i = 0; i < Iterations; ++i) {
18554 31 : SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18555 31 : AddToWorklist(NewEst.getNode());
18556 :
18557 31 : NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18558 31 : AddToWorklist(NewEst.getNode());
18559 :
18560 31 : NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18561 31 : AddToWorklist(NewEst.getNode());
18562 :
18563 31 : Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18564 31 : AddToWorklist(Est.getNode());
18565 : }
18566 :
18567 : // If non-reciprocal square root is requested, multiply the result by Arg.
18568 21 : if (!Reciprocal) {
18569 6 : Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18570 6 : AddToWorklist(Est.getNode());
18571 : }
18572 :
18573 21 : return Est;
18574 : }
18575 :
18576 : /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
18577 : /// For the reciprocal sqrt, we need to find the zero of the function:
18578 : /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18579 : /// =>
18580 : /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
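/// This is algebraically the same step as the one-constant form above; e.g.
/// for A = 2.0 and X_0 = 0.7:
///   X_1 = (-0.5 * 0.7) * (2.0 * 0.7 * 0.7 - 3.0) = (-0.35) * (-2.02) = 0.707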
18581 54 : SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
18582 : unsigned Iterations,
18583 : SDNodeFlags Flags, bool Reciprocal) {
18584 54 : EVT VT = Arg.getValueType();
18585 : SDLoc DL(Arg);
18586 54 : SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
18587 54 : SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
18588 :
18589 :   // This routine must enter the loop below to work correctly when
18590 :   // Reciprocal is false (the final iteration folds in the multiply by Arg).
18591 :   assert(Iterations > 0 && "Expected at least one refinement iteration");
18592 :
18593 : // Newton iterations for reciprocal square root:
18594 : // E = (E * -0.5) * ((A * E) * E + -3.0)
18595 110 : for (unsigned i = 0; i < Iterations; ++i) {
18596 56 : SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
18597 56 : AddToWorklist(AE.getNode());
18598 :
18599 56 : SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
18600 56 : AddToWorklist(AEE.getNode());
18601 :
18602 56 : SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
18603 56 : AddToWorklist(RHS.getNode());
18604 :
18605 :     // When calculating the square root (not the reciprocal), the last
18606 :     // iteration instead builds S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
18607 :     // (note the common subexpression A * E).
18608 56 : SDValue LHS;
18609 56 : if (Reciprocal || (i + 1) < Iterations) {
18610 : // RSQRT: LHS = (E * -0.5)
18611 26 : LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
18612 : } else {
18613 : // SQRT: LHS = (A * E) * -0.5
18614 30 : LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
18615 : }
18616 56 : AddToWorklist(LHS.getNode());
18617 :
18618 56 : Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
18619 56 : AddToWorklist(Est.getNode());
18620 : }
18621 :
18622 54 : return Est;
18623 : }
18624 :
18625 : /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
18626 : /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
18627 : /// Op can be zero.
18628 368 : SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
18629 : bool Reciprocal) {
18630 368 : if (Level >= AfterLegalizeDAG)
18631 111 : return SDValue();
18632 :
18633 : // TODO: Handle half and/or extended types?
18634 257 : EVT VT = Op.getValueType();
18635 257 : if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18636 6 : return SDValue();
18637 :
18638 : // If estimates are explicitly disabled for this function, we're done.
18639 251 : MachineFunction &MF = DAG.getMachineFunction();
18640 251 : int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
18641 251 : if (Enabled == TLI.ReciprocalEstimate::Disabled)
18642 43 : return SDValue();
18643 :
18644 : // Estimates may be explicitly enabled for this type with a custom number of
18645 : // refinement steps.
18646 208 : int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
18647 :
18648 208 : bool UseOneConstNR = false;
18649 208 : if (SDValue Est =
18650 208 : TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
18651 208 : Reciprocal)) {
18652 115 : AddToWorklist(Est.getNode());
18653 :
18654 115 : if (Iterations) {
18655 75 : Est = UseOneConstNR
18656 75 : ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
18657 54 : : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
18658 :
18659 75 : if (!Reciprocal) {
18660 : // The estimate is now completely wrong if the input was exactly 0.0 or
18661 : // possibly a denormal. Force the answer to 0.0 for those cases.
18662 36 : EVT VT = Op.getValueType();
18663 : SDLoc DL(Op);
18664 36 : EVT CCVT = getSetCCResultType(VT);
18665 36 : ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
18666 36 : const Function &F = DAG.getMachineFunction().getFunction();
18667 36 : Attribute Denorms = F.getFnAttribute("denormal-fp-math");
18668 66 : if (Denorms.getValueAsString().equals("ieee")) {
18669 : // fabs(X) < SmallestNormal ? 0.0 : Est
18670 6 : const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
18671 : APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
18672 6 : SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
18673 6 : SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18674 12 : SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
18675 6 : SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
18676 12 : Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
18677 6 : AddToWorklist(Fabs.getNode());
18678 6 : AddToWorklist(IsDenorm.getNode());
18679 6 : AddToWorklist(Est.getNode());
18680 : } else {
18681 : // X == 0.0 ? 0.0 : Est
18682 30 : SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18683 30 : SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
18684 60 : Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
18685 30 : AddToWorklist(IsZero.getNode());
18686 30 : AddToWorklist(Est.getNode());
18687 : }
18688 : }
18689 : }
18690 115 : return Est;
18691 : }
18692 :
18693 93 : return SDValue();
18694 : }
18695 :
18696 : SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
18697 130 : return buildSqrtEstimateImpl(Op, Flags, true);
18698 : }
18699 :
18700 : SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
18701 238 : return buildSqrtEstimateImpl(Op, Flags, false);
18702 : }
18703 :
18704 : /// Return true if there is any possibility that the two addresses overlap.
18705 0 : bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
18706 : // If they are the same then they must be aliases.
18707 0 : if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
18708 :
18709 : // If they are both volatile then they cannot be reordered.
18710 0 : if (Op0->isVolatile() && Op1->isVolatile()) return true;
18711 :
18712 :   // If one operation reads from invariant memory and the other may store, they
18713 :   // cannot alias. This should really check the equivalent of mayWrite, but
18714 :   // that only matters for memory nodes other than load/store.
18715 0 : if (Op0->isInvariant() && Op1->writeMem())
18716 0 : return false;
18717 :
18718 0 : if (Op1->isInvariant() && Op0->writeMem())
18719 0 : return false;
18720 :
18721 0 : unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
18722 0 : unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
18723 :
18724 : // Check for BaseIndexOffset matching.
18725 0 : BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
18726 0 : BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
18727 : int64_t PtrDiff;
18728 0 : if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
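    // If the base pointers prove equal up to a constant offset PtrDiff, the
    // accesses cover [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1); they
    // alias unless one range ends at or before the other begins, which is
    // what the expression below checks.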
18729 0 : if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
18730 0 : return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
18731 :
18732 : // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
18733 : // able to calculate their relative offset if at least one arises
18734 : // from an alloca. However, these allocas cannot overlap and we
18735 : // can infer there is no alias.
18736 0 : if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
18737 0 : if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
18738 0 : MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18739 :           // If the bases are the same frame index but we couldn't find a
18740 :           // constant offset (the indices are different), be conservative.
18741 0 : if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
18742 0 : !MFI.isFixedObjectIndex(B->getIndex())))
18743 0 : return false;
18744 : }
18745 :
18746 0 : bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
18747 0 : bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
18748 0 : bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
18749 0 : bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
18750 0 : bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
18751 0 : bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
18752 :
18753 :     // If both bases are known objects and either their kinds mismatch or
18754 :     // their indices are comparable, we can prove they do not alias.
18755 0 : if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
18756 0 : (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
18757 0 : (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
18758 0 : return false;
18759 : }
18760 :
18761 :   // If SrcValue1 and SrcValue2 are known to have relatively large alignment
18762 :   // compared to the size and offset of the accesses, we may be able to prove
18763 :   // that they do not alias. This check is conservative for now, targeting
18764 :   // cases created by splitting vector types.
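  // For example, two 4-byte accesses that are both 8-byte aligned with source
  // offsets 8 and 12 get OffAlign values 0 and 4; since 0 + 4 <= 4, their
  // offsets within any aligned 8-byte block cannot overlap.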
18765 0 : int64_t SrcValOffset0 = Op0->getSrcValueOffset();
18766 0 : int64_t SrcValOffset1 = Op1->getSrcValueOffset();
18767 : unsigned OrigAlignment0 = Op0->getOriginalAlignment();
18768 : unsigned OrigAlignment1 = Op1->getOriginalAlignment();
18769 0 : if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
18770 0 : NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
18771 0 : int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
18772 0 : int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
18773 :
18774 : // There is no overlap between these relatively aligned accesses of
18775 : // similar size. Return no alias.
18776 0 : if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
18777 0 : (OffAlign1 + NumBytes1) <= OffAlign0)
18778 0 : return false;
18779 : }
18780 :
18781 0 : bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
18782 0 : ? CombinerGlobalAA
18783 0 : : DAG.getSubtarget().useAA();
18784 : #ifndef NDEBUG
18785 : if (CombinerAAOnlyFunc.getNumOccurrences() &&
18786 : CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
18787 : UseAA = false;
18788 : #endif
18789 :
18790 0 : if (UseAA && AA &&
18791 0 : Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
18792 : // Use alias analysis information.
18793 0 : int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
18794 0 : int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
18795 0 : int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
18796 : AliasResult AAResult =
18797 0 : AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
18798 0 : UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
18799 0 : MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
18800 0 : UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
18801 0 : if (AAResult == NoAlias)
18802 0 : return false;
18803 : }
18804 :
18805 : // Otherwise we have to assume they alias.
18806 : return true;
18807 : }
18808 :
18809 : /// Walk up the chain, skipping non-aliasing memory nodes, looking for
18810 : /// aliasing nodes and adding them to the Aliases vector.
18811 6560055 : void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
18812 : SmallVectorImpl<SDValue> &Aliases) {
18813 : SmallVector<SDValue, 8> Chains; // List of chains to visit.
18814 : SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
18815 :
18816 : // Get alias information for node.
18817 6560055 : bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
18818 :
18819 : // Starting off.
18820 6560055 : Chains.push_back(OriginalChain);
18821 : unsigned Depth = 0;
18822 :
18823 : // Look at each chain and determine if it is an alias. If so, add it to the
18824 : // aliases list. If not, then continue up the chain looking for the next
18825 : // candidate.
18826 21458263 : while (!Chains.empty()) {
18827 15085585 : SDValue Chain = Chains.pop_back_val();
18828 :
18829 : // For TokenFactor nodes, look at each operand and only continue up the
18830 : // chain until we reach the depth limit.
18831 : //
18832 : // FIXME: The depth check could be made to return the last non-aliasing
18833 : // chain we found before we hit a tokenfactor rather than the original
18834 : // chain.
18835 15085585 : if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
18836 : Aliases.clear();
18837 187377 : Aliases.push_back(OriginalChain);
18838 187377 : return;
18839 : }
18840 :
18841 :     // Don't bother if we've been here before.
18842 14898208 : if (!Visited.insert(Chain.getNode()).second)
18843 387763 : continue;
18844 :
18845 29020890 : switch (Chain.getOpcode()) {
18846 : case ISD::EntryToken:
18847 : // Entry token is ideal chain operand, but handled in FindBetterChain.
18848 : break;
18849 :
18850 8801760 : case ISD::LOAD:
18851 : case ISD::STORE: {
18852 : // Get alias information for Chain.
18853 8801760 : bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
18854 4290841 : !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
18855 :
18856 :       // If the chain aliases N, stop here.
18857 17216372 : if (!(IsLoad && IsOpLoad) &&
18858 8414612 : isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
18859 4764900 : Aliases.push_back(Chain);
18860 : } else {
18861 : // Look further up the chain.
18862 8073720 : Chains.push_back(Chain.getOperand(0));
18863 4036860 : ++Depth;
18864 : }
18865 : break;
18866 : }
18867 :
18868 2278834 : case ISD::TokenFactor:
18869 : // We have to check each of the operands of the token factor for "small"
18870 : // token factors, so we queue them up. Adding the operands to the queue
18871 : // (stack) in reverse order maintains the original order and increases the
18872 :       // likelihood that getNode will find a matching token factor (CSE).
18873 2278834 : if (Chain.getNumOperands() > 16) {
18874 61863 : Aliases.push_back(Chain);
18875 61863 : break;
18876 : }
18877 8834928 : for (unsigned n = Chain.getNumOperands(); n;)
18878 15452885 : Chains.push_back(Chain.getOperand(--n));
18879 2216971 : ++Depth;
18880 2216971 : break;
18881 :
18882 45016 : case ISD::CopyFromReg:
18883 : // Forward past CopyFromReg.
18884 45016 : Chains.push_back(Chain.getOperand(0));
18885 45016 : ++Depth;
18886 45016 : break;
18887 :
18888 1113133 : default:
18889 : // For all other instructions we will just have to take what we can get.
18890 1113133 : Aliases.push_back(Chain);
18891 1113133 : break;
18892 : }
18893 : }
18894 : }
18895 :
18896 : /// Walk up the chain, skipping non-aliasing memory nodes, looking for a
18897 : /// better chain (an aliasing node).
18898 10404506 : SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
18899 10404506 : if (OptLevel == CodeGenOpt::None)
18900 3844451 : return OldChain;
18901 :
18902 : // Ops for replacing token factor.
18903 : SmallVector<SDValue, 8> Aliases;
18904 :
18905 : // Accumulate all the aliases to this node.
18906 6560055 : GatherAllAliases(N, OldChain, Aliases);
18907 :
18908 :   // If no aliases were found, chain to the entry token.
18909 13120110 : if (Aliases.size() == 0)
18910 2011188 : return DAG.getEntryNode();
18911 :
18912 :   // If a single alias was found, chain to it; we don't need to revisit it.
18913 4548867 : if (Aliases.size() == 1)
18914 3812138 : return Aliases[0];
18915 :
18916 : // Construct a custom tailored token factor.
18917 2047551 : return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
18918 : }
18919 :
18920 : // This function tries to collect a bunch of potentially interesting
18921 : // nodes to improve the chains of, all at once. This might seem
18922 : // redundant, as this function gets called when visiting every store
18923 : // node, so why not let the work be done on each store as it's visited?
18924 : //
18925 : // I believe this is mainly important because MergeConsecutiveStores
18926 : // is unable to deal with merging stores of different sizes, so unless
18927 : // we improve the chains of all the potential candidates up-front
18928 : // before running MergeConsecutiveStores, it might only see some of
18929 : // the nodes that will eventually be candidates, and then not be able
18930 : // to go from a partially-merged state to the desired final
18931 : // fully-merged state.
18932 7832510 : bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
18933 7832510 : if (OptLevel == CodeGenOpt::None)
18934 : return false;
18935 :
18936 : // This holds the base pointer, index, and the offset in bytes from the base
18937 : // pointer.
18938 3619287 : BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
18939 :
18940 : // We must have a base and an offset.
18941 3619287 : if (!BasePtr.getBase().getNode())
18942 : return false;
18943 :
18944 : // Do not handle stores to undef base pointers.
18945 3619287 : if (BasePtr.getBase().isUndef())
18946 : return false;
18947 :
18948 : SmallVector<StoreSDNode *, 8> ChainedStores;
18949 3607780 : ChainedStores.push_back(St);
18950 :
18951 : // Walk up the chain and look for nodes with offsets from the same
18952 :   // base pointer. Stop when reaching an instruction of a different kind or
18953 :   // one that has a different base pointer.
18954 3607780 : StoreSDNode *Index = St;
18955 7226774 : while (Index) {
18956 : // If the chain has more than one use, then we can't reorder the mem ops.
18957 4055512 : if (Index != St && !SDValue(Index, 0)->hasOneUse())
18958 : break;
18959 :
18960 3897033 : if (Index->isVolatile() || Index->isIndexed())
18961 : break;
18962 :
18963 : // Find the base pointer and offset for this memory node.
18964 3860421 : BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
18965 :
18966 : // Check that the base pointer is the same as the original one.
18967 3860421 : if (!BasePtr.equalBaseIndex(Ptr, DAG))
18968 : break;
18969 :
18970 : // Walk up the chain to find the next store node, ignoring any
18971 : // intermediate loads. Any other kind of node will halt the loop.
18972 3622697 : SDNode *NextInChain = Index->getChain().getNode();
18973 : while (true) {
18974 5523810 : if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
18975 : // We found a store node. Use it for the next iteration.
18976 451435 : if (STn->isVolatile() || STn->isIndexed()) {
18977 : Index = nullptr;
18978 3622697 : break;
18979 : }
18980 447732 : ChainedStores.push_back(STn);
18981 447732 : Index = STn;
18982 447732 : break;
18983 : } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
18984 1901113 : NextInChain = Ldn->getChain().getNode();
18985 1901113 : continue;
18986 : } else {
18987 : Index = nullptr;
18988 : break;
18989 : }
18990 :       } // end while
18991 : }
18992 :
18993 : // At this point, ChainedStores lists all of the Store nodes
18994 : // reachable by iterating up through chain nodes matching the above
18995 : // conditions. For each such store identified, try to find an
18996 :   // earlier chain to attach the store to that won't violate the
18997 : // required ordering.
18998 : bool MadeChangeToSt = false;
18999 : SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
19000 :
19001 7663292 : for (StoreSDNode *ChainedStore : ChainedStores) {
19002 4055512 : SDValue Chain = ChainedStore->getChain();
19003 4055512 : SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
19004 :
19005 : if (Chain != BetterChain) {
19006 420021 : if (ChainedStore == St)
19007 : MadeChangeToSt = true;
19008 420021 : BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
19009 : }
19010 : }
19011 :
19012 :   // Perform all of the replacements only after they have all been found, to
19013 :   // avoid making the chains more complicated by introducing new TokenFactors.
19014 4027801 : for (auto Replacement : BetterChains)
19015 420021 : replaceStoreChain(Replacement.first, Replacement.second);
19016 :
19017 : return MadeChangeToSt;
19018 : }
19019 :
19020 : /// This is the entry point for the file.
19021 2767992 : void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
19022 : CodeGenOpt::Level OptLevel) {
19023 :   // This is the main entry point to this class.
19024 2767992 : DAGCombiner(*this, AA, OptLevel).Run(Level);
19025 2767992 : }
|