LLVM 22.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
181
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. As we keep do not add duplicate nodes
184 /// in the worklist, this is different from the tail of the worklist.
186
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as store merging
192 /// candidate again.
194
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// stores candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the work lists because they might get more simplified now.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its user to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns a nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it so
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFMULADD(SDNode *N);
513 SDValue visitFDIV(SDNode *N);
514 SDValue visitFREM(SDNode *N);
515 SDValue visitFSQRT(SDNode *N);
516 SDValue visitFCOPYSIGN(SDNode *N);
517 SDValue visitFPOW(SDNode *N);
518 SDValue visitFCANONICALIZE(SDNode *N);
519 SDValue visitSINT_TO_FP(SDNode *N);
520 SDValue visitUINT_TO_FP(SDNode *N);
521 SDValue visitFP_TO_SINT(SDNode *N);
522 SDValue visitFP_TO_UINT(SDNode *N);
523 SDValue visitXROUND(SDNode *N);
524 SDValue visitFP_ROUND(SDNode *N);
525 SDValue visitFP_EXTEND(SDNode *N);
526 SDValue visitFNEG(SDNode *N);
527 SDValue visitFABS(SDNode *N);
528 SDValue visitFCEIL(SDNode *N);
529 SDValue visitFTRUNC(SDNode *N);
530 SDValue visitFFREXP(SDNode *N);
531 SDValue visitFFLOOR(SDNode *N);
532 SDValue visitFMinMax(SDNode *N);
533 SDValue visitBRCOND(SDNode *N);
534 SDValue visitBR_CC(SDNode *N);
535 SDValue visitLOAD(SDNode *N);
536
537 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
538 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
539 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
540
541 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
542
543 SDValue visitSTORE(SDNode *N);
544 SDValue visitATOMIC_STORE(SDNode *N);
545 SDValue visitLIFETIME_END(SDNode *N);
546 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
547 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
548 SDValue visitBUILD_VECTOR(SDNode *N);
549 SDValue visitCONCAT_VECTORS(SDNode *N);
550 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
551 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
552 SDValue visitVECTOR_SHUFFLE(SDNode *N);
553 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
554 SDValue visitINSERT_SUBVECTOR(SDNode *N);
555 SDValue visitVECTOR_COMPRESS(SDNode *N);
556 SDValue visitMLOAD(SDNode *N);
557 SDValue visitMSTORE(SDNode *N);
558 SDValue visitMGATHER(SDNode *N);
559 SDValue visitMSCATTER(SDNode *N);
560 SDValue visitMHISTOGRAM(SDNode *N);
561 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
562 SDValue visitVPGATHER(SDNode *N);
563 SDValue visitVPSCATTER(SDNode *N);
564 SDValue visitVP_STRIDED_LOAD(SDNode *N);
565 SDValue visitVP_STRIDED_STORE(SDNode *N);
566 SDValue visitFP_TO_FP16(SDNode *N);
567 SDValue visitFP16_TO_FP(SDNode *N);
568 SDValue visitFP_TO_BF16(SDNode *N);
569 SDValue visitBF16_TO_FP(SDNode *N);
570 SDValue visitVECREDUCE(SDNode *N);
571 SDValue visitVPOp(SDNode *N);
572 SDValue visitGET_FPENV_MEM(SDNode *N);
573 SDValue visitSET_FPENV_MEM(SDNode *N);
574
575 template <class MatchContextClass>
576 SDValue visitFADDForFMACombine(SDNode *N);
577 template <class MatchContextClass>
578 SDValue visitFSUBForFMACombine(SDNode *N);
579 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
580
581 SDValue XformToShuffleWithZero(SDNode *N);
582 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
583 const SDLoc &DL,
584 SDNode *N,
585 SDValue N0,
586 SDValue N1);
587 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
588 SDValue N1, SDNodeFlags Flags);
589 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
590 SDValue N1, SDNodeFlags Flags);
591 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
592 EVT VT, SDValue N0, SDValue N1,
593 SDNodeFlags Flags = SDNodeFlags());
594
595 SDValue visitShiftByConstant(SDNode *N);
596
597 SDValue foldSelectOfConstants(SDNode *N);
598 SDValue foldVSelectOfConstants(SDNode *N);
599 SDValue foldBinOpIntoSelect(SDNode *BO);
600 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
601 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
602 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
603 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
604 SDValue N2, SDValue N3, ISD::CondCode CC,
605 bool NotExtCompare = false);
606 SDValue convertSelectOfFPConstantsToLoadOffset(
607 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
608 ISD::CondCode CC);
609 SDValue foldSignChangeInBitcast(SDNode *N);
610 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
611 SDValue N2, SDValue N3, ISD::CondCode CC);
612 SDValue foldSelectOfBinops(SDNode *N);
613 SDValue foldSextSetcc(SDNode *N);
614 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
615 const SDLoc &DL);
616 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
617 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
618 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
619 SDValue False, ISD::CondCode CC, const SDLoc &DL);
620 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
621 SDValue False, ISD::CondCode CC, const SDLoc &DL);
622 SDValue unfoldMaskedMerge(SDNode *N);
623 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
624 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
625 const SDLoc &DL, bool foldBooleans);
626 SDValue rebuildSetCC(SDValue N);
627
628 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
629 SDValue &CC, bool MatchStrict = false) const;
630 bool isOneUseSetCC(SDValue N) const;
631
632 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
633 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
634
635 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
636 unsigned HiOp);
637 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
638 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
639 const TargetLowering &TLI);
640 SDValue foldPartialReduceMLAMulOp(SDNode *N);
641 SDValue foldPartialReduceAdd(SDNode *N);
642
643 SDValue CombineExtLoad(SDNode *N);
644 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
645 SDValue combineRepeatedFPDivisors(SDNode *N);
646 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
647 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
648 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
650 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
651 SDValue BuildSDIV(SDNode *N);
652 SDValue BuildSDIVPow2(SDNode *N);
653 SDValue BuildUDIV(SDNode *N);
654 SDValue BuildSREMPow2(SDNode *N);
655 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
656 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
657 bool KnownNeverZero = false,
658 bool InexpensiveOnly = false,
659 std::optional<EVT> OutVT = std::nullopt);
660 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
661 SDValue buildRsqrtEstimate(SDValue Op);
662 SDValue buildSqrtEstimate(SDValue Op);
663 SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
664 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
665 bool Reciprocal);
666 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
667 bool Reciprocal);
668 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
669 bool DemandHighBits = true);
670 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
671 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
672 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
673 bool HasPos, unsigned PosOpcode,
674 unsigned NegOpcode, const SDLoc &DL);
675 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
676 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
677 bool HasPos, unsigned PosOpcode,
678 unsigned NegOpcode, const SDLoc &DL);
679 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
680 bool FromAdd);
681 SDValue MatchLoadCombine(SDNode *N);
682 SDValue mergeTruncStores(StoreSDNode *N);
683 SDValue reduceLoadWidth(SDNode *N);
684 SDValue ReduceLoadOpStoreWidth(SDNode *N);
685 SDValue splitMergedValStore(StoreSDNode *ST);
686 SDValue TransformFPLoadStorePair(SDNode *N);
687 SDValue convertBuildVecZextToZext(SDNode *N);
688 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
689 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
690 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
691 SDValue reduceBuildVecToShuffle(SDNode *N);
692 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
693 ArrayRef<int> VectorMask, SDValue VecIn1,
694 SDValue VecIn2, unsigned LeftIdx,
695 bool DidSplitVec);
696 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
697
698 /// Walk up chain skipping non-aliasing memory nodes,
699 /// looking for aliasing nodes and adding them to the Aliases vector.
700 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
701 SmallVectorImpl<SDValue> &Aliases);
702
703 /// Return true if there is any possibility that the two addresses overlap.
704 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
705
706 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
707 /// chain (aliasing node.)
708 SDValue FindBetterChain(SDNode *N, SDValue Chain);
709
710 /// Try to replace a store and any possibly adjacent stores on
711 /// consecutive chains with better chains. Return true only if St is
712 /// replaced.
713 ///
714 /// Notice that other chains may still be replaced even if the function
715 /// returns false.
716 bool findBetterNeighborChains(StoreSDNode *St);
717
718 // Helper for findBetterNeighborChains. Walk up store chain add additional
719 // chained stores that do not overlap and can be parallelized.
720 bool parallelizeChainedStores(StoreSDNode *St);
721
722 /// Holds a pointer to an LSBaseSDNode as well as information on where it
723 /// is located in a sequence of memory operations connected by a chain.
724 struct MemOpLink {
725 // Ptr to the mem node.
726 LSBaseSDNode *MemNode;
727
728 // Offset from the base ptr.
729 int64_t OffsetFromBase;
730
731 MemOpLink(LSBaseSDNode *N, int64_t Offset)
732 : MemNode(N), OffsetFromBase(Offset) {}
733 };
734
735 // Classify the origin of a stored value.
736 enum class StoreSource { Unknown, Constant, Extract, Load };
737 StoreSource getStoreSource(SDValue StoreVal) {
738 switch (StoreVal.getOpcode()) {
739 case ISD::Constant:
740 case ISD::ConstantFP:
741 return StoreSource::Constant;
745 return StoreSource::Constant;
746 return StoreSource::Unknown;
749 return StoreSource::Extract;
750 case ISD::LOAD:
751 return StoreSource::Load;
752 default:
753 return StoreSource::Unknown;
754 }
755 }
756
757 /// This is a helper function for visitMUL to check the profitability
758 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
759 /// MulNode is the original multiply, AddNode is (add x, c1),
760 /// and ConstNode is c2.
761 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
762 SDValue ConstNode);
763
764 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
765 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
766 /// the type of the loaded value to be extended.
767 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
768 EVT LoadResultTy, EVT &ExtVT);
769
770 /// Helper function to calculate whether the given Load/Store can have its
771 /// width reduced to ExtVT.
772 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
773 EVT &MemVT, unsigned ShAmt = 0);
774
775 /// Used by BackwardsPropagateMask to find suitable loads.
776 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
777 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
778 ConstantSDNode *Mask, SDNode *&NodeToMask);
779 /// Attempt to propagate a given AND node back to load leaves so that they
780 /// can be combined into narrow loads.
781 bool BackwardsPropagateMask(SDNode *N);
782
783 /// Helper function for mergeConsecutiveStores which merges the component
784 /// store chains.
785 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
786 unsigned NumStores);
787
788 /// Helper function for mergeConsecutiveStores which checks if all the store
789 /// nodes have the same underlying object. We can still reuse the first
790 /// store's pointer info if all the stores are from the same object.
791 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
792
793 /// This is a helper function for mergeConsecutiveStores. When the source
794 /// elements of the consecutive stores are all constants or all extracted
795 /// vector elements, try to merge them into one larger store introducing
796 /// bitcasts if necessary. \return True if a merged store was created.
797 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
798 EVT MemVT, unsigned NumStores,
799 bool IsConstantSrc, bool UseVector,
800 bool UseTrunc);
801
802 /// This is a helper function for mergeConsecutiveStores. Stores that
803 /// potentially may be merged with St are placed in StoreNodes. On success,
804 /// returns a chain predecessor to all store candidates.
805 SDNode *getStoreMergeCandidates(StoreSDNode *St,
806 SmallVectorImpl<MemOpLink> &StoreNodes);
807
808 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
809 /// have indirect dependency through their operands. RootNode is the
810 /// predecessor to all stores calculated by getStoreMergeCandidates and is
811 /// used to prune the dependency check. \return True if safe to merge.
812 bool checkMergeStoreCandidatesForDependencies(
813 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
814 SDNode *RootNode);
815
816 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
817 /// chain has a call in it. \return True if a call is found.
818 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
819
820 /// This is a helper function for mergeConsecutiveStores. Given a list of
821 /// store candidates, find the first N that are consecutive in memory.
822 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
823 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
824 int64_t ElementSizeBytes) const;
825
826 /// This is a helper function for mergeConsecutiveStores. It is used for
827 /// store chains that are composed entirely of constant values.
828 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
829 unsigned NumConsecutiveStores,
830 EVT MemVT, SDNode *Root, bool AllowVectors);
831
832 /// This is a helper function for mergeConsecutiveStores. It is used for
833 /// store chains that are composed entirely of extracted vector elements.
834 /// When extracting multiple vector elements, try to store them in one
835 /// vector store rather than a sequence of scalar stores.
836 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
837 unsigned NumConsecutiveStores, EVT MemVT,
838 SDNode *Root);
839
840 /// This is a helper function for mergeConsecutiveStores. It is used for
841 /// store chains that are composed entirely of loaded values.
842 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
843 unsigned NumConsecutiveStores, EVT MemVT,
844 SDNode *Root, bool AllowVectors,
845 bool IsNonTemporalStore, bool IsNonTemporalLoad);
846
847 /// Merge consecutive store operations into a wide store.
848 /// This optimization uses wide integers or vectors when possible.
849 /// \return true if stores were merged.
850 bool mergeConsecutiveStores(StoreSDNode *St);
851
852 /// Try to transform a truncation where C is a constant:
853 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
854 ///
855 /// \p N needs to be a truncation and its first operand an AND. Other
856 /// requirements are checked by the function (e.g. that trunc is
857 /// single-use) and if missed an empty SDValue is returned.
858 SDValue distributeTruncateThroughAnd(SDNode *N);
859
860 /// Helper function to determine whether the target supports operation
861 /// given by \p Opcode for type \p VT, that is, whether the operation
862 /// is legal or custom before legalizing operations, and whether is
863 /// legal (but not custom) after legalization.
864 bool hasOperation(unsigned Opcode, EVT VT) {
865 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
866 }
867
868 bool hasUMin(EVT VT) const {
869 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
870 return (LK.first == TargetLoweringBase::TypeLegal ||
872 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
873 }
874
875 public:
876 /// Runs the dag combiner on all nodes in the work list
877 void Run(CombineLevel AtLevel);
878
879 SelectionDAG &getDAG() const { return DAG; }
880
881 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
882 EVT getShiftAmountTy(EVT LHSTy) {
883 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
884 }
885
886 /// This method returns true if we are running before type legalization or
887 /// if the specified VT is legal.
888 bool isTypeLegal(const EVT &VT) {
889 if (!LegalTypes) return true;
890 return TLI.isTypeLegal(VT);
891 }
892
  /// Convenience wrapper around TargetLowering::getSetCCResultType, using
  /// this DAG's data layout and LLVM context.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }
897
898 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
899 SDValue OrigLoad, SDValue ExtLoad,
900 ISD::NodeType ExtType);
901 };
902
903/// This class is a DAGUpdateListener that removes any deleted
904/// nodes from the worklist.
905class WorklistRemover : public SelectionDAG::DAGUpdateListener {
906 DAGCombiner &DC;
907
908public:
909 explicit WorklistRemover(DAGCombiner &dc)
910 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
911
912 void NodeDeleted(SDNode *N, SDNode *E) override {
913 DC.removeFromWorklist(N);
914 }
915};
916
917class WorklistInserter : public SelectionDAG::DAGUpdateListener {
918 DAGCombiner &DC;
919
920public:
921 explicit WorklistInserter(DAGCombiner &dc)
922 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
923
924 // FIXME: Ideally we could add N to the worklist, but this causes exponential
925 // compile time costs in large DAGs, e.g. Halide.
926 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
927};
928
929} // end anonymous namespace
930
931//===----------------------------------------------------------------------===//
932// TargetLowering::DAGCombinerInfo implementation
933//===----------------------------------------------------------------------===//
934
936 ((DAGCombiner*)DC)->AddToWorklist(N);
937}
938
940CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
941 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
942}
943
945CombineTo(SDNode *N, SDValue Res, bool AddTo) {
946 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
947}
948
950CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
951 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
952}
953
956 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
957}
958
961 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
962}
963
964//===----------------------------------------------------------------------===//
965// Helper Functions
966//===----------------------------------------------------------------------===//
967
968void DAGCombiner::deleteAndRecombine(SDNode *N) {
969 removeFromWorklist(N);
970
971 // If the operands of this node are only used by the node, they will now be
972 // dead. Make sure to re-visit them and recursively delete dead nodes.
973 for (const SDValue &Op : N->ops())
974 // For an operand generating multiple values, one of the values may
975 // become dead allowing further simplification (e.g. split index
976 // arithmetic from an indexed load).
977 if (Op->hasOneUse() || Op->getNumValues() > 1)
978 AddToWorklist(Op.getNode());
979
980 DAG.DeleteNode(N);
981}
982
983// APInts must be the same size for most operations, this helper
984// function zero extends the shorter of the pair so that they match.
985// We provide an Offset so that we can create bitwidths that won't overflow.
986static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
987 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
988 LHS = LHS.zext(Bits);
989 RHS = RHS.zext(Bits);
990}
991
992// Return true if this node is a setcc, or is a select_cc
993// that selects between the target values used for true and false, making it
994// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
995// the appropriate nodes based on the type of node we are checking. This
996// simplifies life a bit for the callers.
997bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
998 SDValue &CC, bool MatchStrict) const {
999 if (N.getOpcode() == ISD::SETCC) {
1000 LHS = N.getOperand(0);
1001 RHS = N.getOperand(1);
1002 CC = N.getOperand(2);
1003 return true;
1004 }
1005
1006 if (MatchStrict &&
1007 (N.getOpcode() == ISD::STRICT_FSETCC ||
1008 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1009 LHS = N.getOperand(1);
1010 RHS = N.getOperand(2);
1011 CC = N.getOperand(3);
1012 return true;
1013 }
1014
1015 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1016 !TLI.isConstFalseVal(N.getOperand(3)))
1017 return false;
1018
1019 if (TLI.getBooleanContents(N.getValueType()) ==
1021 return false;
1022
1023 LHS = N.getOperand(0);
1024 RHS = N.getOperand(1);
1025 CC = N.getOperand(4);
1026 return true;
1027}
1028
1029/// Return true if this is a SetCC-equivalent operation with only one use.
1030/// If this is true, it allows the users to invert the operation for free when
1031/// it is profitable to do so.
1032bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1033 SDValue N0, N1, N2;
1034 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1035 return true;
1036 return false;
1037}
1038
1040 if (!ScalarTy.isSimple())
1041 return false;
1042
1043 uint64_t MaskForTy = 0ULL;
1044 switch (ScalarTy.getSimpleVT().SimpleTy) {
1045 case MVT::i8:
1046 MaskForTy = 0xFFULL;
1047 break;
1048 case MVT::i16:
1049 MaskForTy = 0xFFFFULL;
1050 break;
1051 case MVT::i32:
1052 MaskForTy = 0xFFFFFFFFULL;
1053 break;
1054 default:
1055 return false;
1056 break;
1057 }
1058
1059 APInt Val;
1060 if (ISD::isConstantSplatVector(N, Val))
1061 return Val.getLimitedValue() == MaskForTy;
1062
1063 return false;
1064}
1065
1066// Determines if it is a constant integer or a splat/build vector of constant
1067// integers (and undefs).
1068// Do not permit build vector implicit truncation unless AllowTruncation is set.
1069static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1070 bool AllowTruncation = false) {
1072 return !(Const->isOpaque() && NoOpaques);
1073 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1074 return false;
1075 unsigned BitWidth = N.getScalarValueSizeInBits();
1076 for (const SDValue &Op : N->op_values()) {
1077 if (Op.isUndef())
1078 continue;
1080 if (!Const || (Const->isOpaque() && NoOpaques))
1081 return false;
1082 // When AllowTruncation is true, allow constants that have been promoted
1083 // during type legalization as long as the value fits in the target type.
1084 if ((AllowTruncation &&
1085 Const->getAPIntValue().getActiveBits() > BitWidth) ||
1086 (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
1087 return false;
1088 }
1089 return true;
1090}
1091
1092// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1093// undef's.
1094static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1095 if (V.getOpcode() != ISD::BUILD_VECTOR)
1096 return false;
1097 return isConstantOrConstantVector(V, NoOpaques) ||
1099}
1100
1101// Determine if this an indexed load with an opaque target constant index.
1102static bool canSplitIdx(LoadSDNode *LD) {
1103 return MaySplitLoadIndex &&
1104 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1105 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1106}
1107
1108bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1109 const SDLoc &DL,
1110 SDNode *N,
1111 SDValue N0,
1112 SDValue N1) {
1113 // Currently this only tries to ensure we don't undo the GEP splits done by
1114 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1115 // we check if the following transformation would be problematic:
1116 // (load/store (add, (add, x, offset1), offset2)) ->
1117 // (load/store (add, x, offset1+offset2)).
1118
1119 // (load/store (add, (add, x, y), offset2)) ->
1120 // (load/store (add, (add, x, offset2), y)).
1121
1122 if (!N0.isAnyAdd())
1123 return false;
1124
1125 // Check for vscale addressing modes.
1126 // (load/store (add/sub (add x, y), vscale))
1127 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1128 // (load/store (add/sub (add x, y), (mul vscale, C)))
1129 if ((N1.getOpcode() == ISD::VSCALE ||
1130 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1131 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1133 N1.getValueType().getFixedSizeInBits() <= 64) {
1134 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1135 ? N1.getConstantOperandVal(0)
1136 : (N1.getOperand(0).getConstantOperandVal(0) *
1137 (N1.getOpcode() == ISD::SHL
1138 ? (1LL << N1.getConstantOperandVal(1))
1139 : N1.getConstantOperandVal(1)));
1140 if (Opc == ISD::SUB)
1141 ScalableOffset = -ScalableOffset;
1142 if (all_of(N->users(), [&](SDNode *Node) {
1143 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1144 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1145 TargetLoweringBase::AddrMode AM;
1146 AM.HasBaseReg = true;
1147 AM.ScalableOffset = ScalableOffset;
1148 EVT VT = LoadStore->getMemoryVT();
1149 unsigned AS = LoadStore->getAddressSpace();
1150 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1151 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1152 AS);
1153 }
1154 return false;
1155 }))
1156 return true;
1157 }
1158
1159 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1160 return false;
1161
1162 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1163 if (!C2)
1164 return false;
1165
1166 const APInt &C2APIntVal = C2->getAPIntValue();
1167 if (C2APIntVal.getSignificantBits() > 64)
1168 return false;
1169
1170 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1171 if (N0.hasOneUse())
1172 return false;
1173
1174 const APInt &C1APIntVal = C1->getAPIntValue();
1175 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1176 if (CombinedValueIntVal.getSignificantBits() > 64)
1177 return false;
1178 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1179
1180 for (SDNode *Node : N->users()) {
1181 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1182 // Is x[offset2] already not a legal addressing mode? If so then
1183 // reassociating the constants breaks nothing (we test offset2 because
1184 // that's the one we hope to fold into the load or store).
1185 TargetLoweringBase::AddrMode AM;
1186 AM.HasBaseReg = true;
1187 AM.BaseOffs = C2APIntVal.getSExtValue();
1188 EVT VT = LoadStore->getMemoryVT();
1189 unsigned AS = LoadStore->getAddressSpace();
1190 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1191 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1192 continue;
1193
1194 // Would x[offset1+offset2] still be a legal addressing mode?
1195 AM.BaseOffs = CombinedValue;
1196 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1197 return true;
1198 }
1199 }
1200 } else {
1201 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1202 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1203 return false;
1204
1205 for (SDNode *Node : N->users()) {
1206 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1207 if (!LoadStore)
1208 return false;
1209
1210 // Is x[offset2] a legal addressing mode? If so then
1211 // reassociating the constants breaks address pattern
1212 TargetLoweringBase::AddrMode AM;
1213 AM.HasBaseReg = true;
1214 AM.BaseOffs = C2APIntVal.getSExtValue();
1215 EVT VT = LoadStore->getMemoryVT();
1216 unsigned AS = LoadStore->getAddressSpace();
1217 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1218 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1219 return false;
1220 }
1221 return true;
1222 }
1223
1224 return false;
1225}
1226
1227/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1228/// \p N0 is the same kind of operation as \p Opc.
1229SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1230 SDValue N0, SDValue N1,
1231 SDNodeFlags Flags) {
1232 EVT VT = N0.getValueType();
1233
1234 if (N0.getOpcode() != Opc)
1235 return SDValue();
1236
1237 SDValue N00 = N0.getOperand(0);
1238 SDValue N01 = N0.getOperand(1);
1239
1241 SDNodeFlags NewFlags;
1242 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1243 Flags.hasNoUnsignedWrap())
1244 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1245
1247 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1248 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1249 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1250 N0->getFlags().hasDisjoint());
1251 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1252 }
1253 return SDValue();
1254 }
1255 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1256 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1257 // iff (op x, c1) has one use
1258 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1259 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1260 }
1261 }
1262
1263 // Check for repeated operand logic simplifications.
1264 if (Opc == ISD::AND || Opc == ISD::OR) {
1265 // (N00 & N01) & N00 --> N00 & N01
1266 // (N00 & N01) & N01 --> N00 & N01
1267 // (N00 | N01) | N00 --> N00 | N01
1268 // (N00 | N01) | N01 --> N00 | N01
1269 if (N1 == N00 || N1 == N01)
1270 return N0;
1271 }
1272 if (Opc == ISD::XOR) {
1273 // (N00 ^ N01) ^ N00 --> N01
1274 if (N1 == N00)
1275 return N01;
1276 // (N00 ^ N01) ^ N01 --> N00
1277 if (N1 == N01)
1278 return N00;
1279 }
1280
1281 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1282 if (N1 != N01) {
1283 // Reassociate if (op N00, N1) already exist
1284 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1285 // if Op (Op N00, N1), N01 already exist
1286 // we need to stop reassciate to avoid dead loop
1287 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1288 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1289 }
1290 }
1291
1292 if (N1 != N00) {
1293 // Reassociate if (op N01, N1) already exist
1294 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1295 // if Op (Op N01, N1), N00 already exist
1296 // we need to stop reassciate to avoid dead loop
1297 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1298 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1299 }
1300 }
1301
1302 // Reassociate the operands from (OR/AND (OR/AND(N00, N001)), N1) to (OR/AND
1303 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1304 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1305 // comparisons with the same predicate. This enables optimizations as the
1306 // following one:
1307 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1308 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1309 if (Opc == ISD::AND || Opc == ISD::OR) {
1310 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1311 N01->getOpcode() == ISD::SETCC) {
1312 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1313 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1314 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1315 if (CC1 == CC00 && CC1 != CC01) {
1316 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1317 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1318 }
1319 if (CC1 == CC01 && CC1 != CC00) {
1320 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1321 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1322 }
1323 }
1324 }
1325 }
1326
1327 return SDValue();
1328}
1329
1330/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1331/// same kind of operation as \p Opc.
1332SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1333 SDValue N1, SDNodeFlags Flags) {
1334 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1335
1336 // Floating-point reassociation is not allowed without loose FP math.
1337 if (N0.getValueType().isFloatingPoint() ||
1339 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1340 return SDValue();
1341
1342 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1343 return Combined;
1344 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1345 return Combined;
1346 return SDValue();
1347}
1348
1349// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1350// Note that we only expect Flags to be passed from FP operations. For integer
1351// operations they need to be dropped.
1352SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1353 const SDLoc &DL, EVT VT, SDValue N0,
1354 SDValue N1, SDNodeFlags Flags) {
1355 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1356 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1357 N0->hasOneUse() && N1->hasOneUse() &&
1359 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1360 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1361 return DAG.getNode(RedOpc, DL, VT,
1362 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1363 N0.getOperand(0), N1.getOperand(0)));
1364 }
1365
1366 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1367 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1368 // single node.
1369 SDValue A, B, C, D, RedA, RedB;
1370 if (sd_match(N0, m_OneUse(m_c_BinOp(
1371 Opc,
1372 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1373 m_Value(RedA)),
1374 m_Value(B)))) &&
1376 Opc,
1377 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1378 m_Value(RedB)),
1379 m_Value(D)))) &&
1380 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1381 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1382 A.getValueType() == C.getValueType() &&
1383 hasOperation(Opc, A.getValueType()) &&
1384 TLI.shouldReassociateReduction(RedOpc, VT)) {
1385 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1386 (!N0->getFlags().hasAllowReassociation() ||
1388 !RedA->getFlags().hasAllowReassociation() ||
1389 !RedB->getFlags().hasAllowReassociation()))
1390 return SDValue();
1391 SelectionDAG::FlagInserter FlagsInserter(
1392 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1393 RedB->getFlags());
1394 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1395 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1396 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1397 return DAG.getNode(Opc, DL, VT, Red, Op2);
1398 }
1399 return SDValue();
1400}
1401
/// Replace all NumTo result values of \p N with the values in \p To, keeping
/// the worklist consistent.  Returns SDValue(N, 0) for caller convenience.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Every replacement value must match the type of the result it replaces
  // (null entries are allowed and skipped).
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The remover keeps the worklist free of nodes deleted during RAUW.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode())
        AddToWorklistWithUsers(To[i].getNode());
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1431
/// Apply a replacement computed by TargetLowering's SimplifyDemanded* helpers:
/// RAUW TLO.Old with TLO.New and keep the worklist/graph consistent.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');

  // Replace all uses.
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.
  recursivelyDeleteUnusedNodes(TLO.Old.getNode());
}
1448
1449/// Check the specified integer node value to see if it can be simplified or if
1450/// things it uses can be simplified by bit propagation. If so, return true.
1451bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1452 const APInt &DemandedElts,
1453 bool AssumeSingleUse) {
1454 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1455 KnownBits Known;
1456 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1457 AssumeSingleUse))
1458 return false;
1459
1460 // Revisit the node.
1461 AddToWorklist(Op.getNode());
1462
1463 CommitTargetLoweringOpt(TLO);
1464 return true;
1465}
1466
1467/// Check the specified vector node value to see if it can be simplified or
1468/// if things it uses can be simplified as it only uses some of the elements.
1469/// If so, return true.
1470bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1471 const APInt &DemandedElts,
1472 bool AssumeSingleUse) {
1473 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1474 APInt KnownUndef, KnownZero;
1475 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1476 TLO, 0, AssumeSingleUse))
1477 return false;
1478
1479 // Revisit the node.
1480 AddToWorklist(Op.getNode());
1481
1482 CommitTargetLoweringOpt(TLO);
1483 return true;
1484}
1485
/// Replace \p Load with a truncate of the wider \p ExtLoad: value uses get
/// (trunc ExtLoad) and chain uses get ExtLoad's chain, then delete Load.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  // Truncate back to the original narrow type for the value uses.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.dump(&DAG); dbgs() << '\n');

  // Rewire value (result 0) and chain (result 1) uses separately.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));

  AddToWorklist(Trunc.getNode());
  recursivelyDeleteUnusedNodes(Load);
}
1500
1501SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1502 Replace = false;
1503 SDLoc DL(Op);
1504 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1505 LoadSDNode *LD = cast<LoadSDNode>(Op);
1506 EVT MemVT = LD->getMemoryVT();
1508 : LD->getExtensionType();
1509 Replace = true;
1510 return DAG.getExtLoad(ExtType, DL, PVT,
1511 LD->getChain(), LD->getBasePtr(),
1512 MemVT, LD->getMemOperand());
1513 }
1514
1515 unsigned Opc = Op.getOpcode();
1516 switch (Opc) {
1517 default: break;
1518 case ISD::AssertSext:
1519 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1520 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1521 break;
1522 case ISD::AssertZext:
1523 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1524 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1525 break;
1526 case ISD::Constant: {
1527 unsigned ExtOpc =
1528 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1529 return DAG.getNode(ExtOpc, DL, PVT, Op);
1530 }
1531 }
1532
1533 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1534 return SDValue();
1535 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1536}
1537
1538SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1540 return SDValue();
1541 EVT OldVT = Op.getValueType();
1542 SDLoc DL(Op);
1543 bool Replace = false;
1544 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1545 if (!NewOp.getNode())
1546 return SDValue();
1547 AddToWorklist(NewOp.getNode());
1548
1549 if (Replace)
1550 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1551 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1552 DAG.getValueType(OldVT));
1553}
1554
1555SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1556 EVT OldVT = Op.getValueType();
1557 SDLoc DL(Op);
1558 bool Replace = false;
1559 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1560 if (!NewOp.getNode())
1561 return SDValue();
1562 AddToWorklist(NewOp.getNode());
1563
1564 if (Replace)
1565 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1566 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1567}
1568
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion only makes sense once operations are being legalized.
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // Promote both operands, tracking whether each came from a load that
    // must be rewired afterwards.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation in the wide type, then truncate the result back.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    // (SDValue) here because the node may reference multiple values
    // (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1636
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  // Promotion only makes sense once operations are being legalized.
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // The shifted value must be extended to match the shift's semantics:
    // sign-extend for SRA, zero-extend for SRL, anything for SHL.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift in the wide type, then truncate back; the shift amount (N1) is
    // unchanged.
    SDLoc DL(Op);
    SDValue N1 = Op.getOperand(1);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1688
1689SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1690 if (!LegalOperations)
1691 return SDValue();
1692
1693 EVT VT = Op.getValueType();
1694 if (VT.isVector() || !VT.isInteger())
1695 return SDValue();
1696
1697 // If operation type is 'undesirable', e.g. i16 on x86, consider
1698 // promoting it.
1699 unsigned Opc = Op.getOpcode();
1700 if (TLI.isTypeDesirableForOp(Opc, VT))
1701 return SDValue();
1702
1703 EVT PVT = VT;
1704 // Consult target whether it is a good idea to promote this operation and
1705 // what's the right type to promote it to.
1706 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1707 assert(PVT != VT && "Don't know what type to promote to!");
1708 // fold (aext (aext x)) -> (aext x)
1709 // fold (aext (zext x)) -> (zext x)
1710 // fold (aext (sext x)) -> (sext x)
1711 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1712 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1713 }
1714 return SDValue();
1715}
1716
1717bool DAGCombiner::PromoteLoad(SDValue Op) {
1718 if (!LegalOperations)
1719 return false;
1720
1721 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1722 return false;
1723
1724 EVT VT = Op.getValueType();
1725 if (VT.isVector() || !VT.isInteger())
1726 return false;
1727
1728 // If operation type is 'undesirable', e.g. i16 on x86, consider
1729 // promoting it.
1730 unsigned Opc = Op.getOpcode();
1731 if (TLI.isTypeDesirableForOp(Opc, VT))
1732 return false;
1733
1734 EVT PVT = VT;
1735 // Consult target whether it is a good idea to promote this operation and
1736 // what's the right type to promote it to.
1737 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1738 assert(PVT != VT && "Don't know what type to promote to!");
1739
1740 SDLoc DL(Op);
1741 SDNode *N = Op.getNode();
1742 LoadSDNode *LD = cast<LoadSDNode>(N);
1743 EVT MemVT = LD->getMemoryVT();
1745 : LD->getExtensionType();
1746 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1747 LD->getChain(), LD->getBasePtr(),
1748 MemVT, LD->getMemOperand());
1749 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1750
1751 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1752 Result.dump(&DAG); dbgs() << '\n');
1753
1754 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1755 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1756
1757 AddToWorklist(Result.getNode());
1758 recursivelyDeleteUnusedNodes(N);
1759 return true;
1760 }
1761
1762 return false;
1763}
1764
1765/// Recursively delete a node which has no uses and any operands for
1766/// which it is the only use.
1767///
1768/// Note that this both deletes the nodes and removes them from the worklist.
1769/// It also adds any nodes who have had a user deleted to the worklist as they
1770/// may now have only one use and subject to other combines.
1771bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1772 if (!N->use_empty())
1773 return false;
1774
1775 SmallSetVector<SDNode *, 16> Nodes;
1776 Nodes.insert(N);
1777 do {
1778 N = Nodes.pop_back_val();
1779 if (!N)
1780 continue;
1781
1782 if (N->use_empty()) {
1783 for (const SDValue &ChildN : N->op_values())
1784 Nodes.insert(ChildN.getNode());
1785
1786 removeFromWorklist(N);
1787 DAG.DeleteNode(N);
1788 } else {
1789 AddToWorklist(N);
1790 }
1791 } while (!Nodes.empty());
1792 return true;
1793}
1794
1795//===----------------------------------------------------------------------===//
1796// Main DAG Combiner implementation
1797//===----------------------------------------------------------------------===//
1798
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // RAII hook: nodes created by the DAG while this runs are added to the
  // worklist automatically.
  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  //
  // Note: All nodes are not added to PruningList here, this is because the only
  // nodes which can be deleted are those which have no uses and all other nodes
  // which would otherwise be added to the worklist by the first call to
  // getNextWorklistEntry are already present in it.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // Legalization replaced N; it is no longer valid to combine here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. getNextWorklistEntry flags nodes that have been
    // combined before. Because the worklist uniques things already, this won't
    // repeatedly process the same operand.
    for (const SDValue &ChildN : N->op_values())
      AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
                    /*SkipIfCombinedBefore=*/true);

    SDValue RV = combine(N);

    // Null return value means no combine fired for N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // Invalidate cached info.
    ChainsWithoutMergeableStores.clear();

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));

    // Replace uses of N with RV; the multi-result and single-result cases
    // need different ReplaceAllUsesWith overloads.
    if (N->getNumValues() == RV->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist. Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken)
      AddToWorklistWithUsers(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1905
/// Dispatch N to the opcode-specific visit routine. Returns the replacement
/// value chosen by that routine, or a null SDValue when no generic combine
/// applies to this opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  // clang-format off
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::PTRADD: return visitPTRADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT: return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT: return visitSUBSAT(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO: return visitADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO: return visitSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT: return visitMULFIX(N);
  case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU: return visitAVG(N);
  case ISD::ABDS:
  case ISD::ABDU: return visitABD(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO: return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR: return visitFunnelShift(N);
  case ISD::SSHLSAT:
  case ISD::USHLSAT: return visitSHLSAT(N);
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::AssertAlign: return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  // NOTE(review): two case labels appear to be missing from this excerpt
  // before the next line (presumably the SIGN/ZERO _EXTEND_VECTOR_INREG
  // opcodes) — confirm against the full source.
  case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
  case ISD::FMAD: return visitFMAD(N);
  case ISD::FMULADD: return visitFMULADD(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::FPOW: return visitFPOW(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT: return visitXROUND(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM: return visitFMinMax(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::FFREXP: return visitFFREXP(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
  // NOTE(review): the case labels preceding this return (presumably the
  // PARTIAL_REDUCE_* opcodes) are missing from this excerpt — confirm
  // against the full source.
    return visitPARTIAL_REDUCE_MLA(N);
  case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
  case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
  case ISD::FREEZE: return visitFREEZE(N);
  case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
  case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
  case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
  // NOTE(review): one or more case labels are missing from this excerpt here.
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  // NOTE(review): several VECREDUCE_* case labels are missing from this
  // excerpt before the next line — confirm against the full source.
  case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
#include "llvm/IR/VPIntrinsics.def"
    return visitVPOp(N);
  }
  // clang-format on
  return SDValue();
}
2085
/// Try to simplify node N, in order: generic combines (visit), target
/// combines, type promotion, and finally CSE against the commuted form.
SDValue DAGCombiner::combine(SDNode *N) {
  // Honor the debug counter so individual combines can be bisected.
  if (!DebugCounter::shouldExecute(DAGCombineCounter))
    return SDValue();

  SDValue RV;
  if (!DisableGenericCombines)
    RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad rewrites uses in place; signal success by returning N.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
2157
2158/// Given a node, return its input chain if it has one, otherwise return a null
2159/// sd operand.
2161 if (unsigned NumOps = N->getNumOperands()) {
2162 if (N->getOperand(0).getValueType() == MVT::Other)
2163 return N->getOperand(0);
2164 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2165 return N->getOperand(NumOps-1);
2166 for (unsigned i = 1; i < NumOps-1; ++i)
2167 if (N->getOperand(i).getValueType() == MVT::Other)
2168 return N->getOperand(i);
2169 }
2170 return SDValue();
2171}
2172
2173SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2174 SDValue Operand = N->getOperand(0);
2175 EVT VT = Operand.getValueType();
2176 SDLoc dl(N);
2177
2178 // Canonicalize undef to quiet NaN.
2179 if (Operand.isUndef()) {
2180 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2181 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2182 }
2183 return SDValue();
2184}
2185
2186SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2187 // If N has two operands, where one has an input chain equal to the other,
2188 // the 'other' chain is redundant.
2189 if (N->getNumOperands() == 2) {
2190 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2191 return N->getOperand(0);
2192 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2193 return N->getOperand(1);
2194 }
2195
2196 // Don't simplify token factors if optnone.
2197 if (OptLevel == CodeGenOptLevel::None)
2198 return SDValue();
2199
2200 // Don't simplify the token factor if the node itself has too many operands.
2201 if (N->getNumOperands() > TokenFactorInlineLimit)
2202 return SDValue();
2203
2204 // If the sole user is a token factor, we should make sure we have a
2205 // chance to merge them together. This prevents TF chains from inhibiting
2206 // optimizations.
2207 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2208 AddToWorklist(*(N->user_begin()));
2209
2210 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2211 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2212 SmallPtrSet<SDNode*, 16> SeenOps;
2213 bool Changed = false; // If we should replace this token factor.
2214
2215 // Start out with this token factor.
2216 TFs.push_back(N);
2217
2218 // Iterate through token factors. The TFs grows when new token factors are
2219 // encountered.
2220 for (unsigned i = 0; i < TFs.size(); ++i) {
2221 // Limit number of nodes to inline, to avoid quadratic compile times.
2222 // We have to add the outstanding Token Factors to Ops, otherwise we might
2223 // drop Ops from the resulting Token Factors.
2224 if (Ops.size() > TokenFactorInlineLimit) {
2225 for (unsigned j = i; j < TFs.size(); j++)
2226 Ops.emplace_back(TFs[j], 0);
2227 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2228 // combiner worklist later.
2229 TFs.resize(i);
2230 break;
2231 }
2232
2233 SDNode *TF = TFs[i];
2234 // Check each of the operands.
2235 for (const SDValue &Op : TF->op_values()) {
2236 switch (Op.getOpcode()) {
2237 case ISD::EntryToken:
2238 // Entry tokens don't need to be added to the list. They are
2239 // redundant.
2240 Changed = true;
2241 break;
2242
2243 case ISD::TokenFactor:
2244 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2245 // Queue up for processing.
2246 TFs.push_back(Op.getNode());
2247 Changed = true;
2248 break;
2249 }
2250 [[fallthrough]];
2251
2252 default:
2253 // Only add if it isn't already in the list.
2254 if (SeenOps.insert(Op.getNode()).second)
2255 Ops.push_back(Op);
2256 else
2257 Changed = true;
2258 break;
2259 }
2260 }
2261 }
2262
2263 // Re-visit inlined Token Factors, to clean them up in case they have been
2264 // removed. Skip the first Token Factor, as this is the current node.
2265 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2266 AddToWorklist(TFs[i]);
2267
2268 // Remove Nodes that are chained to another node in the list. Do so
2269 // by walking up chains breath-first stopping when we've seen
2270 // another operand. In general we must climb to the EntryNode, but we can exit
2271 // early if we find all remaining work is associated with just one operand as
2272 // no further pruning is possible.
2273
2274 // List of nodes to search through and original Ops from which they originate.
2276 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2277 SmallPtrSet<SDNode *, 16> SeenChains;
2278 bool DidPruneOps = false;
2279
2280 unsigned NumLeftToConsider = 0;
2281 for (const SDValue &Op : Ops) {
2282 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2283 OpWorkCount.push_back(1);
2284 }
2285
2286 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2287 // If this is an Op, we can remove the op from the list. Remark any
2288 // search associated with it as from the current OpNumber.
2289 if (SeenOps.contains(Op)) {
2290 Changed = true;
2291 DidPruneOps = true;
2292 unsigned OrigOpNumber = 0;
2293 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2294 OrigOpNumber++;
2295 assert((OrigOpNumber != Ops.size()) &&
2296 "expected to find TokenFactor Operand");
2297 // Re-mark worklist from OrigOpNumber to OpNumber
2298 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2299 if (Worklist[i].second == OrigOpNumber) {
2300 Worklist[i].second = OpNumber;
2301 }
2302 }
2303 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2304 OpWorkCount[OrigOpNumber] = 0;
2305 NumLeftToConsider--;
2306 }
2307 // Add if it's a new chain
2308 if (SeenChains.insert(Op).second) {
2309 OpWorkCount[OpNumber]++;
2310 Worklist.push_back(std::make_pair(Op, OpNumber));
2311 }
2312 };
2313
2314 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2315 // We need at least be consider at least 2 Ops to prune.
2316 if (NumLeftToConsider <= 1)
2317 break;
2318 auto CurNode = Worklist[i].first;
2319 auto CurOpNumber = Worklist[i].second;
2320 assert((OpWorkCount[CurOpNumber] > 0) &&
2321 "Node should not appear in worklist");
2322 switch (CurNode->getOpcode()) {
2323 case ISD::EntryToken:
2324 // Hitting EntryToken is the only way for the search to terminate without
2325 // hitting
2326 // another operand's search. Prevent us from marking this operand
2327 // considered.
2328 NumLeftToConsider++;
2329 break;
2330 case ISD::TokenFactor:
2331 for (const SDValue &Op : CurNode->op_values())
2332 AddToWorklist(i, Op.getNode(), CurOpNumber);
2333 break;
2335 case ISD::LIFETIME_END:
2336 case ISD::CopyFromReg:
2337 case ISD::CopyToReg:
2338 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2339 break;
2340 default:
2341 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2342 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2343 break;
2344 }
2345 OpWorkCount[CurOpNumber]--;
2346 if (OpWorkCount[CurOpNumber] == 0)
2347 NumLeftToConsider--;
2348 }
2349
2350 // If we've changed things around then replace token factor.
2351 if (Changed) {
2353 if (Ops.empty()) {
2354 // The entry token is the only possible outcome.
2355 Result = DAG.getEntryNode();
2356 } else {
2357 if (DidPruneOps) {
2358 SmallVector<SDValue, 8> PrunedOps;
2359 //
2360 for (const SDValue &Op : Ops) {
2361 if (SeenChains.count(Op.getNode()) == 0)
2362 PrunedOps.push_back(Op);
2363 }
2364 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2365 } else {
2366 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2367 }
2368 }
2369 return Result;
2370 }
2371 return SDValue();
2372}
2373
2374/// MERGE_VALUES can always be eliminated.
2375SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2376 WorklistRemover DeadNodes(*this);
2377 // Replacing results may cause a different MERGE_VALUES to suddenly
2378 // be CSE'd with N, and carry its uses with it. Iterate until no
2379 // uses remain, to ensure that the node can be safely deleted.
2380 // First add the users of this node to the work list so that they
2381 // can be tried again once they have new operands.
2382 AddUsersToWorklist(N);
2383 do {
2384 // Do as a single replacement to avoid rewalking use lists.
2386 DAG.ReplaceAllUsesWith(N, Ops.data());
2387 } while (!N->use_empty());
2388 deleteAndRecombine(N);
2389 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2390}
2391
2392/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2393/// ConstantSDNode pointer else nullptr.
2396 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2397}
2398
// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
// NOTE(review): the first line of this function's signature is missing from
// this excerpt; only the trailing parameter is visible.
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    // Known bits of the wide source value.
    Known = DAG.computeKnownBits(Op);
    // A no-unsigned-wrap truncate guarantees the dropped high bits were zero.
    if (N->getFlags().hasNoUnsignedWrap())
      Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
    return true;
  }

  // Otherwise only a scalar-i1 value matched by the pattern below can be
  // treated as a truncate.
  if (N.getValueType().getScalarType() != MVT::i1 ||
      !sd_match(
      // NOTE(review): the pattern operands of this sd_match call are missing
      // from this excerpt — confirm against the full source.
    return false;

  Known = DAG.computeKnownBits(Op);
  // N behaves as a truncate-to-i1 only if all bits of Op above bit 0 are zero.
  return (Known.Zero | 1).isAllOnes();
}
2421
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
// NOTE(review): the first line of this function's signature is missing from
// this excerpt; only the trailing parameter is visible.
                                    const TargetLowering &TLI) {
  // Memory VT and address space of the candidate use, needed to query the
  // target's addressing-mode legality below.
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    // Indexed loads already consume an addressing mode; the base must be N.
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  // NOTE(review): an `else if (... dyn_cast<...>(Use))` branch header is
  // missing from this excerpt before the next lines (presumably the masked
  // load case) — confirm against the full source.
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  // NOTE(review): likewise a branch header (presumably the masked store case)
  // is missing from this excerpt here.
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else {
    return false;
  }

  // NOTE(review): the declaration of `AM` (an addressing-mode descriptor) is
  // missing from this excerpt.
  if (N->isAnyAdd()) {
    AM.HasBaseReg = true;
    // NOTE(review): the declaration of `Offset` is missing from this excerpt.
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    AM.HasBaseReg = true;
    // NOTE(review): the declaration of `Offset` is missing from this excerpt.
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else {
    return false;
  }

  // Finally ask the target whether this addressing mode is legal for the
  // memory type and address space of the use.
  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}
2479
/// This inverts a canonicalization in IR that replaces a variable select arm
/// with an identity constant. Codegen improves if we re-use the variable
/// operand rather than load a constant. This can also be converted into a
/// masked vector operation if the target supports it.
// NOTE(review): the first line of this function's signature is missing from
// this excerpt; only the trailing parameter is visible.
                                                  bool ShouldCommuteOperands) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Match a select as operand 1. The identity constant that we are looking for
  // is only valid as operand 1 of a non-commutative binop.
  if (ShouldCommuteOperands)
    std::swap(N0, N1);

  SDValue Cond, TVal, FVal;
  // NOTE(review): the leading lines of this sd_match call are missing from
  // this excerpt; only the final pattern operand is visible.
                 m_Value(FVal)))))
    return SDValue();

  // We can't hoist all instructions because of immediate UB (not speculatable).
  // For example div/rem by zero.
  // NOTE(review): the condition of this early-out is missing from this
  // excerpt — confirm against the full source.
    return SDValue();

  unsigned SelOpcode = N1.getOpcode();
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // This transform increases uses of N0, so freeze it to be safe.
  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
  unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
  if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
      TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
                                               FVal)) {
    SDValue F0 = DAG.getFreeze(N0);
    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
    return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
  }
  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
  if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
      TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
                                               TVal)) {
    SDValue F0 = DAG.getFreeze(N0);
    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
    return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
  }

  return SDValue();
}
2530
/// Fold a binary operator into a select of constants, e.g.
/// add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // First try the variable-arm / identity-constant fold, in both operand
  // orders for commutative ops.
  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
    return Sel;

  if (TLI.isCommutativeBinOp(BO->getOpcode()))
    if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
      return Sel;

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  auto BinOpcode = BO->getOpcode();
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);

    // Peek through trunc to shift amount type.
    if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
         BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
      // This is valid when the truncated bits of x are already zero.
      SDValue Op;
      KnownBits Known;
      if (isTruncateOf(DAG, Sel, Op, Known) &&
          // NOTE(review): the second conjunct of this condition is missing
          // from this excerpt — confirm against the full source.
        Sel = Op;
    }
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      // NOTE(review): the second conjunct of this condition is missing from
      // this excerpt — confirm against the full source.
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      // NOTE(review): the second conjunct of this condition is missing from
      // this excerpt — confirm against the full source.
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      // NOTE(review): the remaining conjunct(s) of this initializer are
      // missing from this excerpt — confirm against the full source.

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      // NOTE(review): the final conjunct of this condition is missing from
      // this excerpt — confirm against the full source.
    return SDValue();

  SDLoc DL(Sel);
  SDValue NewCT, NewCF;
  EVT VT = BO->getValueType(0);

  if (CanFoldNonConst) {
    // If CBO is an opaque constant, we can't rely on getNode to constant fold.
    if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
        (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
      NewCT = CT;
    else
      NewCT = CBO;

    if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
        (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
      NewCF = CF;
    else
      NewCF = CBO;
  } else {
    // We have a select-of-constants followed by a binary operator with a
    // constant. Eliminate the binop by pulling the constant math into the
    // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
    // CBO, CF + CBO
    NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
                    : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
    if (!NewCT)
      return SDValue();

    NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
                    : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
    if (!NewCF)
      return SDValue();
  }

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
}
2629
2631 SelectionDAG &DAG) {
2632 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2633 "Expecting add or sub");
2634
2635 // Match a constant operand and a zext operand for the math instruction:
2636 // add Z, C
2637 // sub C, Z
2638 bool IsAdd = N->getOpcode() == ISD::ADD;
2639 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2640 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2641 auto *CN = dyn_cast<ConstantSDNode>(C);
2642 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2643 return SDValue();
2644
2645 // Match the zext operand as a setcc of a boolean.
2646 if (Z.getOperand(0).getValueType() != MVT::i1)
2647 return SDValue();
2648
2649 // Match the compare as: setcc (X & 1), 0, eq.
2650 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2652 return SDValue();
2653
2654 // We are adding/subtracting a constant and an inverted low bit. Turn that
2655 // into a subtract/add of the low bit with incremented/decremented constant:
2656 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2657 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2658 EVT VT = C.getValueType();
2659 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2660 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2661 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2662 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2663}
2664
2665// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2666SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2667 SDValue N0 = N->getOperand(0);
2668 EVT VT = N0.getValueType();
2669 SDValue A, B;
2670
2671 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2673 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2674 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2675 }
2676 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2678 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2679 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2680 }
2681 return SDValue();
2682}
2683
2684/// Try to fold a pointer arithmetic node.
2685/// This needs to be done separately from normal addition, because pointer
2686/// addition is not commutative.
2687SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2688 SDValue N0 = N->getOperand(0);
2689 SDValue N1 = N->getOperand(1);
2690 EVT PtrVT = N0.getValueType();
2691 EVT IntVT = N1.getValueType();
2692 SDLoc DL(N);
2693
2694 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2695 // combines here depend on this assumption.
2696 assert(PtrVT == IntVT &&
2697 "PTRADD with different operand types is not supported");
2698
2699 // fold (ptradd x, 0) -> x
2700 if (isNullConstant(N1))
2701 return N0;
2702
2703 // fold (ptradd 0, x) -> x
2704 if (PtrVT == IntVT && isNullConstant(N0))
2705 return N1;
2706
2707 if (N0.getOpcode() == ISD::PTRADD &&
2708 !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2709 SDValue X = N0.getOperand(0);
2710 SDValue Y = N0.getOperand(1);
2711 SDValue Z = N1;
2712 bool N0OneUse = N0.hasOneUse();
2713 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2714 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2715
2716 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2717 // * y is a constant and (ptradd x, y) has one use; or
2718 // * y and z are both constants.
2719 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2720 // If both additions in the original were NUW, the new ones are as well.
2721 SDNodeFlags Flags =
2722 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2723 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2724 AddToWorklist(Add.getNode());
2725 // We can't set InBounds even if both original ptradds were InBounds and
2726 // NUW: SDAG usually represents pointers as integers, therefore, the
2727 // matched pattern behaves as if it had implicit casts:
2728 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
2729 // The outer inbounds ptradd might therefore rely on a provenance that x
2730 // does not have.
2731 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2732 }
2733 }
2734
2735 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2736 // That is problematic for settings like AArch64's CPA, which checks that
2737 // intermediate results of pointer arithmetic remain in bounds. The target
2738 // therefore needs to opt-in to enable them.
2740 DAG.getMachineFunction().getFunction(), PtrVT))
2741 return SDValue();
2742
2743 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2744 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2745 // global address GA and constant c, such that c can be folded into GA.
2746 // TODO: Support constant vector splats.
2747 SDValue GAValue = N0.getOperand(0);
2748 if (const GlobalAddressSDNode *GA =
2750 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2751 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2752 // If both additions in the original were NUW, reassociation preserves
2753 // that.
2754 SDNodeFlags Flags =
2755 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2756 // We can't set InBounds even if both original ptradds were InBounds and
2757 // NUW: SDAG usually represents pointers as integers, therefore, the
2758 // matched pattern behaves as if it had implicit casts:
2759 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
2760 // The outer inbounds ptradd might therefore rely on a provenance that
2761 // GA does not have.
2762 SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2763 AddToWorklist(Inner.getNode());
2764 return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2765 }
2766 }
2767 }
2768
2769 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2770 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2771 // y is not, and (add y, z) is used only once.
2772 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2773 // z is not, and (add y, z) is used only once.
2774 // The goal is to move constant offsets to the outermost ptradd, to create
2775 // more opportunities to fold offsets into memory instructions.
2776 // Together with the another combine above, this also implements
2777 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2778 SDValue X = N0;
2779 SDValue Y = N1.getOperand(0);
2780 SDValue Z = N1.getOperand(1);
2781 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2782 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2783
2784 // If both additions in the original were NUW, reassociation preserves that.
2785 SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
2786 SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
2787 if (CommonFlags.hasNoUnsignedWrap()) {
2788 // If both operations are NUW and the PTRADD is inbounds, the offests are
2789 // both non-negative, so the reassociated PTRADDs are also inbounds.
2790 ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
2791 }
2792
2793 if (ZIsConstant != YIsConstant) {
2794 if (YIsConstant)
2795 std::swap(Y, Z);
2796 SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2797 AddToWorklist(Inner.getNode());
2798 return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2799 }
2800 }
2801
2802 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2803 // that transformation can't block an offset folding at any use of the ptradd.
2804 // This should be done late, after legalization, so that it doesn't block
2805 // other ptradd combines that could enable more offset folding.
2806 if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2807 bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2808 return canFoldInAddressingMode(N, User, DAG, TLI);
2809 });
2810
2811 if (TransformCannotBreakAddrMode)
2812 return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2813 }
2814
2815 return SDValue();
2816}
2817
2818/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2819/// a shift and add with a different constant.
2821 SelectionDAG &DAG) {
2822 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2823 "Expecting add or sub");
2824
2825 // We need a constant operand for the add/sub, and the other operand is a
2826 // logical shift right: add (srl), C or sub C, (srl).
2827 bool IsAdd = N->getOpcode() == ISD::ADD;
2828 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2829 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2830 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2831 ShiftOp.getOpcode() != ISD::SRL)
2832 return SDValue();
2833
2834 // The shift must be of a 'not' value.
2835 SDValue Not = ShiftOp.getOperand(0);
2836 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2837 return SDValue();
2838
2839 // The shift must be moving the sign bit to the least-significant-bit.
2840 EVT VT = ShiftOp.getValueType();
2841 SDValue ShAmt = ShiftOp.getOperand(1);
2842 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2843 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2844 return SDValue();
2845
2846 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2847 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2848 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2849 if (SDValue NewC = DAG.FoldConstantArithmetic(
2850 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2851 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2852 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2853 Not.getOperand(0), ShAmt);
2854 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2855 }
2856
2857 return SDValue();
2858}
2859
2860static bool
2862 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2863 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2864}
2865
2866/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2867/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2868/// are no common bits set in the operands).
2869SDValue DAGCombiner::visitADDLike(SDNode *N) {
2870 SDValue N0 = N->getOperand(0);
2871 SDValue N1 = N->getOperand(1);
2872 EVT VT = N0.getValueType();
2873 SDLoc DL(N);
2874
2875 // fold (add x, undef) -> undef
2876 if (N0.isUndef())
2877 return N0;
2878 if (N1.isUndef())
2879 return N1;
2880
2881 // fold (add c1, c2) -> c1+c2
2882 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2883 return C;
2884
2885 // canonicalize constant to RHS
2888 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2889
2890 if (areBitwiseNotOfEachother(N0, N1))
2891 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2892
2893 // fold vector ops
2894 if (VT.isVector()) {
2895 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2896 return FoldedVOp;
2897
2898 // fold (add x, 0) -> x, vector edition
2900 return N0;
2901 }
2902
2903 // fold (add x, 0) -> x
2904 if (isNullConstant(N1))
2905 return N0;
2906
2907 if (N0.getOpcode() == ISD::SUB) {
2908 SDValue N00 = N0.getOperand(0);
2909 SDValue N01 = N0.getOperand(1);
2910
2911 // fold ((A-c1)+c2) -> (A+(c2-c1))
2912 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2913 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2914
2915 // fold ((c1-A)+c2) -> (c1+c2)-A
2916 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2917 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2918 }
2919
2920 // add (sext i1 X), 1 -> zext (not i1 X)
2921 // We don't transform this pattern:
2922 // add (zext i1 X), -1 -> sext (not i1 X)
2923 // because most (?) targets generate better code for the zext form.
2924 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2925 isOneOrOneSplat(N1)) {
2926 SDValue X = N0.getOperand(0);
2927 if ((!LegalOperations ||
2928 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2930 X.getScalarValueSizeInBits() == 1) {
2931 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2932 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2933 }
2934 }
2935
2936 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2937 // iff (or x, c0) is equivalent to (add x, c0).
2938 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2939 // iff (xor x, c0) is equivalent to (add x, c0).
2940 if (DAG.isADDLike(N0)) {
2941 SDValue N01 = N0.getOperand(1);
2942 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2943 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2944 }
2945
2946 if (SDValue NewSel = foldBinOpIntoSelect(N))
2947 return NewSel;
2948
2949 // reassociate add
2950 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2951 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2952 return RADD;
2953
2954 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2955 // equivalent to (add x, c).
2956 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2957 // equivalent to (add x, c).
2958 // Do this optimization only when adding c does not introduce instructions
2959 // for adding carries.
2960 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2961 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2962 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2963 // If N0's type does not split or is a sign mask, it does not introduce
2964 // add carry.
2965 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2966 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2969 if (NoAddCarry)
2970 return DAG.getNode(
2971 ISD::ADD, DL, VT,
2972 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2973 N0.getOperand(1));
2974 }
2975 return SDValue();
2976 };
2977 if (SDValue Add = ReassociateAddOr(N0, N1))
2978 return Add;
2979 if (SDValue Add = ReassociateAddOr(N1, N0))
2980 return Add;
2981
2982 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2983 if (SDValue SD =
2984 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2985 return SD;
2986 }
2987
2988 SDValue A, B, C, D;
2989
2990 // fold ((0-A) + B) -> B-A
2991 if (sd_match(N0, m_Neg(m_Value(A))))
2992 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2993
2994 // fold (A + (0-B)) -> A-B
2995 if (sd_match(N1, m_Neg(m_Value(B))))
2996 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2997
2998 // fold (A+(B-A)) -> B
2999 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
3000 return B;
3001
3002 // fold ((B-A)+A) -> B
3003 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
3004 return B;
3005
3006 // fold ((A-B)+(C-A)) -> (C-B)
3007 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3009 return DAG.getNode(ISD::SUB, DL, VT, C, B);
3010
3011 // fold ((A-B)+(B-C)) -> (A-C)
3012 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3014 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3015
3016 // fold (A+(B-(A+C))) to (B-C)
3017 // fold (A+(B-(C+A))) to (B-C)
3018 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
3019 return DAG.getNode(ISD::SUB, DL, VT, B, C);
3020
3021 // fold (A+((B-A)+or-C)) to (B+or-C)
3022 if (sd_match(N1,
3024 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3025 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3026
3027 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3028 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3029 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3031 return DAG.getNode(ISD::SUB, DL, VT,
3032 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3033 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3034
3035 // fold (add (umax X, C), -C) --> (usubsat X, C)
3036 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
3037 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3038 return (!Max && !Op) ||
3039 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3040 };
3041 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3042 /*AllowUndefs*/ true))
3043 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3044 N0.getOperand(1));
3045 }
3046
3048 return SDValue(N, 0);
3049
3050 if (isOneOrOneSplat(N1)) {
3051 // fold (add (xor a, -1), 1) -> (sub 0, a)
3052 if (isBitwiseNot(N0))
3053 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3054 N0.getOperand(0));
3055
3056 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3057 if (N0.getOpcode() == ISD::ADD) {
3058 SDValue A, Xor;
3059
3060 if (isBitwiseNot(N0.getOperand(0))) {
3061 A = N0.getOperand(1);
3062 Xor = N0.getOperand(0);
3063 } else if (isBitwiseNot(N0.getOperand(1))) {
3064 A = N0.getOperand(0);
3065 Xor = N0.getOperand(1);
3066 }
3067
3068 if (Xor)
3069 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3070 }
3071
3072 // Look for:
3073 // add (add x, y), 1
3074 // And if the target does not like this form then turn into:
3075 // sub y, (xor x, -1)
3076 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3077 N0.hasOneUse() &&
3078 // Limit this to after legalization if the add has wrap flags
3079 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3080 !N->getFlags().hasNoSignedWrap()))) {
3081 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3082 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3083 }
3084 }
3085
3086 // (x - y) + -1 -> add (xor y, -1), x
3087 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3088 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3089 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3090 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3091 }
3092
3093 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3094 // This can help if the inner add has multiple uses.
3095 APInt CM, CA;
3096 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3097 if (VT.getScalarSizeInBits() <= 64) {
3099 m_ConstInt(CM)))) &&
3101 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3102 SDNodeFlags Flags;
3103 // If all the inputs are nuw, the outputs can be nuw. If all the input
3104 // are _also_ nsw the outputs can be too.
3105 if (N->getFlags().hasNoUnsignedWrap() &&
3106 N0->getFlags().hasNoUnsignedWrap() &&
3109 if (N->getFlags().hasNoSignedWrap() &&
3110 N0->getFlags().hasNoSignedWrap() &&
3113 }
3114 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3115 DAG.getConstant(CM, DL, VT), Flags);
3116 return DAG.getNode(
3117 ISD::ADD, DL, VT, Mul,
3118 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3119 }
3120 // Also look in case there is an intermediate add.
3121 if (sd_match(N0, m_OneUse(m_Add(
3123 m_ConstInt(CM))),
3124 m_Value(B)))) &&
3126 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3127 SDNodeFlags Flags;
3128 // If all the inputs are nuw, the outputs can be nuw. If all the input
3129 // are _also_ nsw the outputs can be too.
3130 SDValue OMul =
3131 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3132 if (N->getFlags().hasNoUnsignedWrap() &&
3133 N0->getFlags().hasNoUnsignedWrap() &&
3134 OMul->getFlags().hasNoUnsignedWrap() &&
3135 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3137 if (N->getFlags().hasNoSignedWrap() &&
3138 N0->getFlags().hasNoSignedWrap() &&
3139 OMul->getFlags().hasNoSignedWrap() &&
3140 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3142 }
3143 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3144 DAG.getConstant(CM, DL, VT), Flags);
3145 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3146 return DAG.getNode(
3147 ISD::ADD, DL, VT, Add,
3148 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3149 }
3150 }
3151 }
3152
3153 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3154 return Combined;
3155
3156 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3157 return Combined;
3158
3159 return SDValue();
3160}
3161
3162// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3163// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
3164SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3165 SDValue N0 = N->getOperand(0);
3166 EVT VT = N0.getValueType();
3167 SDValue A, B;
3168
3169 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3170 (sd_match(N,
3172 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3175 m_Srl(m_Deferred(A), m_One()),
3176 m_Srl(m_Deferred(B), m_One()))))) {
3177 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3178 }
3179 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3180 (sd_match(N,
3182 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3185 m_Sra(m_Deferred(A), m_One()),
3186 m_Sra(m_Deferred(B), m_One()))))) {
3187 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3188 }
3189
3190 return SDValue();
3191}
3192
3193SDValue DAGCombiner::visitADD(SDNode *N) {
3194 SDValue N0 = N->getOperand(0);
3195 SDValue N1 = N->getOperand(1);
3196 EVT VT = N0.getValueType();
3197 SDLoc DL(N);
3198
3199 if (SDValue Combined = visitADDLike(N))
3200 return Combined;
3201
3202 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3203 return V;
3204
3205 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3206 return V;
3207
3208 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3209 return V;
3210
3211 // Try to match AVGFLOOR fixedwidth pattern
3212 if (SDValue V = foldAddToAvg(N, DL))
3213 return V;
3214
3215 // fold (a+b) -> (a|b) iff a and b share no bits.
3216 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3217 DAG.haveNoCommonBitsSet(N0, N1))
3218 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3219
3220 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3221 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3222 const APInt &C0 = N0->getConstantOperandAPInt(0);
3223 const APInt &C1 = N1->getConstantOperandAPInt(0);
3224 return DAG.getVScale(DL, VT, C0 + C1);
3225 }
3226
3227 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3228 if (N0.getOpcode() == ISD::ADD &&
3229 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3230 N1.getOpcode() == ISD::VSCALE) {
3231 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3232 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3233 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3234 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3235 }
3236
3237 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3238 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3239 N1.getOpcode() == ISD::STEP_VECTOR) {
3240 const APInt &C0 = N0->getConstantOperandAPInt(0);
3241 const APInt &C1 = N1->getConstantOperandAPInt(0);
3242 APInt NewStep = C0 + C1;
3243 return DAG.getStepVector(DL, VT, NewStep);
3244 }
3245
3246 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3247 if (N0.getOpcode() == ISD::ADD &&
3249 N1.getOpcode() == ISD::STEP_VECTOR) {
3250 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3251 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3252 APInt NewStep = SV0 + SV1;
3253 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3254 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3255 }
3256
3257 return SDValue();
3258}
3259
3260SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3261 unsigned Opcode = N->getOpcode();
3262 SDValue N0 = N->getOperand(0);
3263 SDValue N1 = N->getOperand(1);
3264 EVT VT = N0.getValueType();
3265 bool IsSigned = Opcode == ISD::SADDSAT;
3266 SDLoc DL(N);
3267
3268 // fold (add_sat x, undef) -> -1
3269 if (N0.isUndef() || N1.isUndef())
3270 return DAG.getAllOnesConstant(DL, VT);
3271
3272 // fold (add_sat c1, c2) -> c3
3273 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3274 return C;
3275
3276 // canonicalize constant to RHS
3279 return DAG.getNode(Opcode, DL, VT, N1, N0);
3280
3281 // fold vector ops
3282 if (VT.isVector()) {
3283 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3284 return FoldedVOp;
3285
3286 // fold (add_sat x, 0) -> x, vector edition
3288 return N0;
3289 }
3290
3291 // fold (add_sat x, 0) -> x
3292 if (isNullConstant(N1))
3293 return N0;
3294
3295 // If it cannot overflow, transform into an add.
3296 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3297 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3298
3299 return SDValue();
3300}
3301
3303 bool ForceCarryReconstruction = false) {
3304 bool Masked = false;
3305
3306 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3307 while (true) {
3308 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3309 return V;
3310
3311 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3312 V = V.getOperand(0);
3313 continue;
3314 }
3315
3316 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3317 if (ForceCarryReconstruction)
3318 return V;
3319
3320 Masked = true;
3321 V = V.getOperand(0);
3322 continue;
3323 }
3324
3325 break;
3326 }
3327
3328 // If this is not a carry, return.
3329 if (V.getResNo() != 1)
3330 return SDValue();
3331
3332 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3333 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3334 return SDValue();
3335
3336 EVT VT = V->getValueType(0);
3337 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3338 return SDValue();
3339
3340 // If the result is masked, then no matter what kind of bool it is we can
3341 // return. If it isn't, then we need to make sure the bool type is either 0 or
3342 // 1 and not other values.
3343 if (Masked ||
3344 TLI.getBooleanContents(V.getValueType()) ==
3346 return V;
3347
3348 return SDValue();
3349}
3350
3351/// Given the operands of an add/sub operation, see if the 2nd operand is a
3352/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3353/// the opcode and bypass the mask operation.
3354static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3355 SelectionDAG &DAG, const SDLoc &DL) {
3356 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3357 N1 = N1.getOperand(0);
3358
3359 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3360 return SDValue();
3361
3362 EVT VT = N0.getValueType();
3363 SDValue N10 = N1.getOperand(0);
3364 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3365 N10 = N10.getOperand(0);
3366
3367 if (N10.getValueType() != VT)
3368 return SDValue();
3369
3370 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3371 return SDValue();
3372
3373 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3374 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3375 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3376}
3377
3378/// Helper for doing combines based on N0 and N1 being added to each other.
3379SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3380 SDNode *LocReference) {
3381 EVT VT = N0.getValueType();
3382 SDLoc DL(LocReference);
3383
3384 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3385 SDValue Y, N;
3386 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3387 return DAG.getNode(ISD::SUB, DL, VT, N0,
3388 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3389
3390 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3391 return V;
3392
3393 // Look for:
3394 // add (add x, 1), y
3395 // And if the target does not like this form then turn into:
3396 // sub y, (xor x, -1)
3397 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3398 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3399 // Limit this to after legalization if the add has wrap flags
3400 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3401 !N0->getFlags().hasNoSignedWrap()))) {
3402 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3403 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3404 }
3405
3406 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3407 // Hoist one-use subtraction by non-opaque constant:
3408 // (x - C) + y -> (x + y) - C
3409 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3410 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3411 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3412 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3413 }
3414 // Hoist one-use subtraction from non-opaque constant:
3415 // (C - x) + y -> (y - x) + C
3416 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3417 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3418 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3419 }
3420 }
3421
3422 // add (mul x, C), x -> mul x, C+1
3423 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3424 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3425 N0.hasOneUse()) {
3426 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3427 DAG.getConstant(1, DL, VT));
3428 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3429 }
3430
3431 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3432 // rather than 'add 0/-1' (the zext should get folded).
3433 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3434 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3435 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3437 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3438 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3439 }
3440
3441 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3442 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3443 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3444 if (TN->getVT() == MVT::i1) {
3445 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3446 DAG.getConstant(1, DL, VT));
3447 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3448 }
3449 }
3450
3451 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3452 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3453 N1.getResNo() == 0)
3454 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3455 N0, N1.getOperand(0), N1.getOperand(2));
3456
3457 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3459 if (SDValue Carry = getAsCarry(TLI, N1))
3460 return DAG.getNode(ISD::UADDO_CARRY, DL,
3461 DAG.getVTList(VT, Carry.getValueType()), N0,
3462 DAG.getConstant(0, DL, VT), Carry);
3463
3464 return SDValue();
3465}
3466
3467SDValue DAGCombiner::visitADDC(SDNode *N) {
3468 SDValue N0 = N->getOperand(0);
3469 SDValue N1 = N->getOperand(1);
3470 EVT VT = N0.getValueType();
3471 SDLoc DL(N);
3472
3473 // If the flag result is dead, turn this into an ADD.
3474 if (!N->hasAnyUseOfValue(1))
3475 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3476 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3477
3478 // canonicalize constant to RHS.
3479 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3480 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3481 if (N0C && !N1C)
3482 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3483
3484 // fold (addc x, 0) -> x + no carry out
3485 if (isNullConstant(N1))
3486 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3487 DL, MVT::Glue));
3488
3489 // If it cannot overflow, transform into an add.
3491 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3492 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3493
3494 return SDValue();
3495}
3496
3497/**
3498 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
3499 * then the flip also occurs if computing the inverse is the same cost.
3500 * This function returns an empty SDValue in case it cannot flip the boolean
3501 * without increasing the cost of the computation. If you want to flip a boolean
3502 * no matter what, use DAG.getLogicalNOT.
3503 */
3505 const TargetLowering &TLI,
3506 bool Force) {
3507 if (Force && isa<ConstantSDNode>(V))
3508 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3509
3510 if (V.getOpcode() != ISD::XOR)
3511 return SDValue();
3512
3513 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3514 return V.getOperand(0);
3515 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3516 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3517 return SDValue();
3518}
3519
/// Combine an overflow-producing add (ISD::SADDO / ISD::UADDO).
/// Result 0 is the sum, result 1 the overflow flag.
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  // NOTE(review): the guard condition for this canonicalization (N0 constant
  // and N1 not) appears to be elided in this view — confirm against upstream.
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  if (IsSigned) {
    // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
      return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
                         DAG.getConstant(0, DL, VT), N0.getOperand(0));
  } else {
    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

    // Try the uaddo-specific folds with both operand orders.
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
3571
/// Helper for visitADDO (unsigned case): folds that turn a UADDO into a
/// UADDO_CARRY. Scalar types only.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    // NOTE(review): the overflow pre-check on Y + One that guards this return
    // appears to be elided in this view — confirm against upstream.
      return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
  // NOTE(review): a legality/availability check guarding this fold appears to
  // be elided in this view — confirm against upstream.
  if (SDValue Carry = getAsCarry(TLI, N1))
    return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
                       DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
3595
3596SDValue DAGCombiner::visitADDE(SDNode *N) {
3597 SDValue N0 = N->getOperand(0);
3598 SDValue N1 = N->getOperand(1);
3599 SDValue CarryIn = N->getOperand(2);
3600
3601 // canonicalize constant to RHS
3602 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3603 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3604 if (N0C && !N1C)
3605 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3606 N1, N0, CarryIn);
3607
3608 // fold (adde x, y, false) -> (addc x, y)
3609 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3610 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3611
3612 return SDValue();
3613}
3614
3615SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3616 SDValue N0 = N->getOperand(0);
3617 SDValue N1 = N->getOperand(1);
3618 SDValue CarryIn = N->getOperand(2);
3619 SDLoc DL(N);
3620
3621 // canonicalize constant to RHS
3622 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3623 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3624 if (N0C && !N1C)
3625 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3626
3627 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3628 if (isNullConstant(CarryIn)) {
3629 if (!LegalOperations ||
3630 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3631 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3632 }
3633
3634 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3635 if (isNullConstant(N0) && isNullConstant(N1)) {
3636 EVT VT = N0.getValueType();
3637 EVT CarryVT = CarryIn.getValueType();
3638 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3639 AddToWorklist(CarryExt.getNode());
3640 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3641 DAG.getConstant(1, DL, VT)),
3642 DAG.getConstant(0, DL, CarryVT));
3643 }
3644
3645 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3646 return Combined;
3647
3648 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3649 return Combined;
3650
3651 // We want to avoid useless duplication.
3652 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3653 // not a binary operation, this is not really possible to leverage this
3654 // existing mechanism for it. However, if more operations require the same
3655 // deduplication logic, then it may be worth generalize.
3656 SDValue Ops[] = {N1, N0, CarryIn};
3657 SDNode *CSENode =
3658 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3659 if (CSENode)
3660 return SDValue(CSENode, 0);
3661
3662 return SDValue();
3663}
3664
3665/**
3666 * If we are facing some sort of diamond carry propagation pattern try to
3667 * break it up to generate something like:
3668 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3669 *
3670 * The end result is usually an increase in operation required, but because the
3671 * carry is now linearized, other transforms can kick in and optimize the DAG.
3672 *
3673 * Patterns typically look something like
3674 * (uaddo A, B)
3675 * / \
3676 * Carry Sum
3677 * | \
3678 * | (uaddo_carry *, 0, Z)
3679 * | /
3680 * \ Carry
3681 * | /
3682 * (uaddo_carry X, *, *)
3683 *
3684 * But numerous variation exist. Our goal is to identify A, B, X and Z and
3685 * produce a combine with a single path for carry propagation.
3686 */
3688 SelectionDAG &DAG, SDValue X,
3689 SDValue Carry0, SDValue Carry1,
3690 SDNode *N) {
3691 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3692 return SDValue();
3693 if (Carry1.getOpcode() != ISD::UADDO)
3694 return SDValue();
3695
3696 SDValue Z;
3697
3698 /**
3699 * First look for a suitable Z. It will present itself in the form of
3700 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3701 */
3702 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3703 isNullConstant(Carry0.getOperand(1))) {
3704 Z = Carry0.getOperand(2);
3705 } else if (Carry0.getOpcode() == ISD::UADDO &&
3706 isOneConstant(Carry0.getOperand(1))) {
3707 EVT VT = Carry0->getValueType(1);
3708 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3709 } else {
3710 // We couldn't find a suitable Z.
3711 return SDValue();
3712 }
3713
3714
3715 auto cancelDiamond = [&](SDValue A,SDValue B) {
3716 SDLoc DL(N);
3717 SDValue NewY =
3718 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3719 Combiner.AddToWorklist(NewY.getNode());
3720 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3721 DAG.getConstant(0, DL, X.getValueType()),
3722 NewY.getValue(1));
3723 };
3724
3725 /**
3726 * (uaddo A, B)
3727 * |
3728 * Sum
3729 * |
3730 * (uaddo_carry *, 0, Z)
3731 */
3732 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3733 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3734 }
3735
3736 /**
3737 * (uaddo_carry A, 0, Z)
3738 * |
3739 * Sum
3740 * |
3741 * (uaddo *, B)
3742 */
3743 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3744 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3745 }
3746
3747 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3748 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3749 }
3750
3751 return SDValue();
3752}
3753
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       | PartialCarryOutY      |
//       |     \                 |
//       |      \               /
//   AddCarrySum |    ______/
//                \ |
//   CarryOut = (or *, *)
//
// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
// with a single path for carry/borrow out propagation.
// NOTE(review): the first line of this signature (the function name) appears
// to be elided in this view.
                                   SDValue N0, SDValue N1, SDNode *N) {
  // Both operands of the merging node (or/xor/and) must be carry bits.
  SDValue Carry0 = getAsCarry(TLI, N0);
  if (!Carry0)
    return SDValue();
  SDValue Carry1 = getAsCarry(TLI, N1);
  if (!Carry1)
    return SDValue();

  // Both carries must come from the same kind of overflowing op.
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();
  // Guarantee identical type of CarryOut
  EVT CarryOutType = N->getValueType(0);
  if (CarryOutType != Carry0.getValue(1).getValueType() ||
      CarryOutType != Carry1.getValue(1).getValueType())
    return SDValue();

  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
    std::swap(Carry0, Carry1);

  // Check if nodes are connected in expected way.
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  // The merged op must be available (or custom-lowered) for this type.
  unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  CarryIn = getAsCarry(TLI, CarryIn, true);
  if (!CarryIn)
    return SDValue();

  SDLoc DL(N);
  CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
                                  Carry1->getValueType(0));
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, CarryOutType);
  return Merged.getValue(1);
}
3851
3852SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3853 SDValue CarryIn, SDNode *N) {
3854 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3855 // carry.
3856 if (isBitwiseNot(N0))
3857 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3858 SDLoc DL(N);
3859 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3860 N0.getOperand(0), NotC);
3861 return CombineTo(
3862 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3863 }
3864
3865 // Iff the flag result is dead:
3866 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3867 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3868 // or the dependency between the instructions.
3869 if ((N0.getOpcode() == ISD::ADD ||
3870 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3871 N0.getValue(1) != CarryIn)) &&
3872 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3873 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3874 N0.getOperand(0), N0.getOperand(1), CarryIn);
3875
3876 /**
3877 * When one of the uaddo_carry argument is itself a carry, we may be facing
3878 * a diamond carry propagation. In which case we try to transform the DAG
3879 * to ensure linear carry propagation if that is possible.
3880 */
3881 if (auto Y = getAsCarry(TLI, N1)) {
3882 // Because both are carries, Y and Z can be swapped.
3883 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3884 return R;
3885 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3886 return R;
3887 }
3888
3889 return SDValue();
3890}
3891
3892SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3893 SDValue CarryIn, SDNode *N) {
3894 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3895 if (isBitwiseNot(N0)) {
3896 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3897 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3898 N0.getOperand(0), NotC);
3899 }
3900
3901 return SDValue();
3902}
3903
3904SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3905 SDValue N0 = N->getOperand(0);
3906 SDValue N1 = N->getOperand(1);
3907 SDValue CarryIn = N->getOperand(2);
3908 SDLoc DL(N);
3909
3910 // canonicalize constant to RHS
3911 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3912 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3913 if (N0C && !N1C)
3914 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3915
3916 // fold (saddo_carry x, y, false) -> (saddo x, y)
3917 if (isNullConstant(CarryIn)) {
3918 if (!LegalOperations ||
3919 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3920 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3921 }
3922
3923 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3924 return Combined;
3925
3926 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3927 return Combined;
3928
3929 return SDValue();
3930}
3931
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
// NOTE(review): the first line of this signature (the function name and the
// DstVT/SrcVT/LHS parameters) appears to be elided in this view.
                                     SDValue RHS, SelectionDAG &DAG,
                                     const SDLoc &DL) {
  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
         "Illegal truncation");

  // Same-type case: emit the USUBSAT directly, no truncation needed.
  if (DstVT == SrcVT)
    return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);

  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
  // clamping RHS.
  // NOTE(review): the declaration of UpperBits (presumably a mask of the bits
  // above DstVT's scalar width) appears to be elided in this view.
                                        DstVT.getScalarSizeInBits());
  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
    return SDValue();

  // Clamp RHS to DstVT's max value so the subsequent truncation is lossless.
  SDValue SatLimit =
  // NOTE(review): the constant expression feeding SatLimit appears to be
  // elided in this view — confirm against upstream.
                                 DstVT.getScalarSizeInBits()),
                     DL, SrcVT);
  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
}
3959
3960// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3961// usubsat(a,b), optionally as a truncated type.
3962SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3963 if (N->getOpcode() != ISD::SUB ||
3964 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3965 return SDValue();
3966
3967 EVT SubVT = N->getValueType(0);
3968 SDValue Op0 = N->getOperand(0);
3969 SDValue Op1 = N->getOperand(1);
3970
3971 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3972 // they may be converted to usubsat(a,b).
3973 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3974 SDValue MaxLHS = Op0.getOperand(0);
3975 SDValue MaxRHS = Op0.getOperand(1);
3976 if (MaxLHS == Op1)
3977 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3978 if (MaxRHS == Op1)
3979 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3980 }
3981
3982 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3983 SDValue MinLHS = Op1.getOperand(0);
3984 SDValue MinRHS = Op1.getOperand(1);
3985 if (MinLHS == Op0)
3986 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3987 if (MinRHS == Op0)
3988 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3989 }
3990
3991 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3992 if (Op1.getOpcode() == ISD::TRUNCATE &&
3993 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3994 Op1.getOperand(0).hasOneUse()) {
3995 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3996 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3997 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3998 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3999 DAG, DL);
4000 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
4001 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
4002 DAG, DL);
4003 }
4004
4005 return SDValue();
4006}
4007
// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
// counting leading ones. Broadly, it replaces the subtraction with a left
// shift.
//
// * DAG Legalisation Pattern:
//
//   (sub (ctlz (zeroextend (not Src)))
//        BitWidthDiff)
//
//     if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
//     -->
//
//   (ctlz_zero_undef (not (shl (anyextend Src)
//                              BitWidthDiff)))
//
// * Type Legalisation Pattern:
//
//   (sub (ctlz (and (xor Src XorMask)
//                   AndMask))
//        BitWidthDiff)
//
//     if AndMask has only trailing ones
//     and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
//     and XorMask has more trailing ones than AndMask
//     -->
//
//   (ctlz_zero_undef (not (shl Src BitWidthDiff)))
template <class MatchContextClass>
// NOTE(review): the function signature line (name and SDNode*/SelectionDAG&
// parameters) appears to be elided in this view.
  const SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned BitWidth = VT.getScalarSizeInBits();

  MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);

  APInt AndMask;
  APInt XorMask;
  APInt BitWidthDiff;

  SDValue CtlzOp;
  SDValue Src;

  // The root must be (sub (ctlz CtlzOp), BitWidthDiff).
  if (!sd_context_match(
          N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
    return SDValue();

  if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
    // DAG Legalisation Pattern:
    // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
    if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
      return SDValue();

    Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
  } else if (sd_context_match(CtlzOp, Matcher,
                              m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
                                    m_ConstInt(AndMask)))) {
    // Type Legalisation Pattern:
    // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
    if (BitWidthDiff.getZExtValue() >= BitWidth)
      return SDValue();
    unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
    if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
      return SDValue();
  } else
    return SDValue();

  // Emit (ctlz_zero_undef (not (shl Src BitWidthDiff))).
  SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
  SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
  SDValue Not =
      Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));

  return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
}
4082
// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
// NOTE(review): the first line of this signature (the function name and the
// SDNode*/SelectionDAG& parameters) appears to be elided in this view.
                                   const SDLoc &DL) {
  assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
  SDValue Sub0 = N->getOperand(0);
  SDValue Sub1 = N->getOperand(1);

  // Returns divrem(x,y)[1] (the remainder) when DivRem is the quotient of
  // x (== Sub0) divided by MaybeY; otherwise a null SDValue.
  auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
    if ((DivRem.getOpcode() == ISD::SDIVREM ||
         DivRem.getOpcode() == ISD::UDIVREM) &&
        DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
        DivRem.getOperand(1) == MaybeY) {
      return SDValue(DivRem.getNode(), 1);
    }
    return SDValue();
  };

  if (Sub1.getOpcode() == ISD::MUL) {
    // (sub x, (mul divrem(x,y)[0], y))
    SDValue Mul0 = Sub1.getOperand(0);
    SDValue Mul1 = Sub1.getOperand(1);

    // The multiply is commutative, so try both operand orders.
    if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
      return Res;

    if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
      return Res;

  } else if (Sub1.getOpcode() == ISD::SHL) {
    // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
    SDValue Shl0 = Sub1.getOperand(0);
    SDValue Shl1 = Sub1.getOperand(1);
    // Check if Shl0 is divrem(x, Y)[0]
    if ((Shl0.getOpcode() == ISD::SDIVREM ||
         Shl0.getOpcode() == ISD::UDIVREM) &&
        Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {

      SDValue Divisor = Shl0.getOperand(1);

      ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
      // NOTE(review): the declaration of ShC (presumably
      // isConstOrConstSplat(Shl1), the shift-amount constant) appears to be
      // elided in this view — confirm against upstream.
      if (!DivC || !ShC)
        return SDValue();

      // Fold only when the divisor is exactly 1 << shift-amount.
      if (DivC->getAPIntValue().isPowerOf2() &&
          DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
        return SDValue(Shl0.getNode(), 1);
    }
  }
  return SDValue();
}
4134
4135// Since it may not be valid to emit a fold to zero for vector initializers
4136// check if we can before folding.
4137static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4138 SelectionDAG &DAG, bool LegalOperations) {
4139 if (!VT.isVector())
4140 return DAG.getConstant(0, DL, VT);
4141 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4142 return DAG.getConstant(0, DL, VT);
4143 return SDValue();
4144}
4145
4146SDValue DAGCombiner::visitSUB(SDNode *N) {
4147 SDValue N0 = N->getOperand(0);
4148 SDValue N1 = N->getOperand(1);
4149 EVT VT = N0.getValueType();
4150 unsigned BitWidth = VT.getScalarSizeInBits();
4151 SDLoc DL(N);
4152
4154 return V;
4155
4156 // fold (sub x, x) -> 0
4157 if (N0 == N1)
4158 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4159
4160 // fold (sub c1, c2) -> c3
4161 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4162 return C;
4163
4164 // fold vector ops
4165 if (VT.isVector()) {
4166 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4167 return FoldedVOp;
4168
4169 // fold (sub x, 0) -> x, vector edition
4171 return N0;
4172 }
4173
4174 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4175 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4176 if (N1.hasOneUse() && hasUMin(VT)) {
4177 SDValue Y;
4178 auto MS0 = m_Specific(N0);
4179 auto MVY = m_Value(Y);
4180 auto MZ = m_Zero();
4181 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4182 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4183
4184 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4185 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4186 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4187 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4188
4189 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4190 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4191 }
4192
4193 if (SDValue NewSel = foldBinOpIntoSelect(N))
4194 return NewSel;
4195
4196 // fold (sub x, c) -> (add x, -c)
4197 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4198 return DAG.getNode(ISD::ADD, DL, VT, N0,
4199 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4200
4201 if (isNullOrNullSplat(N0)) {
4202 // Right-shifting everything out but the sign bit followed by negation is
4203 // the same as flipping arithmetic/logical shift type without the negation:
4204 // -(X >>u 31) -> (X >>s 31)
4205 // -(X >>s 31) -> (X >>u 31)
4206 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4207 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4208 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4209 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4210 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4211 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4212 }
4213 }
4214
4215 // 0 - X --> 0 if the sub is NUW.
4216 if (N->getFlags().hasNoUnsignedWrap())
4217 return N0;
4218
4220 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4221 // N1 must be 0 because negating the minimum signed value is undefined.
4222 if (N->getFlags().hasNoSignedWrap())
4223 return N0;
4224
4225 // 0 - X --> X if X is 0 or the minimum signed value.
4226 return N1;
4227 }
4228
4229 // Convert 0 - abs(x).
4230 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4232 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4233 return Result;
4234
4235 // Similar to the previous rule, but this time targeting an expanded abs.
4236 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4237 // as well as
4238 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4239 // Note that these two are applicable to both signed and unsigned min/max.
4240 SDValue X;
4241 SDValue S0;
4242 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4243 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4244 m_UMax(m_Value(X), NegPat),
4245 m_SMin(m_Value(X), NegPat),
4246 m_UMin(m_Value(X), NegPat))))) {
4247 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4248 if (hasOperation(NewOpc, VT))
4249 return DAG.getNode(NewOpc, DL, VT, X, S0);
4250 }
4251
4252 // Fold neg(splat(neg(x)) -> splat(x)
4253 if (VT.isVector()) {
4254 SDValue N1S = DAG.getSplatValue(N1, true);
4255 if (N1S && N1S.getOpcode() == ISD::SUB &&
4256 isNullConstant(N1S.getOperand(0)))
4257 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4258 }
4259
4260 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4261 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4262 isOneOrOneSplat(N1->getOperand(1))) {
4263 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4264 if (VT.isVector())
4265 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4269 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4270 DAG.getValueType(ExtVT));
4271 }
4272 }
4273 }
4274
4275 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4277 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4278
4279 // fold (A - (0-B)) -> A+B
4280 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4281 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4282
4283 // fold A-(A-B) -> B
4284 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4285 return N1.getOperand(1);
4286
4287 // fold (A+B)-A -> B
4288 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4289 return N0.getOperand(1);
4290
4291 // fold (A+B)-B -> A
4292 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4293 return N0.getOperand(0);
4294
4295 // fold (A+C1)-C2 -> A+(C1-C2)
4296 if (N0.getOpcode() == ISD::ADD) {
4297 SDValue N01 = N0.getOperand(1);
4298 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4299 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4300 }
4301
4302 // fold C2-(A+C1) -> (C2-C1)-A
4303 if (N1.getOpcode() == ISD::ADD) {
4304 SDValue N11 = N1.getOperand(1);
4305 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4306 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4307 }
4308
4309 // fold (A-C1)-C2 -> A-(C1+C2)
4310 if (N0.getOpcode() == ISD::SUB) {
4311 SDValue N01 = N0.getOperand(1);
4312 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4313 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4314 }
4315
4316 // fold (c1-A)-c2 -> (c1-c2)-A
4317 if (N0.getOpcode() == ISD::SUB) {
4318 SDValue N00 = N0.getOperand(0);
4319 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4320 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4321 }
4322
4323 SDValue A, B, C;
4324
4325 // fold ((A+(B+C))-B) -> A+C
4326 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4327 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4328
4329 // fold ((A+(B-C))-B) -> A-C
4330 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4331 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4332
4333 // fold ((A-(B-C))-C) -> A-B
4334 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4335 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4336
4337 // fold (A-(B-C)) -> A+(C-B)
4338 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4339 return DAG.getNode(ISD::ADD, DL, VT, N0,
4340 DAG.getNode(ISD::SUB, DL, VT, C, B));
4341
4342 // A - (A & B) -> A & (~B)
4343 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4344 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4345 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4346
4347 // fold (A - (-B * C)) -> (A + (B * C))
4348 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4349 return DAG.getNode(ISD::ADD, DL, VT, N0,
4350 DAG.getNode(ISD::MUL, DL, VT, B, C));
4351
4352 // If either operand of a sub is undef, the result is undef
4353 if (N0.isUndef())
4354 return N0;
4355 if (N1.isUndef())
4356 return N1;
4357
4358 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4359 return V;
4360
4361 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4362 return V;
4363
4364 // Try to match AVGCEIL fixedwidth pattern
4365 if (SDValue V = foldSubToAvg(N, DL))
4366 return V;
4367
4368 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4369 return V;
4370
4371 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4372 return V;
4373
4374 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4375 return V;
4376
4377 // (A - B) - 1 -> add (xor B, -1), A
4379 m_One(/*AllowUndefs=*/true))))
4380 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4381
4382 // Look for:
4383 // sub y, (xor x, -1)
4384 // And if the target does not like this form then turn into:
4385 // add (add x, y), 1
4386 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4387 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4388 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4389 }
4390
4391 // Hoist one-use addition by non-opaque constant:
4392 // (x + C) - y -> (x - y) + C
4393 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4394 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4395 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4396 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4397 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4398 }
4399 // y - (x + C) -> (y - x) - C
4400 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4401 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4402 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4403 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4404 }
4405 // (x - C) - y -> (x - y) - C
4406 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4407 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4408 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4409 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4410 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4411 }
4412 // (C - x) - y -> C - (x + y)
4413 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4414 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4415 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4416 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4417 }
4418
4419 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4420 // rather than 'sub 0/1' (the sext should get folded).
4421 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4422 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4423 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4424 TLI.getBooleanContents(VT) ==
4426 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4427 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4428 }
4429
4430 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4431 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4433 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4434 return DAG.getNode(ISD::ABS, DL, VT, A);
4435
4436 // If the relocation model supports it, consider symbol offsets.
4437 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4438 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4439 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4440 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4441 if (GA->getGlobal() == GB->getGlobal())
4442 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4443 DL, VT);
4444 }
4445
4446 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4447 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4448 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4449 if (TN->getVT() == MVT::i1) {
4450 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4451 DAG.getConstant(1, DL, VT));
4452 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4453 }
4454 }
4455
4456 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4457 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4458 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4459 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4460 }
4461
4462 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4463 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4464 APInt NewStep = -N1.getConstantOperandAPInt(0);
4465 return DAG.getNode(ISD::ADD, DL, VT, N0,
4466 DAG.getStepVector(DL, VT, NewStep));
4467 }
4468
4469 // Prefer an add for more folding potential and possibly better codegen:
4470 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4471 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4472 SDValue ShAmt = N1.getOperand(1);
4473 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4474 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4475 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4476 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4477 }
4478 }
4479
4480 // As with the previous fold, prefer add for more folding potential.
4481 // Subtracting SMIN/0 is the same as adding SMIN/0:
4482 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4483 if (N1.getOpcode() == ISD::SHL) {
4484 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4485 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4486 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4487 }
4488
4489 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4490 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4491 N0.getResNo() == 0 && N0.hasOneUse())
4492 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4493 N0.getOperand(0), N1, N0.getOperand(2));
4494
4496 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4497 if (SDValue Carry = getAsCarry(TLI, N0)) {
4498 SDValue X = N1;
4499 SDValue Zero = DAG.getConstant(0, DL, VT);
4500 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4501 return DAG.getNode(ISD::UADDO_CARRY, DL,
4502 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4503 Carry);
4504 }
4505 }
4506
4507 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4508 // sub C0, X --> xor X, C0
4509 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4510 if (!C0->isOpaque()) {
4511 const APInt &C0Val = C0->getAPIntValue();
4512 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4513 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4514 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4515 }
4516 }
4517
4518 // smax(a,b) - smin(a,b) --> abds(a,b)
4519 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4520 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4522 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4523
4524 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4525 if (hasOperation(ISD::ABDS, VT) &&
4526 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4528 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4529
4530 // umax(a,b) - umin(a,b) --> abdu(a,b)
4531 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4532 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4534 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4535
4536 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4537 if (hasOperation(ISD::ABDU, VT) &&
4538 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4540 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4541
4542 return SDValue();
4543}
4544
4545SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4546 unsigned Opcode = N->getOpcode();
4547 SDValue N0 = N->getOperand(0);
4548 SDValue N1 = N->getOperand(1);
4549 EVT VT = N0.getValueType();
4550 bool IsSigned = Opcode == ISD::SSUBSAT;
4551 SDLoc DL(N);
4552
4553 // fold (sub_sat x, undef) -> 0
4554 if (N0.isUndef() || N1.isUndef())
4555 return DAG.getConstant(0, DL, VT);
4556
4557 // fold (sub_sat x, x) -> 0
4558 if (N0 == N1)
4559 return DAG.getConstant(0, DL, VT);
4560
4561 // fold (sub_sat c1, c2) -> c3
4562 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4563 return C;
4564
4565 // fold vector ops
4566 if (VT.isVector()) {
4567 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4568 return FoldedVOp;
4569
4570 // fold (sub_sat x, 0) -> x, vector edition
4572 return N0;
4573 }
4574
4575 // fold (sub_sat x, 0) -> x
4576 if (isNullConstant(N1))
4577 return N0;
4578
4579 // If it cannot overflow, transform into an sub.
4580 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4581 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4582
4583 return SDValue();
4584}
4585
4586SDValue DAGCombiner::visitSUBC(SDNode *N) {
4587 SDValue N0 = N->getOperand(0);
4588 SDValue N1 = N->getOperand(1);
4589 EVT VT = N0.getValueType();
4590 SDLoc DL(N);
4591
4592 // If the flag result is dead, turn this into an SUB.
4593 if (!N->hasAnyUseOfValue(1))
4594 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4595 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4596
4597 // fold (subc x, x) -> 0 + no borrow
4598 if (N0 == N1)
4599 return CombineTo(N, DAG.getConstant(0, DL, VT),
4600 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4601
4602 // fold (subc x, 0) -> x + no borrow
4603 if (isNullConstant(N1))
4604 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4605
4606 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4607 if (isAllOnesConstant(N0))
4608 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4609 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4610
4611 return SDValue();
4612}
4613
4614SDValue DAGCombiner::visitSUBO(SDNode *N) {
4615 SDValue N0 = N->getOperand(0);
4616 SDValue N1 = N->getOperand(1);
4617 EVT VT = N0.getValueType();
4618 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4619
4620 EVT CarryVT = N->getValueType(1);
4621 SDLoc DL(N);
4622
4623 // If the flag result is dead, turn this into an SUB.
4624 if (!N->hasAnyUseOfValue(1))
4625 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4626 DAG.getUNDEF(CarryVT));
4627
4628 // fold (subo x, x) -> 0 + no borrow
4629 if (N0 == N1)
4630 return CombineTo(N, DAG.getConstant(0, DL, VT),
4631 DAG.getConstant(0, DL, CarryVT));
4632
4633 // fold (subox, c) -> (addo x, -c)
4634 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4635 if (IsSigned && !N1C->isMinSignedValue())
4636 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4637 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4638
4639 // fold (subo x, 0) -> x + no borrow
4640 if (isNullOrNullSplat(N1))
4641 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4642
4643 // If it cannot overflow, transform into an sub.
4644 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4645 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4646 DAG.getConstant(0, DL, CarryVT));
4647
4648 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4649 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4650 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4651 DAG.getConstant(0, DL, CarryVT));
4652
4653 return SDValue();
4654}
4655
4656SDValue DAGCombiner::visitSUBE(SDNode *N) {
4657 SDValue N0 = N->getOperand(0);
4658 SDValue N1 = N->getOperand(1);
4659 SDValue CarryIn = N->getOperand(2);
4660
4661 // fold (sube x, y, false) -> (subc x, y)
4662 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4663 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4664
4665 return SDValue();
4666}
4667
4668SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4669 SDValue N0 = N->getOperand(0);
4670 SDValue N1 = N->getOperand(1);
4671 SDValue CarryIn = N->getOperand(2);
4672
4673 // fold (usubo_carry x, y, false) -> (usubo x, y)
4674 if (isNullConstant(CarryIn)) {
4675 if (!LegalOperations ||
4676 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4677 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4678 }
4679
4680 return SDValue();
4681}
4682
4683SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4684 SDValue N0 = N->getOperand(0);
4685 SDValue N1 = N->getOperand(1);
4686 SDValue CarryIn = N->getOperand(2);
4687
4688 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4689 if (isNullConstant(CarryIn)) {
4690 if (!LegalOperations ||
4691 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4692 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4693 }
4694
4695 return SDValue();
4696}
4697
4698// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4699// UMULFIXSAT here.
4700SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4701 SDValue N0 = N->getOperand(0);
4702 SDValue N1 = N->getOperand(1);
4703 SDValue Scale = N->getOperand(2);
4704 EVT VT = N0.getValueType();
4705
4706 // fold (mulfix x, undef, scale) -> 0
4707 if (N0.isUndef() || N1.isUndef())
4708 return DAG.getConstant(0, SDLoc(N), VT);
4709
4710 // Canonicalize constant to RHS (vector doesn't have to splat)
4713 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4714
4715 // fold (mulfix x, 0, scale) -> 0
4716 if (isNullConstant(N1))
4717 return DAG.getConstant(0, SDLoc(N), VT);
4718
4719 return SDValue();
4720}
4721
4722template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4723 SDValue N0 = N->getOperand(0);
4724 SDValue N1 = N->getOperand(1);
4725 EVT VT = N0.getValueType();
4726 unsigned BitWidth = VT.getScalarSizeInBits();
4727 SDLoc DL(N);
4728 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4729 MatchContextClass Matcher(DAG, TLI, N);
4730
4731 // fold (mul x, undef) -> 0
4732 if (N0.isUndef() || N1.isUndef())
4733 return DAG.getConstant(0, DL, VT);
4734
4735 // fold (mul c1, c2) -> c1*c2
4736 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4737 return C;
4738
4739 // canonicalize constant to RHS (vector doesn't have to splat)
4742 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4743
4744 bool N1IsConst = false;
4745 bool N1IsOpaqueConst = false;
4746 APInt ConstValue1;
4747
4748 // fold vector ops
4749 if (VT.isVector()) {
4750 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4751 if (!UseVP)
4752 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4753 return FoldedVOp;
4754
4755 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4756 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4757 "Splat APInt should be element width");
4758 } else {
4759 N1IsConst = isa<ConstantSDNode>(N1);
4760 if (N1IsConst) {
4761 ConstValue1 = N1->getAsAPIntVal();
4762 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4763 }
4764 }
4765
4766 // fold (mul x, 0) -> 0
4767 if (N1IsConst && ConstValue1.isZero())
4768 return N1;
4769
4770 // fold (mul x, 1) -> x
4771 if (N1IsConst && ConstValue1.isOne())
4772 return N0;
4773
4774 if (!UseVP)
4775 if (SDValue NewSel = foldBinOpIntoSelect(N))
4776 return NewSel;
4777
4778 // fold (mul x, -1) -> 0-x
4779 if (N1IsConst && ConstValue1.isAllOnes())
4780 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4781
4782 // fold (mul x, (1 << c)) -> x << c
4783 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4784 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4785 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4786 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4787 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4788 SDNodeFlags Flags;
4789 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4790 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4791 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4792 }
4793 }
4794
4795 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4796 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4797 unsigned Log2Val = (-ConstValue1).logBase2();
4798
4799 // FIXME: If the input is something that is easily negated (e.g. a
4800 // single-use add), we should put the negate there.
4801 return Matcher.getNode(
4802 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4803 Matcher.getNode(ISD::SHL, DL, VT, N0,
4804 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4805 }
4806
4807 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4808 // hi result is in use in case we hit this mid-legalization.
4809 if (!UseVP) {
4810 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4811 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4812 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4813 // TODO: Can we match commutable operands with getNodeIfExists?
4814 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4815 if (LoHi->hasAnyUseOfValue(1))
4816 return SDValue(LoHi, 0);
4817 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4818 if (LoHi->hasAnyUseOfValue(1))
4819 return SDValue(LoHi, 0);
4820 }
4821 }
4822 }
4823
4824 // Try to transform:
4825 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4826 // mul x, (2^N + 1) --> add (shl x, N), x
4827 // mul x, (2^N - 1) --> sub (shl x, N), x
4828 // Examples: x * 33 --> (x << 5) + x
4829 // x * 15 --> (x << 4) - x
4830 // x * -33 --> -((x << 5) + x)
4831 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4832 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4833 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4834 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4835 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4836 // x * 0xf800 --> (x << 16) - (x << 11)
4837 // x * -0x8800 --> -((x << 15) + (x << 11))
4838 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4839 if (!UseVP && N1IsConst &&
4840 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4841 // TODO: We could handle more general decomposition of any constant by
4842 // having the target set a limit on number of ops and making a
4843 // callback to determine that sequence (similar to sqrt expansion).
4844 unsigned MathOp = ISD::DELETED_NODE;
4845 APInt MulC = ConstValue1.abs();
4846 // The constant `2` should be treated as (2^0 + 1).
4847 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4848 MulC.lshrInPlace(TZeros);
4849 if ((MulC - 1).isPowerOf2())
4850 MathOp = ISD::ADD;
4851 else if ((MulC + 1).isPowerOf2())
4852 MathOp = ISD::SUB;
4853
4854 if (MathOp != ISD::DELETED_NODE) {
4855 unsigned ShAmt =
4856 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4857 ShAmt += TZeros;
4858 assert(ShAmt < BitWidth &&
4859 "multiply-by-constant generated out of bounds shift");
4860 SDValue Shl =
4861 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4862 SDValue R =
4863 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4864 DAG.getNode(ISD::SHL, DL, VT, N0,
4865 DAG.getConstant(TZeros, DL, VT)))
4866 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4867 if (ConstValue1.isNegative())
4868 R = DAG.getNegative(R, DL, VT);
4869 return R;
4870 }
4871 }
4872
4873 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4874 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4875 SDValue N01 = N0.getOperand(1);
4876 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4877 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4878 }
4879
4880 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4881 // use.
4882 {
4883 SDValue Sh, Y;
4884
4885 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4886 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4888 Sh = N0; Y = N1;
4889 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4891 Sh = N1; Y = N0;
4892 }
4893
4894 if (Sh.getNode()) {
4895 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4896 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4897 }
4898 }
4899
4900 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4901 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4905 return Matcher.getNode(
4906 ISD::ADD, DL, VT,
4907 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4908 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4909
4910 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4911 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4912 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4913 const APInt &C0 = N0.getConstantOperandAPInt(0);
4914 const APInt &C1 = NC1->getAPIntValue();
4915 return DAG.getVScale(DL, VT, C0 * C1);
4916 }
4917
4918 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4919 APInt MulVal;
4920 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4921 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4922 const APInt &C0 = N0.getConstantOperandAPInt(0);
4923 APInt NewStep = C0 * MulVal;
4924 return DAG.getStepVector(DL, VT, NewStep);
4925 }
4926
4927 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4928 SDValue X;
4929 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4931 N, Matcher,
4933 m_Deferred(X)))) {
4934 return Matcher.getNode(ISD::ABS, DL, VT, X);
4935 }
4936
4937 // Fold ((mul x, 0/undef) -> 0,
4938 // (mul x, 1) -> x) -> x)
4939 // -> and(x, mask)
4940 // We can replace vectors with '0' and '1' factors with a clearing mask.
4941 if (VT.isFixedLengthVector()) {
4942 unsigned NumElts = VT.getVectorNumElements();
4943 SmallBitVector ClearMask;
4944 ClearMask.reserve(NumElts);
4945 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4946 if (!V || V->isZero()) {
4947 ClearMask.push_back(true);
4948 return true;
4949 }
4950 ClearMask.push_back(false);
4951 return V->isOne();
4952 };
4953 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4954 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4955 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4956 EVT LegalSVT = N1.getOperand(0).getValueType();
4957 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4958 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4960 for (unsigned I = 0; I != NumElts; ++I)
4961 if (ClearMask[I])
4962 Mask[I] = Zero;
4963 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4964 }
4965 }
4966
4967 // reassociate mul
4968 // TODO: Change reassociateOps to support vp ops.
4969 if (!UseVP)
4970 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4971 return RMUL;
4972
4973 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4974 // TODO: Change reassociateReduction to support vp ops.
4975 if (!UseVP)
4976 if (SDValue SD =
4977 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4978 return SD;
4979
4980 // Simplify the operands using demanded-bits information.
4982 return SDValue(N, 0);
4983
4984 return SDValue();
4985}
4986
4987/// Return true if divmod libcall is available.
4989 const TargetLowering &TLI) {
4990 RTLIB::Libcall LC;
4991 EVT NodeType = Node->getValueType(0);
4992 if (!NodeType.isSimple())
4993 return false;
4994 switch (NodeType.getSimpleVT().SimpleTy) {
4995 default: return false; // No libcall for vector types.
4996 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4997 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4998 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4999 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
5000 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
5001 }
5002
5003 return TLI.getLibcallImpl(LC) != RTLIB::Unsupported;
5004}
5005
5006/// Issue divrem if both quotient and remainder are needed.
5007SDValue DAGCombiner::useDivRem(SDNode *Node) {
5008 if (Node->use_empty())
5009 return SDValue(); // This is a dead node, leave it alone.
5010
5011 unsigned Opcode = Node->getOpcode();
5012 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
5013 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
5014
5015 // DivMod lib calls can still work on non-legal types if using lib-calls.
5016 EVT VT = Node->getValueType(0);
5017 if (VT.isVector() || !VT.isInteger())
5018 return SDValue();
5019
5020 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
5021 return SDValue();
5022
5023 // If DIVREM is going to get expanded into a libcall,
5024 // but there is no libcall available, then don't combine.
5025 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
5027 return SDValue();
5028
5029 // If div is legal, it's better to do the normal expansion
5030 unsigned OtherOpcode = 0;
5031 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
5032 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
5033 if (TLI.isOperationLegalOrCustom(Opcode, VT))
5034 return SDValue();
5035 } else {
5036 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5037 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5038 return SDValue();
5039 }
5040
5041 SDValue Op0 = Node->getOperand(0);
5042 SDValue Op1 = Node->getOperand(1);
5043 SDValue combined;
5044 for (SDNode *User : Op0->users()) {
5045 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5046 User->use_empty())
5047 continue;
5048 // Convert the other matching node(s), too;
5049 // otherwise, the DIVREM may get target-legalized into something
5050 // target-specific that we won't be able to recognize.
5051 unsigned UserOpc = User->getOpcode();
5052 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5053 User->getOperand(0) == Op0 &&
5054 User->getOperand(1) == Op1) {
5055 if (!combined) {
5056 if (UserOpc == OtherOpcode) {
5057 SDVTList VTs = DAG.getVTList(VT, VT);
5058 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5059 } else if (UserOpc == DivRemOpc) {
5060 combined = SDValue(User, 0);
5061 } else {
5062 assert(UserOpc == Opcode);
5063 continue;
5064 }
5065 }
5066 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5067 CombineTo(User, combined);
5068 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5069 CombineTo(User, combined.getValue(1));
5070 }
5071 }
5072 return combined;
5073}
5074
5076 SDValue N0 = N->getOperand(0);
5077 SDValue N1 = N->getOperand(1);
5078 EVT VT = N->getValueType(0);
5079 SDLoc DL(N);
5080
5081 unsigned Opc = N->getOpcode();
5082 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5083
5084 // X / undef -> undef
5085 // X % undef -> undef
5086 // X / 0 -> undef
5087 // X % 0 -> undef
5088 // NOTE: This includes vectors where any divisor element is zero/undef.
5089 if (DAG.isUndef(Opc, {N0, N1}))
5090 return DAG.getUNDEF(VT);
5091
5092 // undef / X -> 0
5093 // undef % X -> 0
5094 if (N0.isUndef())
5095 return DAG.getConstant(0, DL, VT);
5096
5097 // 0 / X -> 0
5098 // 0 % X -> 0
5100 if (N0C && N0C->isZero())
5101 return N0;
5102
5103 // X / X -> 1
5104 // X % X -> 0
5105 if (N0 == N1)
5106 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5107
5108 // X / 1 -> X
5109 // X % 1 -> 0
5110 // If this is a boolean op (single-bit element type), we can't have
5111 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5112 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5113 // it's a 1.
5114 if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
5115 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5116
5117 return SDValue();
5118}
5119
5120SDValue DAGCombiner::visitSDIV(SDNode *N) {
5121 SDValue N0 = N->getOperand(0);
5122 SDValue N1 = N->getOperand(1);
5123 EVT VT = N->getValueType(0);
5124 EVT CCVT = getSetCCResultType(VT);
5125 SDLoc DL(N);
5126
5127 // fold (sdiv c1, c2) -> c1/c2
5128 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5129 return C;
5130
5131 // fold vector ops
5132 if (VT.isVector())
5133 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5134 return FoldedVOp;
5135
5136 // fold (sdiv X, -1) -> 0-X
5137 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5138 if (N1C && N1C->isAllOnes())
5139 return DAG.getNegative(N0, DL, VT);
5140
5141 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5142 if (N1C && N1C->isMinSignedValue())
5143 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5144 DAG.getConstant(1, DL, VT),
5145 DAG.getConstant(0, DL, VT));
5146
5147 if (SDValue V = simplifyDivRem(N, DAG))
5148 return V;
5149
5150 if (SDValue NewSel = foldBinOpIntoSelect(N))
5151 return NewSel;
5152
5153 // If we know the sign bits of both operands are zero, strength reduce to a
5154 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5155 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5156 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5157
5158 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5159 // If the corresponding remainder node exists, update its users with
5160 // (Dividend - (Quotient * Divisor).
5161 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5162 { N0, N1 })) {
5163 // If the sdiv has the exact flag we shouldn't propagate it to the
5164 // remainder node.
5165 if (!N->getFlags().hasExact()) {
5166 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5167 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5168 AddToWorklist(Mul.getNode());
5169 AddToWorklist(Sub.getNode());
5170 CombineTo(RemNode, Sub);
5171 }
5172 }
5173 return V;
5174 }
5175
5176 // sdiv, srem -> sdivrem
5177 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5178 // true. Otherwise, we break the simplification logic in visitREM().
5179 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5180 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5181 if (SDValue DivRem = useDivRem(N))
5182 return DivRem;
5183
5184 return SDValue();
5185}
5186
5187static bool isDivisorPowerOfTwo(SDValue Divisor) {
5188 // Helper for determining whether a value is a power-2 constant scalar or a
5189 // vector of such elements.
5190 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5191 if (C->isZero() || C->isOpaque())
5192 return false;
5193 if (C->getAPIntValue().isPowerOf2())
5194 return true;
5195 if (C->getAPIntValue().isNegatedPowerOf2())
5196 return true;
5197 return false;
5198 };
5199
5200 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
5201 /*AllowTruncation=*/true);
5202}
5203
5204SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5205 SDLoc DL(N);
5206 EVT VT = N->getValueType(0);
5207 EVT CCVT = getSetCCResultType(VT);
5208 unsigned BitWidth = VT.getScalarSizeInBits();
5209
5210 // fold (sdiv X, pow2) -> simple ops after legalize
5211 // FIXME: We check for the exact bit here because the generic lowering gives
5212 // better results in that case. The target-specific lowering should learn how
5213 // to handle exact sdivs efficiently.
5214 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5215 // Target-specific implementation of sdiv x, pow2.
5216 if (SDValue Res = BuildSDIVPow2(N))
5217 return Res;
5218
5219 // Create constants that are functions of the shift amount value.
5220 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5221 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5222 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5223 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5224 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5225 if (!isConstantOrConstantVector(Inexact))
5226 return SDValue();
5227
5228 // Splat the sign bit into the register
5229 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5230 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5231 AddToWorklist(Sign.getNode());
5232
5233 // Add (N0 < 0) ? abs2 - 1 : 0;
5234 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5235 AddToWorklist(Srl.getNode());
5236 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5237 AddToWorklist(Add.getNode());
5238 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5239 AddToWorklist(Sra.getNode());
5240
5241 // Special case: (sdiv X, 1) -> X
5242 // Special Case: (sdiv X, -1) -> 0-X
5243 SDValue One = DAG.getConstant(1, DL, VT);
5245 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5246 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5247 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5248 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5249
5250 // If dividing by a positive value, we're done. Otherwise, the result must
5251 // be negated.
5252 SDValue Zero = DAG.getConstant(0, DL, VT);
5253 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5254
5255 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5256 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5257 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5258 return Res;
5259 }
5260
5261 // If integer divide is expensive and we satisfy the requirements, emit an
5262 // alternate sequence. Targets may check function attributes for size/speed
5263 // trade-offs.
5264 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5265 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5266 /*AllowTruncation=*/true) &&
5267 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5268 if (SDValue Op = BuildSDIV(N))
5269 return Op;
5270
5271 return SDValue();
5272}
5273
5274SDValue DAGCombiner::visitUDIV(SDNode *N) {
5275 SDValue N0 = N->getOperand(0);
5276 SDValue N1 = N->getOperand(1);
5277 EVT VT = N->getValueType(0);
5278 EVT CCVT = getSetCCResultType(VT);
5279 SDLoc DL(N);
5280
5281 // fold (udiv c1, c2) -> c1/c2
5282 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5283 return C;
5284
5285 // fold vector ops
5286 if (VT.isVector())
5287 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5288 return FoldedVOp;
5289
5290 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5291 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5292 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5293 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5294 DAG.getConstant(1, DL, VT),
5295 DAG.getConstant(0, DL, VT));
5296 }
5297
5298 if (SDValue V = simplifyDivRem(N, DAG))
5299 return V;
5300
5301 if (SDValue NewSel = foldBinOpIntoSelect(N))
5302 return NewSel;
5303
5304 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5305 // If the corresponding remainder node exists, update its users with
5306 // (Dividend - (Quotient * Divisor).
5307 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5308 { N0, N1 })) {
5309 // If the udiv has the exact flag we shouldn't propagate it to the
5310 // remainder node.
5311 if (!N->getFlags().hasExact()) {
5312 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5313 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5314 AddToWorklist(Mul.getNode());
5315 AddToWorklist(Sub.getNode());
5316 CombineTo(RemNode, Sub);
5317 }
5318 }
5319 return V;
5320 }
5321
5322 // sdiv, srem -> sdivrem
5323 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5324 // true. Otherwise, we break the simplification logic in visitREM().
5325 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5326 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5327 if (SDValue DivRem = useDivRem(N))
5328 return DivRem;
5329
5330 // Simplify the operands using demanded-bits information.
5331 // We don't have demanded bits support for UDIV so this just enables constant
5332 // folding based on known bits.
5334 return SDValue(N, 0);
5335
5336 return SDValue();
5337}
5338
5339SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5340 SDLoc DL(N);
5341 EVT VT = N->getValueType(0);
5342
5343 // fold (udiv x, (1 << c)) -> x >>u c
5344 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5345 /*AllowTruncation=*/true)) {
5346 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5347 AddToWorklist(LogBase2.getNode());
5348
5349 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5350 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5351 AddToWorklist(Trunc.getNode());
5352 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5353 }
5354 }
5355
5356 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5357 if (N1.getOpcode() == ISD::SHL) {
5358 SDValue N10 = N1.getOperand(0);
5359 if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5360 /*AllowTruncation=*/true)) {
5361 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5362 AddToWorklist(LogBase2.getNode());
5363
5364 EVT ADDVT = N1.getOperand(1).getValueType();
5365 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5366 AddToWorklist(Trunc.getNode());
5367 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5368 AddToWorklist(Add.getNode());
5369 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5370 }
5371 }
5372 }
5373
5374 // fold (udiv x, c) -> alternate
5375 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5376 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5377 /*AllowTruncation=*/true) &&
5378 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5379 if (SDValue Op = BuildUDIV(N))
5380 return Op;
5381
5382 return SDValue();
5383}
5384
5385SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5386 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5387 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5388 // Target-specific implementation of srem x, pow2.
5389 if (SDValue Res = BuildSREMPow2(N))
5390 return Res;
5391 }
5392 return SDValue();
5393}
5394
5395// handles ISD::SREM and ISD::UREM
5396SDValue DAGCombiner::visitREM(SDNode *N) {
5397 unsigned Opcode = N->getOpcode();
5398 SDValue N0 = N->getOperand(0);
5399 SDValue N1 = N->getOperand(1);
5400 EVT VT = N->getValueType(0);
5401 EVT CCVT = getSetCCResultType(VT);
5402
5403 bool isSigned = (Opcode == ISD::SREM);
5404 SDLoc DL(N);
5405
5406 // fold (rem c1, c2) -> c1%c2
5407 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5408 return C;
5409
5410 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5411 // Freeze the numerator to avoid a miscompile with an undefined value.
5412 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5413 CCVT.isVector() == VT.isVector()) {
5414 SDValue F0 = DAG.getFreeze(N0);
5415 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5416 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5417 }
5418
5419 if (SDValue V = simplifyDivRem(N, DAG))
5420 return V;
5421
5422 if (SDValue NewSel = foldBinOpIntoSelect(N))
5423 return NewSel;
5424
5425 if (isSigned) {
5426 // If we know the sign bits of both operands are zero, strength reduce to a
5427 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5428 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5429 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5430 } else {
5431 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5432 // fold (urem x, pow2) -> (and x, pow2-1)
5433 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5434 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5435 AddToWorklist(Add.getNode());
5436 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5437 }
5438 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5439 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5440 // TODO: We should sink the following into isKnownToBePowerOfTwo
5441 // using a OrZero parameter analogous to our handling in ValueTracking.
5442 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5444 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5445 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5446 AddToWorklist(Add.getNode());
5447 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5448 }
5449 }
5450
5451 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5452
5453 // If X/C can be simplified by the division-by-constant logic, lower
5454 // X%C to the equivalent of X-X/C*C.
5455 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5456 // speculative DIV must not cause a DIVREM conversion. We guard against this
5457 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5458 // combine will not return a DIVREM. Regardless, checking cheapness here
5459 // makes sense since the simplification results in fatter code.
5460 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5461 if (isSigned) {
5462 // check if we can build faster implementation for srem
5463 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5464 return OptimizedRem;
5465 }
5466
5467 SDValue OptimizedDiv =
5468 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5469 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5470 // If the equivalent Div node also exists, update its users.
5471 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5472 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5473 { N0, N1 }))
5474 CombineTo(DivNode, OptimizedDiv);
5475 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5476 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5477 AddToWorklist(OptimizedDiv.getNode());
5478 AddToWorklist(Mul.getNode());
5479 return Sub;
5480 }
5481 }
5482
5483 // sdiv, srem -> sdivrem
5484 if (SDValue DivRem = useDivRem(N))
5485 return DivRem.getValue(1);
5486
5487 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5488 // iff urem(BCst, Op1Cst) == 0
5489 SDValue A;
5490 APInt Op1Cst, BCst;
5491 if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5492 m_ConstInt(Op1Cst))) &&
5493 BCst.urem(Op1Cst).isZero()) {
5494 return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5495 }
5496
5497 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5498 // iff srem(BCst, Op1Cst) == 0 && Op1Cst != 1
5499 if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5500 m_ConstInt(Op1Cst))) &&
5501 BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5502 return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5503 }
5504
5505 return SDValue();
5506}
5507
5508SDValue DAGCombiner::visitMULHS(SDNode *N) {
5509 SDValue N0 = N->getOperand(0);
5510 SDValue N1 = N->getOperand(1);
5511 EVT VT = N->getValueType(0);
5512 SDLoc DL(N);
5513
5514 // fold (mulhs c1, c2)
5515 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5516 return C;
5517
5518 // canonicalize constant to RHS.
5521 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5522
5523 if (VT.isVector()) {
5524 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5525 return FoldedVOp;
5526
5527 // fold (mulhs x, 0) -> 0
5528 // do not return N1, because undef node may exist.
5530 return DAG.getConstant(0, DL, VT);
5531 }
5532
5533 // fold (mulhs x, 0) -> 0
5534 if (isNullConstant(N1))
5535 return N1;
5536
5537 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5538 if (isOneConstant(N1))
5539 return DAG.getNode(
5540 ISD::SRA, DL, VT, N0,
5542
5543 // fold (mulhs x, undef) -> 0
5544 if (N0.isUndef() || N1.isUndef())
5545 return DAG.getConstant(0, DL, VT);
5546
5547 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5548 // plus a shift.
5549 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5550 !VT.isVector()) {
5551 MVT Simple = VT.getSimpleVT();
5552 unsigned SimpleSize = Simple.getSizeInBits();
5553 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5554 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5555 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5556 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5557 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5558 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5559 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5560 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5561 }
5562 }
5563
5564 return SDValue();
5565}
5566
5567SDValue DAGCombiner::visitMULHU(SDNode *N) {
5568 SDValue N0 = N->getOperand(0);
5569 SDValue N1 = N->getOperand(1);
5570 EVT VT = N->getValueType(0);
5571 SDLoc DL(N);
5572
5573 // fold (mulhu c1, c2)
5574 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5575 return C;
5576
5577 // canonicalize constant to RHS.
5580 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5581
5582 if (VT.isVector()) {
5583 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5584 return FoldedVOp;
5585
5586 // fold (mulhu x, 0) -> 0
5587 // do not return N1, because undef node may exist.
5589 return DAG.getConstant(0, DL, VT);
5590 }
5591
5592 // fold (mulhu x, 0) -> 0
5593 if (isNullConstant(N1))
5594 return N1;
5595
5596 // fold (mulhu x, 1) -> 0
5597 if (isOneConstant(N1))
5598 return DAG.getConstant(0, DL, VT);
5599
5600 // fold (mulhu x, undef) -> 0
5601 if (N0.isUndef() || N1.isUndef())
5602 return DAG.getConstant(0, DL, VT);
5603
5604 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5605 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5606 /*AllowTruncation=*/true) &&
5607 hasOperation(ISD::SRL, VT)) {
5608 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5609 unsigned NumEltBits = VT.getScalarSizeInBits();
5610 SDValue SRLAmt = DAG.getNode(
5611 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5612 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5613 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5614 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5615 }
5616 }
5617
5618 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5619 // plus a shift.
5620 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5621 !VT.isVector()) {
5622 MVT Simple = VT.getSimpleVT();
5623 unsigned SimpleSize = Simple.getSizeInBits();
5624 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5625 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5626 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5627 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5628 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5629 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5630 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5631 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5632 }
5633 }
5634
5635 // Simplify the operands using demanded-bits information.
5636 // We don't have demanded bits support for MULHU so this just enables constant
5637 // folding based on known bits.
5639 return SDValue(N, 0);
5640
5641 return SDValue();
5642}
5643
5644SDValue DAGCombiner::visitAVG(SDNode *N) {
5645 unsigned Opcode = N->getOpcode();
5646 SDValue N0 = N->getOperand(0);
5647 SDValue N1 = N->getOperand(1);
5648 EVT VT = N->getValueType(0);
5649 SDLoc DL(N);
5650 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5651
5652 // fold (avg c1, c2)
5653 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5654 return C;
5655
5656 // canonicalize constant to RHS.
5659 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5660
5661 if (VT.isVector())
5662 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5663 return FoldedVOp;
5664
5665 // fold (avg x, undef) -> x
5666 if (N0.isUndef())
5667 return N1;
5668 if (N1.isUndef())
5669 return N0;
5670
5671 // fold (avg x, x) --> x
5672 if (N0 == N1 && Level >= AfterLegalizeTypes)
5673 return N0;
5674
5675 // fold (avgfloor x, 0) -> x >> 1
5676 SDValue X, Y;
5678 return DAG.getNode(ISD::SRA, DL, VT, X,
5679 DAG.getShiftAmountConstant(1, VT, DL));
5681 return DAG.getNode(ISD::SRL, DL, VT, X,
5682 DAG.getShiftAmountConstant(1, VT, DL));
5683
5684 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5685 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5686 if (!IsSigned &&
5687 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5688 X.getValueType() == Y.getValueType() &&
5689 hasOperation(Opcode, X.getValueType())) {
5690 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5691 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5692 }
5693 if (IsSigned &&
5694 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5695 X.getValueType() == Y.getValueType() &&
5696 hasOperation(Opcode, X.getValueType())) {
5697 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5698 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5699 }
5700
5701 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5702 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5703 // Check if avgflooru isn't legal/custom but avgceilu is.
5704 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5705 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5706 if (DAG.isKnownNeverZero(N1))
5707 return DAG.getNode(
5708 ISD::AVGCEILU, DL, VT, N0,
5709 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5710 if (DAG.isKnownNeverZero(N0))
5711 return DAG.getNode(
5712 ISD::AVGCEILU, DL, VT, N1,
5713 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5714 }
5715
5716 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5717 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5718 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5719 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5720 SDValue Add;
5721 if (sd_match(N,
5722 m_c_BinOp(Opcode,
5724 m_One())) ||
5725 sd_match(N, m_c_BinOp(Opcode,
5727 m_Value(Y)))) {
5728
5729 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5730 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5731
5732 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5733 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5734 }
5735 }
5736
5737 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5738 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5739 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5740 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5741 }
5742
5743 return SDValue();
5744}
5745
5746SDValue DAGCombiner::visitABD(SDNode *N) {
5747 unsigned Opcode = N->getOpcode();
5748 SDValue N0 = N->getOperand(0);
5749 SDValue N1 = N->getOperand(1);
5750 EVT VT = N->getValueType(0);
5751 SDLoc DL(N);
5752
5753 // fold (abd c1, c2)
5754 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5755 return C;
5756
5757 // canonicalize constant to RHS.
5760 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5761
5762 if (VT.isVector())
5763 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5764 return FoldedVOp;
5765
5766 // fold (abd x, undef) -> 0
5767 if (N0.isUndef() || N1.isUndef())
5768 return DAG.getConstant(0, DL, VT);
5769
5770 // fold (abd x, x) -> 0
5771 if (N0 == N1)
5772 return DAG.getConstant(0, DL, VT);
5773
5774 SDValue X;
5775
5776 // fold (abds x, 0) -> abs x
5778 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5779 return DAG.getNode(ISD::ABS, DL, VT, X);
5780
5781 // fold (abdu x, 0) -> x
5783 return X;
5784
5785 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5786 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5787 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5788 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5789
5790 return SDValue();
5791}
5792
5793/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5794/// give the opcodes for the two computations that are being performed. Return
5795/// true if a simplification was made.
5796SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5797 unsigned HiOp) {
5798 // If the high half is not needed, just compute the low half.
5799 bool HiExists = N->hasAnyUseOfValue(1);
5800 if (!HiExists && (!LegalOperations ||
5801 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5802 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5803 return CombineTo(N, Res, Res);
5804 }
5805
5806 // If the low half is not needed, just compute the high half.
5807 bool LoExists = N->hasAnyUseOfValue(0);
5808 if (!LoExists && (!LegalOperations ||
5809 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5810 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5811 return CombineTo(N, Res, Res);
5812 }
5813
5814 // If both halves are used, return as it is.
5815 if (LoExists && HiExists)
5816 return SDValue();
5817
5818 // If the two computed results can be simplified separately, separate them.
5819 if (LoExists) {
5820 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5821 AddToWorklist(Lo.getNode());
5822 SDValue LoOpt = combine(Lo.getNode());
5823 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5824 (!LegalOperations ||
5825 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5826 return CombineTo(N, LoOpt, LoOpt);
5827 }
5828
5829 if (HiExists) {
5830 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5831 AddToWorklist(Hi.getNode());
5832 SDValue HiOpt = combine(Hi.getNode());
5833 if (HiOpt.getNode() && HiOpt != Hi &&
5834 (!LegalOperations ||
5835 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5836 return CombineTo(N, HiOpt, HiOpt);
5837 }
5838
5839 return SDValue();
5840}
5841
5842SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5843 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5844 return Res;
5845
5846 SDValue N0 = N->getOperand(0);
5847 SDValue N1 = N->getOperand(1);
5848 EVT VT = N->getValueType(0);
5849 SDLoc DL(N);
5850
5851 // Constant fold.
5853 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5854
5855 // canonicalize constant to RHS (vector doesn't have to splat)
5858 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5859
5860 // If the type is twice as wide is legal, transform the mulhu to a wider
5861 // multiply plus a shift.
5862 if (VT.isSimple() && !VT.isVector()) {
5863 MVT Simple = VT.getSimpleVT();
5864 unsigned SimpleSize = Simple.getSizeInBits();
5865 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5866 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5867 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5868 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5869 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5870 // Compute the high part as N1.
5871 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5872 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5873 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5874 // Compute the low part as N0.
5875 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5876 return CombineTo(N, Lo, Hi);
5877 }
5878 }
5879
5880 return SDValue();
5881}
5882
5883SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5884 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5885 return Res;
5886
5887 SDValue N0 = N->getOperand(0);
5888 SDValue N1 = N->getOperand(1);
5889 EVT VT = N->getValueType(0);
5890 SDLoc DL(N);
5891
5892 // Constant fold.
5894 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5895
5896 // canonicalize constant to RHS (vector doesn't have to splat)
5899 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5900
5901 // (umul_lohi N0, 0) -> (0, 0)
5902 if (isNullConstant(N1)) {
5903 SDValue Zero = DAG.getConstant(0, DL, VT);
5904 return CombineTo(N, Zero, Zero);
5905 }
5906
5907 // (umul_lohi N0, 1) -> (N0, 0)
5908 if (isOneConstant(N1)) {
5909 SDValue Zero = DAG.getConstant(0, DL, VT);
5910 return CombineTo(N, N0, Zero);
5911 }
5912
5913 // If the type is twice as wide is legal, transform the mulhu to a wider
5914 // multiply plus a shift.
5915 if (VT.isSimple() && !VT.isVector()) {
5916 MVT Simple = VT.getSimpleVT();
5917 unsigned SimpleSize = Simple.getSizeInBits();
5918 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5919 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5920 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5921 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5922 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5923 // Compute the high part as N1.
5924 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5925 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5926 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5927 // Compute the low part as N0.
5928 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5929 return CombineTo(N, Lo, Hi);
5930 }
5931 }
5932
5933 return SDValue();
5934}
5935
5936SDValue DAGCombiner::visitMULO(SDNode *N) {
5937 SDValue N0 = N->getOperand(0);
5938 SDValue N1 = N->getOperand(1);
5939 EVT VT = N0.getValueType();
5940 bool IsSigned = (ISD::SMULO == N->getOpcode());
5941
5942 EVT CarryVT = N->getValueType(1);
5943 SDLoc DL(N);
5944
5945 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5946 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5947
5948 // fold operation with constant operands.
5949 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5950 // multiple results.
5951 if (N0C && N1C) {
5952 bool Overflow;
5953 APInt Result =
5954 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5955 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5956 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5957 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5958 }
5959
5960 // canonicalize constant to RHS.
5963 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5964
5965 // fold (mulo x, 0) -> 0 + no carry out
5966 if (isNullOrNullSplat(N1))
5967 return CombineTo(N, DAG.getConstant(0, DL, VT),
5968 DAG.getConstant(0, DL, CarryVT));
5969
5970 // (mulo x, 2) -> (addo x, x)
5971 // FIXME: This needs a freeze.
5972 if (N1C && N1C->getAPIntValue() == 2 &&
5973 (!IsSigned || VT.getScalarSizeInBits() > 2))
5974 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5975 N->getVTList(), N0, N0);
5976
5977 // A 1 bit SMULO overflows if both inputs are 1.
5978 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5979 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5980 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5981 DAG.getConstant(0, DL, VT), ISD::SETNE);
5982 return CombineTo(N, And, Cmp);
5983 }
5984
5985 // If it cannot overflow, transform into a mul.
5986 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5987 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5988 DAG.getConstant(0, DL, CarryVT));
5989 return SDValue();
5990}
5991
5992// Function to calculate whether the Min/Max pair of SDNodes (potentially
5993// swapped around) make a signed saturate pattern, clamping to between a signed
5994// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5995// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5996// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5997// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5999 SDValue N3, ISD::CondCode CC, unsigned &BW,
6000 bool &Unsigned, SelectionDAG &DAG) {
6001 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
6002 ISD::CondCode CC) {
6003 // The compare and select operand should be the same or the select operands
6004 // should be truncated versions of the comparison.
6005 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
6006 return 0;
6007 // The constants need to be the same or a truncated version of each other.
6010 if (!N1C || !N3C)
6011 return 0;
6012 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
6013 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
6014 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
6015 return 0;
6016 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
6017 };
6018
6019 // Check the initial value is a SMIN/SMAX equivalent.
6020 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
6021 if (!Opcode0)
6022 return SDValue();
6023
6024 // We could only need one range check, if the fptosi could never produce
6025 // the upper value.
6026 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
6027 if (isNullOrNullSplat(N3)) {
6028 EVT IntVT = N0.getValueType().getScalarType();
6029 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
6030 if (FPVT.isSimple()) {
6031 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
6032 const fltSemantics &Semantics = InputTy->getFltSemantics();
6033 uint32_t MinBitWidth =
6034 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
6035 if (IntVT.getSizeInBits() >= MinBitWidth) {
6036 Unsigned = true;
6037 BW = PowerOf2Ceil(MinBitWidth);
6038 return N0;
6039 }
6040 }
6041 }
6042 }
6043
6044 SDValue N00, N01, N02, N03;
6045 ISD::CondCode N0CC;
6046 switch (N0.getOpcode()) {
6047 case ISD::SMIN:
6048 case ISD::SMAX:
6049 N00 = N02 = N0.getOperand(0);
6050 N01 = N03 = N0.getOperand(1);
6051 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6052 break;
6053 case ISD::SELECT_CC:
6054 N00 = N0.getOperand(0);
6055 N01 = N0.getOperand(1);
6056 N02 = N0.getOperand(2);
6057 N03 = N0.getOperand(3);
6058 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
6059 break;
6060 case ISD::SELECT:
6061 case ISD::VSELECT:
6062 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6063 return SDValue();
6064 N00 = N0.getOperand(0).getOperand(0);
6065 N01 = N0.getOperand(0).getOperand(1);
6066 N02 = N0.getOperand(1);
6067 N03 = N0.getOperand(2);
6068 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6069 break;
6070 default:
6071 return SDValue();
6072 }
6073
6074 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6075 if (!Opcode1 || Opcode0 == Opcode1)
6076 return SDValue();
6077
6078 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6079 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6080 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6081 return SDValue();
6082
6083 const APInt &MinC = MinCOp->getAPIntValue();
6084 const APInt &MaxC = MaxCOp->getAPIntValue();
6085 APInt MinCPlus1 = MinC + 1;
6086 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6087 BW = MinCPlus1.exactLogBase2() + 1;
6088 Unsigned = false;
6089 return N02;
6090 }
6091
6092 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6093 BW = MinCPlus1.exactLogBase2();
6094 Unsigned = true;
6095 return N02;
6096 }
6097
6098 return SDValue();
6099}
6100
6102 SDValue N3, ISD::CondCode CC,
6103 SelectionDAG &DAG) {
6104 unsigned BW;
6105 bool Unsigned;
6106 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6107 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6108 return SDValue();
6109 EVT FPVT = Fp.getOperand(0).getValueType();
6110 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6111 if (FPVT.isVector())
6112 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6113 FPVT.getVectorElementCount());
6114 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6115 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6116 return SDValue();
6117 SDLoc DL(Fp);
6118 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6119 DAG.getValueType(NewVT.getScalarType()));
6120 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6121}
6122
6124 SDValue N3, ISD::CondCode CC,
6125 SelectionDAG &DAG) {
6126 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6127 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6128 // be truncated versions of the setcc (N0/N1).
6129 if ((N0 != N2 &&
6130 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6131 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6132 return SDValue();
6135 if (!N1C || !N3C)
6136 return SDValue();
6137 const APInt &C1 = N1C->getAPIntValue();
6138 const APInt &C3 = N3C->getAPIntValue();
6139 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6140 C1 != C3.zext(C1.getBitWidth()))
6141 return SDValue();
6142
6143 unsigned BW = (C1 + 1).exactLogBase2();
6144 EVT FPVT = N0.getOperand(0).getValueType();
6145 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6146 if (FPVT.isVector())
6147 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6148 FPVT.getVectorElementCount());
6150 FPVT, NewVT))
6151 return SDValue();
6152
6153 SDValue Sat =
6154 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6155 DAG.getValueType(NewVT.getScalarType()));
6156 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6157}
6158
/// Combine integer min/max nodes (SMIN/SMAX/UMIN/UMAX): constant folding,
/// trivial identities, vscale comparisons, signedness flips, reassociation,
/// and fp-to-int saturation pattern matching.
6159SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6160  SDValue N0 = N->getOperand(0);
6161  SDValue N1 = N->getOperand(1);
6162  EVT VT = N0.getValueType();
6163  unsigned Opcode = N->getOpcode();
6164  SDLoc DL(N);
6165
6166  // fold operation with constant operands.
6167  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6168    return C;
6169
6170  // If the operands are the same, this is a no-op.
6171  if (N0 == N1)
6172    return N0;
6173
6174  // Fold operation with vscale operands.
  // vscale is monotonic in its constant multiplier, so comparing the
  // multipliers decides umin/umax of two VSCALE nodes directly.
6175  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6176    uint64_t C0 = N0->getConstantOperandVal(0);
6177    uint64_t C1 = N1->getConstantOperandVal(0);
6178    if (Opcode == ISD::UMAX)
6179      return C0 > C1 ? N0 : N1;
6180    else if (Opcode == ISD::UMIN)
6181      return C0 > C1 ? N1 : N0;
6182  }
6183
6184  // canonicalize constant to RHS
6187    return DAG.getNode(Opcode, DL, VT, N1, N0);
6188
6189  // fold vector ops
6190  if (VT.isVector())
6191    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6192      return FoldedVOp;
6193
6194  // reassociate minmax
6195  if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6196    return RMINMAX;
6197
6198  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6199  // Only do this if:
6200  // 1. The current op isn't legal and the flipped is.
6201  // 2. The saturation pattern is broken by canonicalization in InstCombine.
6202  bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6203  bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6204  if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6205      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6206    unsigned AltOpcode;
6207    switch (Opcode) {
6208    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6209    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6210    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6211    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6212    default: llvm_unreachable("Unknown MINMAX opcode");
6213    }
6214    if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6215      return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6216  }
6217
  // Match min/max feeding an fp-to-int saturation pattern.
6218  if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6220            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6221      return S;
6222  if (Opcode == ISD::UMIN)
6223    if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6224      return S;
6225
6226  // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6227  auto ReductionOpcode = [](unsigned Opcode) {
6228    switch (Opcode) {
6229    case ISD::SMIN:
6230      return ISD::VECREDUCE_SMIN;
6231    case ISD::SMAX:
6232      return ISD::VECREDUCE_SMAX;
6233    case ISD::UMIN:
6234      return ISD::VECREDUCE_UMIN;
6235    case ISD::UMAX:
6236      return ISD::VECREDUCE_UMAX;
6237    default:
6238      llvm_unreachable("Unexpected opcode");
6239    }
6240  };
6241  if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6242                                        SDLoc(N), VT, N0, N1))
6243    return SD;
6244
6245  // Simplify the operands using demanded-bits information.
6247    return SDValue(N, 0);
6248
6249  return SDValue();
6250}
6251
6252/// If this is a bitwise logic instruction and both operands have the same
6253/// opcode, try to sink the other opcode after the logic instruction.
/// Returns the replacement value, or an empty SDValue when no transform
/// applies. The logic opcode (and/or/xor) and the "hand" opcode shared by
/// both operands are each preserved; only their order is exchanged.
6254SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6255  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6256  EVT VT = N0.getValueType();
6257  unsigned LogicOpcode = N->getOpcode();
6258  unsigned HandOpcode = N0.getOpcode();
6259  assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6260  assert(HandOpcode == N1.getOpcode() && "Bad input!");
6261
6262  // Bail early if none of these transforms apply.
6263  if (N0.getNumOperands() == 0)
6264    return SDValue();
6265
6266  // FIXME: We should check number of uses of the operands to not increase
6267  // the instruction count for all transforms.
6268
6269  // Handle size-changing casts (or sign_extend_inreg).
6270  SDValue X = N0.getOperand(0);
6271  SDValue Y = N1.getOperand(0);
6272  EVT XVT = X.getValueType();
6273  SDLoc DL(N);
6274  if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6275      (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6276       N0.getOperand(1) == N1.getOperand(1))) {
6277    // If both operands have other uses, this transform would create extra
6278    // instructions without eliminating anything.
6279    if (!N0.hasOneUse() && !N1.hasOneUse())
6280      return SDValue();
6281    // We need matching integer source types.
6282    if (XVT != Y.getValueType())
6283      return SDValue();
6284    // Don't create an illegal op during or after legalization. Don't ever
6285    // create an unsupported vector op.
6286    if ((VT.isVector() || LegalOperations) &&
6287        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6288      return SDValue();
6289    // Avoid infinite looping with PromoteIntBinOp.
6290    // TODO: Should we apply desirable/legal constraints to all opcodes?
6291    if ((HandOpcode == ISD::ANY_EXTEND ||
6292         HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6293        LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6294      return SDValue();
6295    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
    // A 'disjoint' OR of zero-extended values stays disjoint after hoisting,
    // so propagate the flag only for true extension hand opcodes.
6296    SDNodeFlags LogicFlags;
6297    LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6298                           ISD::isExtOpcode(HandOpcode));
6299    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6300    if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6301      return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6302    return DAG.getNode(HandOpcode, DL, VT, Logic);
6303  }
6304
6305  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6306  if (HandOpcode == ISD::TRUNCATE) {
6307    // If both operands have other uses, this transform would create extra
6308    // instructions without eliminating anything.
6309    if (!N0.hasOneUse() && !N1.hasOneUse())
6310      return SDValue();
6311    // We need matching source types.
6312    if (XVT != Y.getValueType())
6313      return SDValue();
6314    // Don't create an illegal op during or after legalization.
6315    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6316      return SDValue();
6317    // Be extra careful sinking truncate. If it's free, there's no benefit in
6318    // widening a binop. Also, don't create a logic op on an illegal type.
6319    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6320      return SDValue();
6321    if (!TLI.isTypeLegal(XVT))
6322      return SDValue();
6323    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6324    return DAG.getNode(HandOpcode, DL, VT, Logic);
6325  }
6326
6327  // For binops SHL/SRL/SRA/AND:
6328  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6329  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6330       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6331      N0.getOperand(1) == N1.getOperand(1)) {
6332    // If either operand has other uses, this transform is not an improvement.
6333    if (!N0.hasOneUse() || !N1.hasOneUse())
6334      return SDValue();
6335    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6336    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6337  }
6338
6339  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6340  if (HandOpcode == ISD::BSWAP) {
6341    // If either operand has other uses, this transform is not an improvement.
6342    if (!N0.hasOneUse() || !N1.hasOneUse())
6343      return SDValue();
6344    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6345    return DAG.getNode(HandOpcode, DL, VT, Logic);
6346  }
6347
6348  // For funnel shifts FSHL/FSHR:
6349  // logic_op (OP x, x1, s), (OP y, y1, s) -->
6350  // --> OP (logic_op x, y), (logic_op, x1, y1), s
  // Both funnel shifts must use the same shift amount 's' for this to hold.
6351  if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6352      N0.getOperand(2) == N1.getOperand(2)) {
6353    if (!N0.hasOneUse() || !N1.hasOneUse())
6354      return SDValue();
6355    SDValue X1 = N0.getOperand(1);
6356    SDValue Y1 = N1.getOperand(1);
6357    SDValue S = N0.getOperand(2);
6358    SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6359    SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6360    return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6361  }
6362
6363  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6364  // Only perform this optimization up until type legalization, before
6365  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
6366  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6367  // we don't want to undo this promotion.
6368  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6369  // on scalars.
6370  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6371      Level <= AfterLegalizeTypes) {
6372    // Input types must be integer and the same.
6373    if (XVT.isInteger() && XVT == Y.getValueType() &&
6374        !(VT.isVector() && TLI.isTypeLegal(VT) &&
6375          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6376      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6377      return DAG.getNode(HandOpcode, DL, VT, Logic);
6378    }
6379  }
6380
6381  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6382  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6383  // If both shuffles use the same mask, and both shuffle within a single
6384  // vector, then it is worthwhile to move the swizzle after the operation.
6385  // The type-legalizer generates this pattern when loading illegal
6386  // vector types from memory. In many cases this allows additional shuffle
6387  // optimizations.
6388  // There are other cases where moving the shuffle after the xor/and/or
6389  // is profitable even if shuffles don't perform a swizzle.
6390  // If both shuffles use the same mask, and both shuffles have the same first
6391  // or second operand, then it might still be profitable to move the shuffle
6392  // after the xor/and/or operation.
6393  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6394    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6395    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6396    assert(X.getValueType() == Y.getValueType() &&
6397           "Inputs to shuffles are not the same type");
6398
6399    // Check that both shuffles use the same mask. The masks are known to be of
6400    // the same length because the result vector type is the same.
6401    // Check also that shuffles have only one use to avoid introducing extra
6402    // instructions.
6403    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6404        !SVN0->getMask().equals(SVN1->getMask()))
6405      return SDValue();
6406
6407    // Don't try to fold this node if it requires introducing a
6408    // build vector of all zeros that might be illegal at this stage.
    // For XOR the shared operand C must be replaced by C^C == 0 in the
    // result, hence the zero-vector requirement below.
6409    SDValue ShOp = N0.getOperand(1);
6410    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6411      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6412
6413    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6414    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6415      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6416                                  N0.getOperand(0), N1.getOperand(0));
6417      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6418    }
6419
6420    // Don't try to fold this node if it requires introducing a
6421    // build vector of all zeros that might be illegal at this stage.
6422    ShOp = N0.getOperand(0);
6423    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6424      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6425
6426    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6427    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6428      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6429                                  N1.getOperand(1));
6430      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6431    }
6432  }
6433
6434  return SDValue();
6435}
6436
6437/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// \p IsAnd selects between the AND and OR forms of each fold. Returns the
/// simplified value, or an empty SDValue when no fold applies.
6438SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6439                                       const SDLoc &DL) {
6440  SDValue LL, LR, RL, RR, N0CC, N1CC;
6441  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6442      !isSetCCEquivalent(N1, RL, RR, N1CC))
6443    return SDValue();
6444
6445  assert(N0.getValueType() == N1.getValueType() &&
6446         "Unexpected operand types for bitwise logic op");
6447  assert(LL.getValueType() == LR.getValueType() &&
6448         RL.getValueType() == RR.getValueType() &&
6449         "Unexpected operand types for setcc");
6450
6451  // If we're here post-legalization or the logic op type is not i1, the logic
6452  // op type must match a setcc result type. Also, all folds require new
6453  // operations on the left and right operands, so those types must match.
6454  EVT VT = N0.getValueType();
6455  EVT OpVT = LL.getValueType();
6456  if (LegalOperations || VT.getScalarType() != MVT::i1)
6457    if (VT != getSetCCResultType(OpVT))
6458      return SDValue();
6459  if (OpVT != RL.getValueType())
6460    return SDValue();
6461
6462  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6463  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6464  bool IsInteger = OpVT.isInteger();
  // Both compares share the RHS constant and predicate: classify which
  // bit-test pattern (zero / all-ones / sign-bit) the pair forms.
6465  if (LR == RR && CC0 == CC1 && IsInteger) {
6466    bool IsZero = isNullOrNullSplat(LR);
6467    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6468
6469    // All bits clear?
6470    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6471    // All sign bits clear?
6472    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6473    // Any bits set?
6474    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6475    // Any sign bits set?
6476    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6477
6478    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6479    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6480    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6481    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6482    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6483      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6484      AddToWorklist(Or.getNode());
6485      return DAG.getSetCC(DL, VT, Or, LR, CC1);
6486    }
6487
6488    // All bits set?
6489    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6490    // All sign bits set?
6491    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6492    // Any bits clear?
6493    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6494    // Any sign bits clear?
6495    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6496
6497    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6498    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6499    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6500    // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6501    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6502      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6503      AddToWorklist(And.getNode());
6504      return DAG.getSetCC(DL, VT, And, LR, CC1);
6505    }
6506  }
6507
6508  // TODO: What is the 'or' equivalent of this fold?
6509  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // Scalar size must exceed 1 bit or 0 and -1 would be the same value.
6510  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6511      IsInteger && CC0 == ISD::SETNE &&
6512      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6513       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6514    SDValue One = DAG.getConstant(1, DL, OpVT);
6515    SDValue Two = DAG.getConstant(2, DL, OpVT);
6516    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6517    AddToWorklist(Add.getNode());
6518    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6519  }
6520
6521  // Try more general transforms if the predicates match and the only user of
6522  // the compares is the 'and' or 'or'.
6523  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6524      N0.hasOneUse() && N1.hasOneUse()) {
6525    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6526    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6527    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6528      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6529      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6530      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6531      SDValue Zero = DAG.getConstant(0, DL, OpVT);
6532      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6533    }
6534
6535    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6536    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6537      // Match a shared variable operand and 2 non-opaque constant operands.
6538      auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6539        // The difference of the constants must be a single bit.
6540        const APInt &CMax =
6541            APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6542        const APInt &CMin =
6543            APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6544        return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6545      };
6546      if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6547        // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6548        // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6549        SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6550        SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6551        SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6552        SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6553        SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6554        SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6555        SDValue Zero = DAG.getConstant(0, DL, OpVT);
6556        return DAG.getSetCC(DL, VT, And, Zero, CC0);
6557      }
6558    }
6559  }
6560
6561  // Canonicalize equivalent operands to LL == RL.
6562  if (LL == RR && LR == RL) {
6564    std::swap(RL, RR);
6565  }
6566
6567  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6568  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6569  if (LL == RL && LR == RR) {
6570    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6571                                : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6572    if (NewCC != ISD::SETCC_INVALID &&
6573        (!LegalOperations ||
6574         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6575          TLI.isOperationLegal(ISD::SETCC, OpVT))))
6576      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6577  }
6578
6579  return SDValue();
6580}
6581
6582static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6583 SelectionDAG &DAG) {
6584 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6585}
6586
6587static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6588 SelectionDAG &DAG) {
6589 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6590}
6591
6592// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
/// Map a setcc predicate plus the surrounding and/or opcode to the FP
/// min/max node that can replace the compare pair, or ISD::DELETED_NODE
/// when no NaN-safe replacement exists for the available target ops.
6593static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6594                                     ISD::CondCode CC, unsigned OrAndOpcode,
6595                                     SelectionDAG &DAG,
6596                                     bool isFMAXNUMFMINNUM_IEEE,
6597                                     bool isFMAXNUMFMINNUM) {
6598  // The optimization cannot be applied for all the predicates because
6599  // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6600  // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6601  // applied at all if one of the operands is a signaling NaN.
6602
6603  // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6604  // are non NaN values.
6605  if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6606      ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6607    return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6608                   isFMAXNUMFMINNUM_IEEE
6611  }
6612
6613  if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6614      ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6615    return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6616                   isFMAXNUMFMINNUM_IEEE
6619  }
6620
6621  // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6622  // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6623  // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6624  // that there are not any sNaNs, then the optimization is not valid
6625  // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6626  // the optimization using FMINNUM/FMAXNUM for the following cases. If
6627  // we can prove that we do not have any sNaNs, then we can do the
6628  // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6629  // cases.
6630  if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6631      ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6632    return isFMAXNUMFMINNUM ? ISD::FMINNUM
6633                            : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6634                                      isFMAXNUMFMINNUM_IEEE
6637  }
6638
6639  if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6640      ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6641    return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6642                            : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6643                                      isFMAXNUMFMINNUM_IEEE
6646  }
6647
  // No predicate/opcode combination matched: signal "no replacement".
6648  return ISD::DELETED_NODE;
6649}
6650
  // foldAndOrOfSETCC: fold (and/or (setcc A, B), (setcc C, D)) into min/max
  // plus a single compare, an abs-compare, or an add/and mask test,
  // depending on the operands and the target's stated preference.
6653  assert(
6654      (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6655      "Invalid Op to combine SETCC with");
6656
6657  // TODO: Search past casts/truncates.
6658  SDValue LHS = LogicOp->getOperand(0);
6659  SDValue RHS = LogicOp->getOperand(1);
6660  if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6661      !LHS->hasOneUse() || !RHS->hasOneUse())
6662    return SDValue();
6663
6664  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6666      LogicOp, LHS.getNode(), RHS.getNode());
6667
6668  SDValue LHS0 = LHS->getOperand(0);
6669  SDValue RHS0 = RHS->getOperand(0);
6670  SDValue LHS1 = LHS->getOperand(1);
6671  SDValue RHS1 = RHS->getOperand(1);
6672  // TODO: We don't actually need a splat here, for vectors we just need the
6673  // invariants to hold for each element.
6674  auto *LHS1C = isConstOrConstSplat(LHS1);
6675  auto *RHS1C = isConstOrConstSplat(RHS1);
6676  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6677  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6678  EVT VT = LogicOp->getValueType(0);
6679  EVT OpVT = LHS0.getValueType();
6680  SDLoc DL(LogicOp);
6681
6682  // Check if the operands of an and/or operation are comparisons and if they
6683  // compare against the same value. Replace the and/or-cmp-cmp sequence with
6684  // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6685  // sequence will be replaced with min-cmp sequence:
6686  // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6687  // and and-cmp-cmp will be replaced with max-cmp sequence:
6688  // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6689  // The optimization does not work for `==` or `!=` .
6690  // The two comparisons should have either the same predicate or the
6691  // predicate of one of the comparisons is the opposite of the other one.
6692  bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6694  bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6696  if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6697        TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6698        TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6699        TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6700       (OpVT.isFloatingPoint() &&
6701        (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6703      CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6704      CCL != ISD::SETTRUE &&
6705      (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6706
    // Identify the value both compares share and the two values to min/max.
6707    SDValue CommonValue, Operand1, Operand2;
6709    if (CCL == CCR) {
6710      if (LHS0 == RHS0) {
6711        CommonValue = LHS0;
6712        Operand1 = LHS1;
6713        Operand2 = RHS1;
6715      } else if (LHS1 == RHS1) {
6716        CommonValue = LHS1;
6717        Operand1 = LHS0;
6718        Operand2 = RHS0;
6719        CC = CCL;
6720      }
6721    } else {
6722      assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6723      if (LHS0 == RHS1) {
6724        CommonValue = LHS0;
6725        Operand1 = LHS1;
6726        Operand2 = RHS0;
6727        CC = CCR;
6728      } else if (RHS0 == LHS1) {
6729        CommonValue = LHS1;
6730        Operand1 = LHS0;
6731        Operand2 = RHS1;
6732        CC = CCL;
6733      }
6734    }
6735
6736    // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6737    // handle it using OR/AND.
6738    if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6739      CC = ISD::SETCC_INVALID;
6740    else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6741      CC = ISD::SETCC_INVALID;
6742
6743    if (CC != ISD::SETCC_INVALID) {
6744      unsigned NewOpcode = ISD::DELETED_NODE;
6745      bool IsSigned = isSignedIntSetCC(CC);
6746      if (OpVT.isInteger()) {
6747        bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6748                       CC == ISD::SETLT || CC == ISD::SETULT);
6749        bool IsOr = (LogicOp->getOpcode() == ISD::OR);
        // or + less-than selects min; all other combinations select max
        // (and vice versa), with signedness following the predicate.
6750        if (IsLess == IsOr)
6751          NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6752        else
6753          NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6754      } else if (OpVT.isFloatingPoint())
6755        NewOpcode =
6756            getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6757                                 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6758
6759      if (NewOpcode != ISD::DELETED_NODE) {
6760        SDValue MinMaxValue =
6761            DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6762        return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6763      }
6764    }
6765  }
6766
  // (setcc X, X, seto) & (setcc Y, Y, seto) --> setcc X, Y, seto
  // (and the setuo/or dual): a self-compare only tests for NaN.
6767  if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6768      LHS0.getValueType() == RHS0.getValueType() &&
6769      ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6770       (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6771    return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6772
6773  if (TargetPreference == AndOrSETCCFoldKind::None)
6774    return SDValue();
6775
6776  if (CCL == CCR &&
6777      CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6778      LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6779    const APInt &APLhs = LHS1C->getAPIntValue();
6780    const APInt &APRhs = RHS1C->getAPIntValue();
6781
6782    // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6783    // case this is just a compare).
6784    if (APLhs == (-APRhs) &&
6785        ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6786         DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6787      const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6788      // (icmp eq A, C) | (icmp eq A, -C)
6789      //    -> (icmp eq Abs(A), C)
6790      // (icmp ne A, C) & (icmp ne A, -C)
6791      //    -> (icmp ne Abs(A), C)
6792      SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6793      return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6794                         DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6795    } else if (TargetPreference &
6797
6798      // AndOrSETCCFoldKind::AddAnd:
6799      // A == C0 | A == C1
6800      //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6801      //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6802      // A != C0 & A != C1
6803      //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6804      //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6805
6806      // AndOrSETCCFoldKind::NotAnd:
6807      // A == C0 | A == C1
6808      //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6809      //    -> ~A & smin(C0, C1) == 0
6810      // A != C0 & A != C1
6811      //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6812      //    -> ~A & smin(C0, C1) != 0
6813
6814      const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6815      const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6816      APInt Dif = MaxC - MinC;
6817      if (!Dif.isZero() && Dif.isPowerOf2()) {
6818        if (MaxC.isAllOnes() &&
6819            (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6820          SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6821          SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6822                                      DAG.getConstant(MinC, DL, OpVT));
6823          return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6824                             DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6825        } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6826
6827          SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6828                                      DAG.getConstant(-MinC, DL, OpVT));
6829          SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6830                                      DAG.getConstant(~Dif, DL, OpVT));
6831          return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6832                             DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6833        }
6834      }
6835    }
6836  }
6837
6838  return SDValue();
6839}
6840
6841// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6842// We canonicalize to the `select` form in the middle end, but the `and` form
6843// gets better codegen and all tested targets (arm, x86, riscv)
6845                                     const SDLoc &DL, SelectionDAG &DAG) {
6846  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // The false arm must be the constant zero for the fold to be valid.
6847  if (!isNullConstant(F))
6848    return SDValue();
6849
  // The boolean must be known to be exactly 0 or 1 so that using it as an
  // AND mask reproduces the select's behavior.
6850  EVT CondVT = Cond.getValueType();
6851  if (TLI.getBooleanContents(CondVT) !=
6853    return SDValue();
6854
  // The true arm must have the shape (X & 1).
6855  if (T.getOpcode() != ISD::AND)
6856    return SDValue();
6857
6858  if (!isOneConstant(T.getOperand(1)))
6859    return SDValue();
6860
6861  EVT OpVT = T.getValueType();
6862
  // Widen/narrow the condition to the operand type, then mask X with it.
6863  SDValue CondMask =
6864      OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6865  return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6866}
6867
6868/// This contains all DAGCombine rules which reduce two values combined by
6869/// an And operation to a single value. This makes them reusable in the context
6870/// of visitSELECT(). Rules involving constants are not included as
6871/// visitSELECT() already handles those cases.
/// \p N0 and \p N1 are the values being ANDed; \p N is the node whose result
/// the caller will replace. Returns an empty SDValue when nothing applies.
6872SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6873  EVT VT = N1.getValueType();
6874  SDLoc DL(N);
6875
6876  // fold (and x, undef) -> 0
6877  if (N0.isUndef() || N1.isUndef())
6878    return DAG.getConstant(0, DL, VT);
6879
6880  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6881    return V;
6882
6883  // Canonicalize:
6884  //   and(x, add) -> and(add, x)
6885  if (N1.getOpcode() == ISD::ADD)
6886    std::swap(N0, N1);
6887
6888  // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6889  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6890      VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6891    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6892      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6893        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6894        // immediate for an add, but it is legal if its top c2 bits are set,
6895        // transform the ADD so the immediate doesn't need to be materialized
6896        // in a register.
6897        APInt ADDC = ADDI->getAPIntValue();
6898        APInt SRLC = SRLI->getAPIntValue();
6899        if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6900            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6902                                            SRLC.getZExtValue());
          // Those top bits are masked away by the SRL on the other AND
          // operand, so setting them in the add constant cannot change the
          // result of the AND.
6903          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6904            ADDC |= Mask;
6905            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6906              SDLoc DL0(N0);
6907              SDValue NewAdd =
6908                  DAG.getNode(ISD::ADD, DL0, VT,
6909                              N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6910              CombineTo(N0.getNode(), NewAdd);
6911              // Return N so it doesn't get rechecked!
6912              return SDValue(N, 0);
6913            }
6914          }
6915        }
6916      }
6917    }
6918  }
6919
6920  return SDValue();
6921}
6922
6923bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6924 EVT LoadResultTy, EVT &ExtVT) {
6925 if (!AndC->getAPIntValue().isMask())
6926 return false;
6927
6928 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6929
6930 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6931 EVT LoadedVT = LoadN->getMemoryVT();
6932
6933 if (ExtVT == LoadedVT &&
6934 (!LegalOperations ||
6935 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6936 // ZEXTLOAD will match without needing to change the size of the value being
6937 // loaded.
6938 return true;
6939 }
6940
6941 // Do not change the width of a volatile or atomic loads.
6942 if (!LoadN->isSimple())
6943 return false;
6944
6945 // Do not generate loads of non-round integer types since these can
6946 // be expensive (and would be wrong if the type is not byte sized).
6947 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6948 return false;
6949
6950 if (LegalOperations &&
6951 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6952 return false;
6953
6954 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6955 return false;
6956
6957 return true;
6958}
6959
6960bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6961 ISD::LoadExtType ExtType, EVT &MemVT,
6962 unsigned ShAmt) {
6963 if (!LDST)
6964 return false;
6965
6966 // Only allow byte offsets.
6967 if (ShAmt % 8)
6968 return false;
6969 const unsigned ByteShAmt = ShAmt / 8;
6970
6971 // Do not generate loads of non-round integer types since these can
6972 // be expensive (and would be wrong if the type is not byte sized).
6973 if (!MemVT.isRound())
6974 return false;
6975
6976 // Don't change the width of a volatile or atomic loads.
6977 if (!LDST->isSimple())
6978 return false;
6979
6980 EVT LdStMemVT = LDST->getMemoryVT();
6981
6982 // Bail out when changing the scalable property, since we can't be sure that
6983 // we're actually narrowing here.
6984 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6985 return false;
6986
6987 // Verify that we are actually reducing a load width here.
6988 if (LdStMemVT.bitsLT(MemVT))
6989 return false;
6990
6991 // Ensure that this isn't going to produce an unsupported memory access.
6992 if (ShAmt) {
6993 const Align LDSTAlign = LDST->getAlign();
6994 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6995 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6996 LDST->getAddressSpace(), NarrowAlign,
6997 LDST->getMemOperand()->getFlags()))
6998 return false;
6999 }
7000
7001 // It's not possible to generate a constant of extended or untyped type.
7002 EVT PtrType = LDST->getBasePtr().getValueType();
7003 if (PtrType == MVT::Untyped || PtrType.isExtended())
7004 return false;
7005
7006 if (isa<LoadSDNode>(LDST)) {
7007 LoadSDNode *Load = cast<LoadSDNode>(LDST);
7008 // Don't transform one with multiple uses, this would require adding a new
7009 // load.
7010 if (!SDValue(Load, 0).hasOneUse())
7011 return false;
7012
7013 if (LegalOperations &&
7014 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
7015 return false;
7016
7017 // For the transform to be legal, the load must produce only two values
7018 // (the value loaded and the chain). Don't transform a pre-increment
7019 // load, for example, which produces an extra value. Otherwise the
7020 // transformation is not equivalent, and the downstream logic to replace
7021 // uses gets things wrong.
7022 if (Load->getNumValues() > 2)
7023 return false;
7024
7025 // If the load that we're shrinking is an extload and we're not just
7026 // discarding the extension we can't simply shrink the load. Bail.
7027 // TODO: It would be possible to merge the extensions in some cases.
7028 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
7029 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7030 return false;
7031
7032 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
7033 return false;
7034 } else {
7035 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
7036 StoreSDNode *Store = cast<StoreSDNode>(LDST);
7037 // Can't write outside the original store
7038 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7039 return false;
7040
7041 if (LegalOperations &&
7042 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
7043 return false;
7044 }
7045 return true;
7046}
7047
// Walk the operand tree below the bitwise-logic node N, looking for loads
// that could be narrowed to the width implied by Mask. On success:
//   - Loads collects every load that should be re-masked and narrowed,
//   - NodesWithConsts collects logic nodes whose constant operand has bits
//     outside the mask and therefore needs fixing up later,
//   - NodeToMask receives the (at most one) non-load leaf that must be
//     explicitly AND'ed with the mask.
// Returns false as soon as any operand makes the transform unsafe.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    // Vector operands are not handled by this scalar narrowing transform.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
             "Expected bitwise logic operation");
      // Remember the parent node if its constant has bits beyond the mask.
      if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
        NodesWithConsts.insert(N);
      continue;
    }

    // A multi-use operand is still live in its unmasked form elsewhere, so
    // it cannot be rewritten.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countr_one();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the known-zero width comes from its VT operand,
      // otherwise from the type of the value being extended.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse through further bitwise logic.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue and chain results do not count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            // Second data result found: cannot mask this node.
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
7137
// Try to push the constant mask of an (and X, mask) node N backwards through
// a tree of bitwise logic to the loads feeding it, so the loads can be
// narrowed/zero-extended and the AND removed entirely.
// Returns true if the DAG was changed.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only contiguous low-bit masks (0...01...1) are handled.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  // NOTE(review): the declaration of the Loads vector (a SmallVector of
  // LoadSDNode*) appears to be elided from this excerpt — confirm against the
  // full source.
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load there is nothing to gain.
    if (Loads.empty())
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewrote the new AND's own operand; restore it so
      // the AND still consumes the original (unmasked) value.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // We only need to fix AND if both inputs are constants. And we only need
      // to fix one of the constants.
      // NOTE(review): part of this condition (presumably checking whether both
      // operands are constants) appears to be elided from this excerpt.
      if (LogicN->getOpcode() == ISD::AND &&
        continue;

      if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
        Op0 =
            DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);

      if (isa<ConstantSDNode>(Op1))
        Op1 =
            DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);

      // Canonicalize: keep any remaining constant on the RHS.
      if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
        std::swap(Op0, Op1);

      DAG.UpdateNodeOperands(LogicN, Op0, Op1);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above: re-point the AND at the original load after the RAUW.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // The freshly masked load is now a candidate for width reduction.
      SDValue NewLoad = reduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The mask has been absorbed into the tree; replace N with its input.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
7216
// Unfold
// x & (-1 'logical shift' y)
// To
// (x 'opposite logical shift' y) 'logical shift' y
// if it is better for performance.
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
  assert(N->getOpcode() == ISD::AND);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Do we actually prefer shifts over mask?
  // NOTE(review): the target-profitability query guarding this transform
  // appears to be elided from this excerpt — confirm against the full source.
    return SDValue();

  // Try to match (-1 '[outer] logical shift' y)
  unsigned OuterShift;
  unsigned InnerShift; // The opposite direction to the OuterShift.
  SDValue Y;           // Shift amount.
  // Returns true and fills OuterShift/InnerShift/Y if M is a shifted all-ones
  // mask; the mask must be single-use since it will be deleted.
  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
    if (!M.hasOneUse())
      return false;
    OuterShift = M->getOpcode();
    if (OuterShift == ISD::SHL)
      InnerShift = ISD::SRL;
    else if (OuterShift == ISD::SRL)
      InnerShift = ISD::SHL;
    else
      return false;
    // The shifted value must be all-ones for this to be a bit-clearing mask.
    if (!isAllOnesConstant(M->getOperand(0)))
      return false;
    Y = M->getOperand(1);
    return true;
  };

  // The mask may sit on either side of the AND.
  SDValue X;
  if (matchMask(N1))
    X = N0;
  else if (matchMask(N0))
    X = N1;
  else
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // tmp = x 'opposite logical shift' y
  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
  // ret = tmp 'logical shift' y
  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);

  return T1;
}
7270
/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
/// For a target with a bit test, this is expected to become test + set and save
/// at least 1 instruction.
/// NOTE(review): the signature line (presumably 'static SDValue
/// combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {') appears to be
/// elided from this excerpt — confirm against the full source.
  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");

  // Look through an optional extension.
  SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
  if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
    And0 = And0.getOperand(0);
  // Only 'and ..., 1' with a single-use LHS can be a single-bit test.
  if (!isOneConstant(And1) || !And0.hasOneUse())
    return SDValue();

  SDValue Src = And0;

  // Attempt to find a 'not' op.
  // TODO: Should we favor test+set even without the 'not' op?
  bool FoundNot = false;
  if (isBitwiseNot(Src)) {
    FoundNot = true;
    Src = Src.getOperand(0);

    // Look though an optional truncation. The source operand may not be the
    // same type as the original 'and', but that is ok because we are masking
    // off everything but the low bit.
    if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
      Src = Src.getOperand(0);
  }

  // Match a shift-right by constant.
  if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
    return SDValue();

  // This is probably not worthwhile without a supported type.
  EVT SrcVT = Src.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(SrcVT))
    return SDValue();

  // We might have looked through casts that make this transform invalid.
  unsigned BitWidth = SrcVT.getScalarSizeInBits();
  SDValue ShiftAmt = Src.getOperand(1);
  auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
  // The shift amount must be a constant strictly below the bit width.
  if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
    return SDValue();

  // Set source to shift source.
  Src = Src.getOperand(0);

  // Try again to find a 'not' op.
  // TODO: Should we favor test+set even with two 'not' ops?
  if (!FoundNot) {
    if (!isBitwiseNot(Src))
      return SDValue();
    Src = Src.getOperand(0);
  }

  // Only profitable when the target has a native bit-test instruction.
  if (!TLI.hasBitTest(Src, ShiftAmt))
    return SDValue();

  // Turn this into a bit-test pattern using mask op + setcc:
  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
  // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
  SDLoc DL(And);
  SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
  EVT CCVT =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  SDValue Mask = DAG.getConstant(
      APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
  SDValue Zero = DAG.getConstant(0, DL, SrcVT);
  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
  return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
}
7345
/// For targets that support usubsat, match a bit-hack form of that operation
/// that ends in 'and' and convert it.
/// NOTE(review): the signature line (presumably 'static SDValue
/// foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {')
/// appears to be elided from this excerpt — confirm against the full source.
  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getScalarSizeInBits();
  APInt SignMask = APInt::getSignMask(BitWidth);

  // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
  // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
  // xor/add with SMIN (signmask) are logically equivalent.
  SDValue X;
  // NOTE(review): parts of this sd_match pattern (the arithmetic-shift arm and
  // the add variant) appear to be elided from this excerpt.
  if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
                             m_SpecificInt(BitWidth - 1))))) &&
                         m_SpecificInt(BitWidth - 1))))))
    return SDValue();

  // Replace the bit hack with the saturating subtract it computes.
  return DAG.getNode(ISD::USUBSAT, DL, VT, X,
                     DAG.getConstant(SignMask, DL, VT));
}
7368
/// Given a bitwise logic operation N with a matching bitwise logic operand,
/// fold a pattern where 2 of the source operands are identically shifted
/// values. For example:
/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
/// NOTE(review): the signature line (presumably 'static SDValue
/// foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,') appears
/// to be elided from this excerpt — confirm against the full source.
                              SelectionDAG &DAG) {
  unsigned LogicOpcode = N->getOpcode();
  assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
         "Expected bitwise logic operation");

  // Both inner nodes are consumed by the rewrite; bail if they have other
  // users.
  if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
    return SDValue();

  // Match another bitwise logic op and a shift.
  unsigned ShiftOpcode = ShiftOp.getOpcode();
  if (LogicOp.getOpcode() != LogicOpcode ||
      !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
        ShiftOpcode == ISD::SRA))
    return SDValue();

  // Match another shift op inside the first logic operand. Handle both commuted
  // possibilities.
  // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
  // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
  SDValue X1 = ShiftOp.getOperand(0);
  SDValue Y = ShiftOp.getOperand(1);
  SDValue X0, Z;
  if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
      LogicOp.getOperand(0).getOperand(1) == Y) {
    X0 = LogicOp.getOperand(0).getOperand(0);
    Z = LogicOp.getOperand(1);
  } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
             LogicOp.getOperand(1).getOperand(1) == Y) {
    X0 = LogicOp.getOperand(1).getOperand(0);
    Z = LogicOp.getOperand(0);
  } else {
    return SDValue();
  }

  // Rebuild as one shift of the combined values plus the leftover operand Z.
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
  SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
  return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
}
7414
/// Given a tree of logic operations with shape like
/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
/// try to match and fold shift operations with the same shift amount.
/// For example:
/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
/// NOTE(review): the signature line (presumably 'static SDValue
/// foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,') appears to be elided
/// from this excerpt — confirm against the full source.
                                  SDValue RightHand, SelectionDAG &DAG) {
  unsigned LogicOpcode = N->getOpcode();
  assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
         "Expected bitwise logic operation");
  // Both halves must repeat the same logic opcode as N.
  if (LeftHand.getOpcode() != LogicOpcode ||
      RightHand.getOpcode() != LogicOpcode)
    return SDValue();
  // Both inner logic nodes get replaced, so they must have no other users.
  if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
    return SDValue();

  // Try to match one of following patterns:
  // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
  // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
  // Note that foldLogicOfShifts will handle commuted versions of the left hand
  // itself.
  SDValue CombinedShifts, W;
  SDValue R0 = RightHand.getOperand(0);
  SDValue R1 = RightHand.getOperand(1);
  if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
    W = R1;
  else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
    W = R0;
  else
    return SDValue();

  // Join the folded shifts with the leftover operand W.
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
}
7451
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
/// pattern. This is typically a better representation for targets without a
/// fused "and-not" operation.
/// NOTE(review): the signature line (presumably 'static SDValue
/// foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,') appears to be elided
/// from this excerpt — confirm against the full source.
                               const TargetLowering &TLI, const SDLoc &DL) {
  // Note that masked-merge variants using XOR or ADD expressions are
  // normalized to OR by InstCombine so we only check for OR or AND.
  assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
         "Must be called with ISD::OR or ISD::AND node");

  // If the target supports and-not, don't fold this.
  if (TLI.hasAndNot(SDValue(Node, 0)))
    return SDValue();

  SDValue M, X, Y;

  // NOTE(review): the first arm of each sd_match pattern (matching the
  // inverted-mask side) appears to be elided from this excerpt.
  if (sd_match(Node,
                   m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
      sd_match(Node,
                   m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
    // Emit ((X ^ Y) & M) ^ Y, which computes the merge with a single AND.
    EVT VT = M.getValueType();
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
    SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
    return DAG.getNode(ISD::XOR, DL, VT, And, Y);
  }
  return SDValue();
}
7482
7483SDValue DAGCombiner::visitAND(SDNode *N) {
7484 SDValue N0 = N->getOperand(0);
7485 SDValue N1 = N->getOperand(1);
7486 EVT VT = N1.getValueType();
7487 SDLoc DL(N);
7488
7489 // x & x --> x
7490 if (N0 == N1)
7491 return N0;
7492
7493 // fold (and c1, c2) -> c1&c2
7494 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7495 return C;
7496
7497 // canonicalize constant to RHS
7500 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7501
7502 if (areBitwiseNotOfEachother(N0, N1))
7503 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7504
7505 // fold vector ops
7506 if (VT.isVector()) {
7507 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7508 return FoldedVOp;
7509
7510 // fold (and x, 0) -> 0, vector edition
7512 // do not return N1, because undef node may exist in N1
7514 N1.getValueType());
7515
7516 // fold (and x, -1) -> x, vector edition
7518 return N0;
7519
7520 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7522 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7523 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7524 EVT LoadVT = MLoad->getMemoryVT();
7525 EVT ExtVT = VT;
7526 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7527 // For this AND to be a zero extension of the masked load the elements
7528 // of the BuildVec must mask the bottom bits of the extended element
7529 // type
7530 uint64_t ElementSize =
7532 if (Splat->getAPIntValue().isMask(ElementSize)) {
7533 SDValue NewLoad = DAG.getMaskedLoad(
7534 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7535 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7536 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7537 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7538 bool LoadHasOtherUsers = !N0.hasOneUse();
7539 CombineTo(N, NewLoad);
7540 if (LoadHasOtherUsers)
7541 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7542 return SDValue(N, 0);
7543 }
7544 }
7545 }
7546 }
7547
7548 // fold (and x, -1) -> x
7549 if (isAllOnesConstant(N1))
7550 return N0;
7551
7552 // if (and x, c) is known to be zero, return 0
7553 unsigned BitWidth = VT.getScalarSizeInBits();
7554 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7556 return DAG.getConstant(0, DL, VT);
7557
7558 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7559 return R;
7560
7561 if (SDValue NewSel = foldBinOpIntoSelect(N))
7562 return NewSel;
7563
7564 // reassociate and
7565 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7566 return RAND;
7567
7568 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7569 if (SDValue SD =
7570 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7571 return SD;
7572
7573 // fold (and (or x, C), D) -> D if (C & D) == D
7574 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7575 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7576 };
7577 if (N0.getOpcode() == ISD::OR &&
7578 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7579 return N1;
7580
7581 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7582 SDValue N0Op0 = N0.getOperand(0);
7583 EVT SrcVT = N0Op0.getValueType();
7584 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7585 APInt Mask = ~N1C->getAPIntValue();
7586 Mask = Mask.trunc(SrcBitWidth);
7587
7588 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7589 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7590 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7591
7592 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7593 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7594 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7595 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7596 TLI.isNarrowingProfitable(N, VT, SrcVT))
7597 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7598 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7599 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7600 }
7601
7602 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7603 if (ISD::isExtOpcode(N0.getOpcode())) {
7604 unsigned ExtOpc = N0.getOpcode();
7605 SDValue N0Op0 = N0.getOperand(0);
7606 if (N0Op0.getOpcode() == ISD::AND &&
7607 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7608 N0->hasOneUse() && N0Op0->hasOneUse()) {
7609 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7610 {N0Op0.getOperand(1)})) {
7611 if (SDValue NewMask =
7612 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7613 return DAG.getNode(ISD::AND, DL, VT,
7614 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7615 NewMask);
7616 }
7617 }
7618 }
7619 }
7620
7621 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7622 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7623 // already be zero by virtue of the width of the base type of the load.
7624 //
7625 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7626 // more cases.
7627 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7629 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7630 N0.getOperand(0).getResNo() == 0) ||
7631 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7632 auto *Load =
7633 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7634
7635 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7636 // This can be a pure constant or a vector splat, in which case we treat the
7637 // vector as a scalar and use the splat value.
7638 APInt Constant = APInt::getZero(1);
7639 if (const ConstantSDNode *C = isConstOrConstSplat(
7640 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7641 Constant = C->getAPIntValue();
7642 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7643 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7644 APInt SplatValue, SplatUndef;
7645 unsigned SplatBitSize;
7646 bool HasAnyUndefs;
7647 // Endianness should not matter here. Code below makes sure that we only
7648 // use the result if the SplatBitSize is a multiple of the vector element
7649 // size. And after that we AND all element sized parts of the splat
7650 // together. So the end result should be the same regardless of in which
7651 // order we do those operations.
7652 const bool IsBigEndian = false;
7653 bool IsSplat =
7654 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7655 HasAnyUndefs, EltBitWidth, IsBigEndian);
7656
7657 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7658 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7659 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7660 // Undef bits can contribute to a possible optimisation if set, so
7661 // set them.
7662 SplatValue |= SplatUndef;
7663
7664 // The splat value may be something like "0x00FFFFFF", which means 0 for
7665 // the first vector value and FF for the rest, repeating. We need a mask
7666 // that will apply equally to all members of the vector, so AND all the
7667 // lanes of the constant together.
7668 Constant = APInt::getAllOnes(EltBitWidth);
7669 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7670 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7671 }
7672 }
7673
7674 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7675 // actually legal and isn't going to get expanded, else this is a false
7676 // optimisation.
7677 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7678 Load->getValueType(0),
7679 Load->getMemoryVT());
7680
7681 // Resize the constant to the same size as the original memory access before
7682 // extension. If it is still the AllOnesValue then this AND is completely
7683 // unneeded.
7684 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7685
7686 bool B;
7687 switch (Load->getExtensionType()) {
7688 default: B = false; break;
7689 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7690 case ISD::ZEXTLOAD:
7691 case ISD::NON_EXTLOAD: B = true; break;
7692 }
7693
7694 if (B && Constant.isAllOnes()) {
7695 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7696 // preserve semantics once we get rid of the AND.
7697 SDValue NewLoad(Load, 0);
7698
7699 // Fold the AND away. NewLoad may get replaced immediately.
7700 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7701
7702 if (Load->getExtensionType() == ISD::EXTLOAD) {
7703 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7704 Load->getValueType(0), SDLoc(Load),
7705 Load->getChain(), Load->getBasePtr(),
7706 Load->getOffset(), Load->getMemoryVT(),
7707 Load->getMemOperand());
7708 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7709 if (Load->getNumValues() == 3) {
7710 // PRE/POST_INC loads have 3 values.
7711 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7712 NewLoad.getValue(2) };
7713 CombineTo(Load, To, 3, true);
7714 } else {
7715 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7716 }
7717 }
7718
7719 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7720 }
7721 }
7722
7723 // Try to convert a constant mask AND into a shuffle clear mask.
7724 if (VT.isVector())
7725 if (SDValue Shuffle = XformToShuffleWithZero(N))
7726 return Shuffle;
7727
7728 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7729 return Combined;
7730
7731 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7733 SDValue Ext = N0.getOperand(0);
7734 EVT ExtVT = Ext->getValueType(0);
7735 SDValue Extendee = Ext->getOperand(0);
7736
7737 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7738 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7739 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7740 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7741 // => (extract_subvector (iN_zeroext v))
7742 SDValue ZeroExtExtendee =
7743 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7744
7745 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7746 N0.getOperand(1));
7747 }
7748 }
7749
7750 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7751 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7752 EVT MemVT = GN0->getMemoryVT();
7753 EVT ScalarVT = MemVT.getScalarType();
7754
7755 if (SDValue(GN0, 0).hasOneUse() &&
7756 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7758 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7759 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7760
7761 SDValue ZExtLoad = DAG.getMaskedGather(
7762 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7763 GN0->getIndexType(), ISD::ZEXTLOAD);
7764
7765 CombineTo(N, ZExtLoad);
7766 AddToWorklist(ZExtLoad.getNode());
7767 // Avoid recheck of N.
7768 return SDValue(N, 0);
7769 }
7770 }
7771
7772 // fold (and (load x), 255) -> (zextload x, i8)
7773 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7774 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7775 if (SDValue Res = reduceLoadWidth(N))
7776 return Res;
7777
7778 if (LegalTypes) {
7779 // Attempt to propagate the AND back up to the leaves which, if they're
7780 // loads, can be combined to narrow loads and the AND node can be removed.
7781 // Perform after legalization so that extend nodes will already be
7782 // combined into the loads.
7783 if (BackwardsPropagateMask(N))
7784 return SDValue(N, 0);
7785 }
7786
7787 if (SDValue Combined = visitANDLike(N0, N1, N))
7788 return Combined;
7789
7790 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7791 if (N0.getOpcode() == N1.getOpcode())
7792 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7793 return V;
7794
7795 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7796 return R;
7797 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7798 return R;
7799
7800 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7801 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7802 SDValue X, Y, Z, NotY;
7803 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7804 if (sd_match(N,
7805 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7806 sd_match(NotY, m_Not(m_Value(Y))) &&
7807 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7808 return DAG.getNode(ISD::AND, DL, VT, X,
7809 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7810
7811 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7812 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7813 if (sd_match(N, m_And(m_Value(X),
7814 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7815 sd_match(NotY, m_Not(m_Value(Y))) &&
7816 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7817 return DAG.getNode(ISD::AND, DL, VT, X,
7818 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7819
7820 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7821 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7822 if (TLI.hasAndNot(SDValue(N, 0)))
7823 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7824 return Folded;
7825
7826 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7827 // If we are shifting down an extended sign bit, see if we can simplify
7828 // this to shifting the MSB directly to expose further simplifications.
7829 // This pattern often appears after sext_inreg legalization.
7830 APInt Amt;
7831 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7832 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7833 return DAG.getNode(ISD::SRL, DL, VT, X,
7834 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7835
7836 // Masking the negated extension of a boolean is just the zero-extended
7837 // boolean:
7838 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7839 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7840 //
7841 // Note: the SimplifyDemandedBits fold below can make an information-losing
7842 // transform, and then we have no way to find this better fold.
7843 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7844 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7845 X.getOperand(0).getScalarValueSizeInBits() == 1)
7846 return X;
7847 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7848 X.getOperand(0).getScalarValueSizeInBits() == 1)
7849 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7850 }
7851
7852 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7853 // fold (and (sra)) -> (and (srl)) when possible.
7855 return SDValue(N, 0);
7856
7857 // fold (zext_inreg (extload x)) -> (zextload x)
7858 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7859 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7860 (ISD::isEXTLoad(N0.getNode()) ||
7861 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7862 auto *LN0 = cast<LoadSDNode>(N0);
7863 EVT MemVT = LN0->getMemoryVT();
7864 // If we zero all the possible extended bits, then we can turn this into
7865 // a zextload if we are running before legalize or the operation is legal.
7866 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7867 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7868 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7869 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7870 ((!LegalOperations && LN0->isSimple()) ||
7871 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7872 SDValue ExtLoad =
7873 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7874 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7875 AddToWorklist(N);
7876 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7877 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7878 }
7879 }
7880
7881 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7882 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7883 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7884 N0.getOperand(1), false))
7885 return BSwap;
7886 }
7887
7888 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7889 return Shifts;
7890
7891 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7892 return V;
7893
7894 // Recognize the following pattern:
7895 //
7896 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7897 //
7898 // where bitmask is a mask that clears the upper bits of AndVT. The
7899 // number of bits in bitmask must be a power of two.
7900 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7901 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7902 return false;
7903
7905 if (!C)
7906 return false;
7907
7908 if (!C->getAPIntValue().isMask(
7909 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7910 return false;
7911
7912 return true;
7913 };
7914
7915 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7916 if (IsAndZeroExtMask(N0, N1))
7917 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7918
7919 if (hasOperation(ISD::USUBSAT, VT))
7920 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7921 return V;
7922
7923 // Postpone until legalization completed to avoid interference with bswap
7924 // folding
7925 if (LegalOperations || VT.isVector())
7926 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7927 return R;
7928
7929 if (VT.isScalarInteger() && VT != MVT::i1)
7930 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7931 return R;
7932
7933 return SDValue();
7934}
7935
7936/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// Called with the two OR operands in N0/N1. Recognizes the low-halfword
/// byte-swap idiom, looking through optional AND masks (0xFF/0xFF00, or
/// 0xFFFF where the masked-off bits are provably zero or shifted out).
/// \p DemandHighBits is false when the caller has proven the bits above the
/// low 16 are not demanded, which relaxes the zero-masking checks below.
/// Returns the (possibly shifted) BSWAP node, or an empty SDValue.
/// NOTE(review): one guard line was lost in extraction between the VT check
/// and the second early return (presumably a BSWAP legality check) --
/// confirm against the upstream file before editing.
7937SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7938                                        bool DemandHighBits) {
  // Only run post-legalization: this creates BSWAP/SRL nodes directly.
7939  if (!LegalOperations)
7940    return SDValue();
7941
7942  EVT VT = N->getValueType(0);
7943  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7944    return SDValue();
7946    return SDValue();
7947
  // Canonicalize so that (after peeling masks) the SHL side ends up in N0
  // and the SRL side in N1.
7948  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7949  bool LookPassAnd0 = false;
7950  bool LookPassAnd1 = false;
7951  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7952    std::swap(N0, N1);
7953  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7954    std::swap(N0, N1);
  // Peel an 0xFF00 (or 0xFFFF) mask off the SHL side.
7955  if (N0.getOpcode() == ISD::AND) {
7956    if (!N0->hasOneUse())
7957      return SDValue();
7958    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7959    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7960    // This is needed for X86.
7961    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7962                  N01C->getZExtValue() != 0xFFFF))
7963      return SDValue();
7964    N0 = N0.getOperand(0);
7965    LookPassAnd0 = true;
7966  }
7967
  // Peel an 0xFF mask off the SRL side.
7968  if (N1.getOpcode() == ISD::AND) {
7969    if (!N1->hasOneUse())
7970      return SDValue();
7971    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7972    if (!N11C || N11C->getZExtValue() != 0xFF)
7973      return SDValue();
7974    N1 = N1.getOperand(0);
7975    LookPassAnd1 = true;
7976  }
7977
  // After mask-peeling we require exactly one SHL and one SRL, both
  // single-use and both by 8.
7978  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7979    std::swap(N0, N1);
7980  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7981    return SDValue();
7982  if (!N0->hasOneUse() || !N1->hasOneUse())
7983    return SDValue();
7984
7985  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7986  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7987  if (!N01C || !N11C)
7988    return SDValue();
7989  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7990    return SDValue();
7991
  // The masks may also sit inside the shifts rather than outside them.
7992  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7993  SDValue N00 = N0->getOperand(0);
7994  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7995    if (!N00->hasOneUse())
7996      return SDValue();
7997    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7998    if (!N001C || N001C->getZExtValue() != 0xFF)
7999      return SDValue();
8000    N00 = N00.getOperand(0);
8001    LookPassAnd0 = true;
8002  }
8003
8004  SDValue N10 = N1->getOperand(0);
8005  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
8006    if (!N10->hasOneUse())
8007      return SDValue();
8008    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
8009    // Also allow 0xFFFF since the bits will be shifted out. This is needed
8010    // for X86.
8011    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
8012                   N101C->getZExtValue() != 0xFFFF))
8013      return SDValue();
8014    N10 = N10.getOperand(0);
8015    LookPassAnd1 = true;
8016  }
8017
  // Both halves must be byte-swapping the same source value.
8018  if (N00 != N10)
8019    return SDValue();
8020
8021  // Make sure everything beyond the low halfword gets set to zero since the SRL
8022  // 16 will clear the top bits.
8023  unsigned OpSizeInBits = VT.getSizeInBits();
8024  if (OpSizeInBits > 16) {
8025    // If the left-shift isn't masked out then the only way this is a bswap is
8026    // if all bits beyond the low 8 are 0. In that case the entire pattern
8027    // reduces to a left shift anyway: leave it for other parts of the combiner.
8028    if (DemandHighBits && !LookPassAnd0)
8029      return SDValue();
8030
8031    // However, if the right shift isn't masked out then it might be because
8032    // it's not needed. See if we can spot that too. If the high bits aren't
8033    // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
8034    // upper bits to be zero.
8035    if (!LookPassAnd1) {
8036      unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
8037      if (!DAG.MaskedValueIsZero(N10,
8038                                 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
8039        return SDValue();
8040    }
8041  }
8042
  // Emit bswap(a); for types wider than i16 shift the swapped halfword back
  // down into the low 16 bits.
8043  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
8044  if (OpSizeInBits > 16) {
8045    SDLoc DL(N);
8046    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8047                      DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
8048  }
8049  return Res;
8050}
8051
8052/// Return true if the specified node is an element that makes up a 32-bit
8053/// packed halfword byteswap.
8054/// ((x & 0x000000ff) << 8) |
8055/// ((x & 0x0000ff00) >> 8) |
8056/// ((x & 0x00ff0000) << 8) |
8057/// ((x & 0xff000000) >> 8)
///
/// On success records the source node of the matched byte into Parts at the
/// index of the destination byte (0..3); fails if that slot is already
/// occupied. NOTE(review): the function signature line and the two
/// dyn_cast initializations of the shift-amount constant `C` (before lines
/// 8106 and 8114) were lost in extraction -- confirm against upstream.
8059  if (!N->hasOneUse())
8060    return false;
8061
  // Each element is an AND over a shift, or a shift over an AND.
8062  unsigned Opc = N.getOpcode();
8063  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8064    return false;
8065
8066  SDValue N0 = N.getOperand(0);
8067  unsigned Opc0 = N0.getOpcode();
8068  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8069    return false;
8070
  // Find the AND mask constant, whether the AND is outermost or inner.
8071  ConstantSDNode *N1C = nullptr;
8072  // SHL or SRL: look upstream for AND mask operand
8073  if (Opc == ISD::AND)
8074    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8075  else if (Opc0 == ISD::AND)
8077  if (!N1C)
8078    return false;
8079
  // The mask identifies which destination byte this element produces.
8080  unsigned MaskByteOffset;
8081  switch (N1C->getZExtValue()) {
8082  default:
8083    return false;
8084  case 0xFF:       MaskByteOffset = 0; break;
8085  case 0xFF00:     MaskByteOffset = 1; break;
8086  case 0xFFFF:
8087    // In case demanded bits didn't clear the bits that will be shifted out.
8088    // This is needed for X86.
8089    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8090      MaskByteOffset = 1;
8091      break;
8092    }
8093    return false;
8094  case 0xFF0000:   MaskByteOffset = 2; break;
8095  case 0xFF000000: MaskByteOffset = 3; break;
8096  }
8097
  // Check that the shift direction and amount (8) are consistent with the
  // mask's byte position.
8098  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8099  if (Opc == ISD::AND) {
8100    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8101      // (x >> 8) & 0xff
8102      // (x >> 8) & 0xff0000
8103      if (Opc0 != ISD::SRL)
8104        return false;
8106      if (!C || C->getZExtValue() != 8)
8107        return false;
8108    } else {
8109      // (x << 8) & 0xff00
8110      // (x << 8) & 0xff000000
8111      if (Opc0 != ISD::SHL)
8112        return false;
8114      if (!C || C->getZExtValue() != 8)
8115        return false;
8116    }
8117  } else if (Opc == ISD::SHL) {
8118    // (x & 0xff) << 8
8119    // (x & 0xff0000) << 8
8120    if (MaskByteOffset != 0 && MaskByteOffset != 2)
8121      return false;
8122    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8123    if (!C || C->getZExtValue() != 8)
8124      return false;
8125  } else { // Opc == ISD::SRL
8126    // (x & 0xff00) >> 8
8127    // (x & 0xff000000) >> 8
8128    if (MaskByteOffset != 1 && MaskByteOffset != 3)
8129      return false;
8130    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8131    if (!C || C->getZExtValue() != 8)
8132      return false;
8133  }
8134
  // Two elements may not claim the same destination byte.
8135  if (Parts[MaskByteOffset])
8136    return false;
8137
8138  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8139  return true;
8140}
8141
8142// Match 2 elements of a packed halfword bswap.
// Either an OR of two isBSwapHWordElement matches, or an already-formed
// (srl (bswap x), 16) which supplies both low-halfword bytes at once.
// NOTE(review): the signature line was lost in extraction -- confirm
// against upstream.
8144  if (N.getOpcode() == ISD::OR)
8145    return isBSwapHWordElement(N.getOperand(0), Parts) &&
8146           isBSwapHWordElement(N.getOperand(1), Parts);
8147
8148  if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8149    ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8150    if (!C || C->getAPIntValue() != 16)
8151      return false;
      // (srl (bswap x), 16) covers destination bytes 0 and 1 of x.
8152    Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8153    return true;
8154  }
8155
8156  return false;
8157}
8158
8159// Match this pattern:
8160//   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8161// And rewrite this to:
8162//   (rotr (bswap A), 16)
// Helper for MatchBSwapHWord: handles the masked-pair shape in one step
// when ROTR is legal/custom for VT (asserted to be i32 by the caller).
// NOTE(review): the signature line and the Mask0/Mask1
// isConstOrConstSplat initializations were lost in extraction -- confirm
// against upstream.
8164                                       SelectionDAG &DAG, SDNode *N, SDValue N0,
8165                                       SDValue N1, EVT VT) {
8166  assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8167         "MatchBSwapHWordOrAndAnd: expecting i32");
8168  if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8169    return SDValue();
8170  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8171    return SDValue();
8172  // TODO: this is too restrictive; lifting this restriction requires more tests
8173  if (!N0->hasOneUse() || !N1->hasOneUse())
8174    return SDValue();
8177  if (!Mask0 || !Mask1)
8178    return SDValue();
  // Masks must be exactly the alternating-byte pair for this idiom.
8179  if (Mask0->getAPIntValue() != 0xff00ff00 ||
8180      Mask1->getAPIntValue() != 0x00ff00ff)
8181    return SDValue();
8182  SDValue Shift0 = N0.getOperand(0);
8183  SDValue Shift1 = N1.getOperand(0);
8184  if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8185    return SDValue();
8186  ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8187  ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8188  if (!ShiftAmt0 || !ShiftAmt1)
8189    return SDValue();
8190  if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8191    return SDValue();
  // Both shifts must act on the same source value A.
8192  if (Shift0.getOperand(0) != Shift1.getOperand(0))
8193    return SDValue();
8194
8195  SDLoc DL(N);
8196  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8197  SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8198  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8199}
8200
8201/// Match a 32-bit packed halfword bswap. That is
8202/// ((x & 0x000000ff) << 8) |
8203/// ((x & 0x0000ff00) >> 8) |
8204/// ((x & 0x00ff0000) << 8) |
8205/// ((x & 0xff000000) >> 8)
8206/// => (rotl (bswap x), 16)
///
/// N0/N1 are the operands of the OR node N. First tries the two-AND fast
/// path (matchBSwapHWordOrAndAnd) in both operand orders, then gathers the
/// four per-byte elements through the possible OR tree shapes.
/// NOTE(review): three guard lines were lost in extraction (a BSWAP
/// legality check after the VT test, and the ROTL/ROTR legality checks
/// before the two rotate returns) -- confirm against upstream.
8207SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Post-legalization only: BSWAP/rotate nodes are created directly.
8208  if (!LegalOperations)
8209    return SDValue();
8210
8211  EVT VT = N->getValueType(0);
8212  if (VT != MVT::i32)
8213    return SDValue();
8215    return SDValue();
8216
8217  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8218    return BSwap;
8219
8220  // Try again with commuted operands.
8221  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8222    return BSwap;
8223
8224
8225  // Look for either
8226  // (or (bswaphpair), (bswaphpair))
8227  // (or (or (bswaphpair), (and)), (and))
8228  // (or (or (and), (bswaphpair)), (and))
8229  SDNode *Parts[4] = {};
8230
8231  if (isBSwapHWordPair(N0, Parts)) {
8232    // (or (or (and), (and)), (or (and), (and)))
8233    if (!isBSwapHWordPair(N1, Parts))
8234      return SDValue();
8235  } else if (N0.getOpcode() == ISD::OR) {
8236    // (or (or (or (and), (and)), (and)), (and))
8237    if (!isBSwapHWordElement(N1, Parts))
8238      return SDValue();
8239    SDValue N00 = N0.getOperand(0);
8240    SDValue N01 = N0.getOperand(1);
8241    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8242        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8243      return SDValue();
8244  } else {
8245    return SDValue();
8246  }
8247
8248  // Make sure the parts are all coming from the same node.
8249  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8250    return SDValue();
8251
8252  SDLoc DL(N);
8253  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8254                              SDValue(Parts[0], 0));
8255
8256  // Result of the bswap should be rotated by 16. If it's not legal, then
8257  // do (x << 16) | (x >> 16).
8258  SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8260    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8262    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8263  return DAG.getNode(ISD::OR, DL, VT,
8264                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8265                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8266}
8267
8268/// This contains all DAGCombine rules which reduce two values combined by
8269/// an Or operation to a single value \see visitANDLike().
///
/// Shared between visitOR and folds that treat other nodes as OR-like.
/// Returns the replacement value or an empty SDValue.
/// NOTE(review): the two dyn_cast<ConstantSDNode> initializer lines for
/// N0O1C/N1O1C were lost in extraction -- confirm against upstream.
8270SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8271  EVT VT = N1.getValueType();
8272
8273  // fold (or x, undef) -> -1
8274  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8275    return DAG.getAllOnesConstant(DL, VT);
8276
8277  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8278    return V;
8279
8280  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8281  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8282      // Don't increase # computations.
8283      (N0->hasOneUse() || N1->hasOneUse())) {
8284    // We can only do this xform if we know that bits from X that are set in C2
8285    // but not in C1 are already zero. Likewise for Y.
8286    if (const ConstantSDNode *N0O1C =
8288      if (const ConstantSDNode *N1O1C =
8290        // We can only do this xform if we know that bits from X that are set in
8291        // C2 but not in C1 are already zero. Likewise for Y.
8292        const APInt &LHSMask = N0O1C->getAPIntValue();
8293        const APInt &RHSMask = N1O1C->getAPIntValue();
8294
8295        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8296            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8297          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8298                                  N0.getOperand(0), N1.getOperand(0));
8299          return DAG.getNode(ISD::AND, DL, VT, X,
8300                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
8301        }
8302      }
8303    }
8304  }
8305
  // Same-source ANDs can always merge their masks.
8306  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8307  if (N0.getOpcode() == ISD::AND &&
8308      N1.getOpcode() == ISD::AND &&
8309      N0.getOperand(0) == N1.getOperand(0) &&
8310      // Don't increase # computations.
8311      (N0->hasOneUse() || N1->hasOneUse())) {
8312    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8313                            N0.getOperand(1), N1.getOperand(1));
8314    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8315  }
8316
8317  return SDValue();
8318}
8319
8320/// OR combines for which the commuted variant will be tried as well.
///
/// visitOR calls this twice, once with (N0, N1) and once with (N1, N0), so
/// each fold here only needs to handle one operand order. Returns the
/// replacement value or an empty SDValue.
/// NOTE(review): the signature line, one m_Or match line in the
/// "xor/and-or" fold, and one match line in the build_pair fold were lost
/// in extraction -- confirm against upstream.
8322                                  SDNode *N) {
8323  EVT VT = N0.getValueType();
8324  unsigned BW = VT.getScalarSizeInBits();
8325  SDLoc DL(N);
8326
  // Look through zext/trunc so the AND folds below also fire when the AND
  // sits at a different width than the OR.
8327  auto peekThroughResize = [](SDValue V) {
8328    if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8329      return V->getOperand(0);
8330    return V;
8331  };
8332
8333  SDValue N0Resized = peekThroughResize(N0);
8334  if (N0Resized.getOpcode() == ISD::AND) {
8335    SDValue N1Resized = peekThroughResize(N1);
8336    SDValue N00 = N0Resized.getOperand(0);
8337    SDValue N01 = N0Resized.getOperand(1);
8338
8339    // fold or (and x, y), x --> x
8340    if (N00 == N1Resized || N01 == N1Resized)
8341      return N1;
8342
8343    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8344    // TODO: Set AllowUndefs = true.
8345    if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8346                                                  /* AllowUndefs */ false)) {
8347      if (peekThroughResize(NotOperand) == N1Resized)
8348        return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8349                           N1);
8350    }
8351
8352    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8353    if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8354                                                  /* AllowUndefs */ false)) {
8355      if (peekThroughResize(NotOperand) == N1Resized)
8356        return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8357                           N1);
8358    }
8359  }
8360
8361  SDValue X, Y;
8362
8363  // fold or (xor X, N1), N1 --> or X, N1
8364  if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8365    return DAG.getNode(ISD::OR, DL, VT, X, N1);
8366
8367  // fold or (xor x, y), (x and/or y) --> or x, y
8368  if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8369      (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8371    return DAG.getNode(ISD::OR, DL, VT, X, Y);
8372
8373  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8374    return R;
8375
  // Shift amounts are often zero-extended; ignore that wrapper when
  // comparing funnel-shift amounts against plain shift amounts.
8376  auto peekThroughZext = [](SDValue V) {
8377    if (V->getOpcode() == ISD::ZERO_EXTEND)
8378      return V->getOperand(0);
8379    return V;
8380  };
8381
8382  // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8383  if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8384      N0.getOperand(0) == N1.getOperand(0) &&
8385      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8386    return N0;
8387
8388  // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8389  if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8390      N0.getOperand(1) == N1.getOperand(0) &&
8391      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8392    return N0;
8393
8394  // Attempt to match a legalized build_pair-esque pattern:
8395  // or(shl(aext(Hi),BW/2),zext(Lo))
8396  SDValue Lo, Hi;
8397  if (sd_match(N0,
8399      sd_match(N1, m_ZExt(m_Value(Lo))) &&
8400      Lo.getScalarValueSizeInBits() == (BW / 2) &&
8401      Lo.getValueType() == Hi.getValueType()) {
8402    // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8403    SDValue NotLo, NotHi;
8404    if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8405        sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8406      Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8407      Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8408      Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8409                       DAG.getShiftAmountConstant(BW / 2, VT, DL));
8410      return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8411    }
8412  }
8413
8414  return SDValue();
8415}
8416
/// Main combine entry point for ISD::OR nodes. Tries constant folding,
/// vector-specific folds, bswap/rotate/load-combine pattern matching, and
/// commutative helpers, in priority order; returns the replacement value or
/// an empty SDValue if nothing applied.
/// NOTE(review): several lines were lost in extraction (the
/// canonicalize-to-RHS condition, the vector all-zeros/all-ones operand
/// checks, and the SimplifyDemandedBits call before the final folds) --
/// confirm against upstream before editing.
8417SDValue DAGCombiner::visitOR(SDNode *N) {
8418  SDValue N0 = N->getOperand(0);
8419  SDValue N1 = N->getOperand(1);
8420  EVT VT = N1.getValueType();
8421  SDLoc DL(N);
8422
8423  // x | x --> x
8424  if (N0 == N1)
8425    return N0;
8426
8427  // fold (or c1, c2) -> c1|c2
8428  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8429    return C;
8430
8431  // canonicalize constant to RHS
8434    return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8435
8436  // fold vector ops
8437  if (VT.isVector()) {
8438    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8439      return FoldedVOp;
8440
8441    // fold (or x, 0) -> x, vector edition
8443      return N0;
8444
8445    // fold (or x, -1) -> -1, vector edition
8447      // do not return N1, because undef node may exist in N1
8448      return DAG.getAllOnesConstant(DL, N1.getValueType());
8449
8450    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8451    // Do this only if the resulting type / shuffle is legal.
8452    auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8453    auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8454    if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8455      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8456      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8457      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8458      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8459      // Ensure both shuffles have a zero input.
8460      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8461        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8462        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8463        bool CanFold = true;
8464        int NumElts = VT.getVectorNumElements();
8465        SmallVector<int, 4> Mask(NumElts, -1);
8466
        // Per lane: exactly one shuffle must supply a zero so the OR acts
        // as a lane-select between the two non-zero inputs.
8467        for (int i = 0; i != NumElts; ++i) {
8468          int M0 = SV0->getMaskElt(i);
8469          int M1 = SV1->getMaskElt(i);
8470
8471          // Determine if either index is pointing to a zero vector.
8472          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8473          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8474
8475          // If one element is zero and the otherside is undef, keep undef.
8476          // This also handles the case that both are undef.
8477          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8478            continue;
8479
8480          // Make sure only one of the elements is zero.
8481          if (M0Zero == M1Zero) {
8482            CanFold = false;
8483            break;
8484          }
8485
8486          assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8487
8488          // We have a zero and non-zero element. If the non-zero came from
8489          // SV0 make the index a LHS index. If it came from SV1, make it
8490          // a RHS index. We need to mod by NumElts because we don't care
8491          // which operand it came from in the original shuffles.
8492          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8493        }
8494
8495        if (CanFold) {
8496          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8497          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8498          SDValue LegalShuffle =
8499              TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8500          if (LegalShuffle)
8501            return LegalShuffle;
8502        }
8503      }
8504    }
8505  }
8506
8507  // fold (or x, 0) -> x
8508  if (isNullConstant(N1))
8509    return N0;
8510
8511  // fold (or x, -1) -> -1
8512  if (isAllOnesConstant(N1))
8513    return N1;
8514
8515  if (SDValue NewSel = foldBinOpIntoSelect(N))
8516    return NewSel;
8517
8518  // fold (or x, c) -> c iff (x & ~c) == 0
8519  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8520  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8521    return N1;
8522
8523  if (SDValue R = foldAndOrOfSETCC(N, DAG))
8524    return R;
8525
8526  if (SDValue Combined = visitORLike(N0, N1, DL))
8527    return Combined;
8528
8529  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8530    return Combined;
8531
8532  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8533  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8534    return BSwap;
8535  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8536    return BSwap;
8537
8538  // reassociate or
8539  if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8540    return ROR;
8541
8542  // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8543  if (SDValue SD =
8544          reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8545    return SD;
8546
8547  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8548  // iff (c1 & c2) != 0 or c1/c2 are undef.
8549  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8550    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8551  };
8552  if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8553      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8554    if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8555                                                 {N1, N0.getOperand(1)})) {
8556      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8557      AddToWorklist(IOR.getNode());
8558      return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8559    }
8560  }
8561
  // Try order-sensitive folds in both operand orders.
8562  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8563    return Combined;
8564  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8565    return Combined;
8566
8567  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8568  if (N0.getOpcode() == N1.getOpcode())
8569    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8570      return V;
8571
8572  // See if this is some rotate idiom.
8573  if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8574    return Rot;
8575
8576  if (SDValue Load = MatchLoadCombine(N))
8577    return Load;
8578
8579  // Simplify the operands using demanded-bits information.
8581    return SDValue(N, 0);
8582
8583  // If OR can be rewritten into ADD, try combines based on ADD.
8584  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8585      DAG.isADDLike(SDValue(N, 0)))
8586    if (SDValue Combined = visitADDLike(N))
8587      return Combined;
8588
8589  // Postpone until legalization completed to avoid interference with bswap
8590  // folding
8591  if (LegalOperations || VT.isVector())
8592    if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8593      return R;
8594
8595  if (VT.isScalarInteger() && VT != MVT::i1)
8596    if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8597      return R;
8598
8599  return SDValue();
8600}
8601
// Strip an (and Op, constant) wrapper: if Op is an AND with a constant
// (or constant build-vector) RHS, store that constant into Mask and return
// the AND's other operand; otherwise return Op unchanged and leave Mask
// untouched. NOTE(review): the signature line was lost in extraction --
// confirm against upstream.
8603                                 SDValue &Mask) {
8604  if (Op.getOpcode() == ISD::AND &&
8605      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8606    Mask = Op.getOperand(1);
8607    return Op.getOperand(0);
8608  }
8609  return Op;
8610}
8611
8612/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8613static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8614 SDValue &Mask) {
8615 Op = stripConstantMask(DAG, Op, Mask);
8616 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8617 Shift = Op;
8618 return true;
8619 }
8620 return false;
8621}
8622
8623/// Helper function for visitOR to extract the needed side of a rotate idiom
8624/// from a shl/srl/mul/udiv. This is meant to handle cases where
8625/// InstCombine merged some outside op with one of the shifts from
8626/// the rotate pattern.
8627/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8628/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8629/// patterns:
8630///
8631/// (or (add v v) (shrl v bitwidth-1)):
8632///   expands (add v v) -> (shl v 1)
8633///
8634/// (or (mul v c0) (shrl (mul v c1) c2)):
8635///   expands (mul v c0) -> (shl (mul v c1) c3)
8636///
8637/// (or (udiv v c0) (shl (udiv v c1) c2)):
8638///   expands (udiv v c0) -> (shrl (udiv v c1) c3)
8639///
8640/// (or (shl v c0) (shrl (shl v c1) c2)):
8641///   expands (shl v c0) -> (shl (shl v c1) c3)
8642///
8643/// (or (shrl v c0) (shl (shrl v c1) c2)):
8644///   expands (shrl v c0) -> (shrl (shrl v c1) c3)
8645///
8646/// Such that in all cases, c3+c2==bitwidth(op v c1).
///
/// NOTE(review): the signature line (taking DAG and OppShift before the
/// parameters below) was lost in extraction -- confirm against upstream.
8648                                     SDValue ExtractFrom, SDValue &Mask,
8649                                     const SDLoc &DL) {
8650  assert(OppShift && ExtractFrom && "Empty SDValue");
  // The already-present half of the rotate must be a plain logical shift.
8651  if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8652    return SDValue();
8653
8654  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8655
8656  // Value and Type of the shift.
8657  SDValue OppShiftLHS = OppShift.getOperand(0);
8658  EVT ShiftedVT = OppShiftLHS.getValueType();
8659
8660  // Amount of the existing shift.
8661  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8662
8663  // (add v v) -> (shl v 1)
8664  // TODO: Should this be a general DAG canonicalization?
8665  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8666      ExtractFrom.getOpcode() == ISD::ADD &&
8667      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8668      ExtractFrom.getOperand(0) == OppShiftLHS &&
8669      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8670    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8671                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8672
8673  // Preconditions:
8674  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8675  //
8676  // Find opcode of the needed shift to be extracted from (op0 v c0).
8677  unsigned Opcode = ISD::DELETED_NODE;
8678  bool IsMulOrDiv = false;
8679  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8680  // opcode or its arithmetic (mul or udiv) variant.
8681  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8682    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8683    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8684      return false;
8685    Opcode = NeededShift;
8686    return true;
8687  };
8688  // op0 must be either the needed shift opcode or the mul/udiv equivalent
8689  // that the needed shift can be extracted from.
8690  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8691      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8692    return SDValue();
8693
8694  // op0 must be the same opcode on both sides, have the same LHS argument,
8695  // and produce the same value type.
8696  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8697      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8698      ShiftedVT != ExtractFrom.getValueType())
8699    return SDValue();
8700
8701  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8702  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8703  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8704  ConstantSDNode *ExtractFromCst =
8705      isConstOrConstSplat(ExtractFrom.getOperand(1));
8706  // TODO: We should be able to handle non-uniform constant vectors for these values
8707  // Check that we have constant values.
8708  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8709      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8710      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8711    return SDValue();
8712
8713  // Compute the shift amount we need to extract to complete the rotate.
8714  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8715  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8716    return SDValue();
8717  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8718  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8719  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8720  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8721  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8722
8723  // Now try extract the needed shift from the ExtractFrom op and see if the
8724  // result matches up with the existing shift's LHS op.
8725  if (IsMulOrDiv) {
8726    // Op to extract from is a mul or udiv by a constant.
8727    // Check:
8728    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8729    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8730    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8731                                                 NeededShiftAmt.getZExtValue());
8732    APInt ResultAmt;
8733    APInt Rem;
8734    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8735    if (Rem != 0 || ResultAmt != OppLHSAmt)
8736      return SDValue();
8737  } else {
8738    // Op to extract from is a shift by a constant.
8739    // Check:
8740    //      c2 - (bitwidth(op0 v c0) - c1) == c0
8741    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8742                                          ExtractFromAmt.getBitWidth()))
8743      return SDValue();
8744  }
8745
8746  // Return the expanded shift op that should allow a rotate to be formed.
8747  EVT ShiftVT = OppShift.getOperand(1).getValueType();
8748  EVT ResVT = ExtractFrom.getValueType();
8749  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8750  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8751}
8752
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//   (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
  const auto &TLI = DAG.getTargetLoweringInfo();
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  // This allows us to peek through any operations that only affect Mask's
  // un-demanded bits.
  //
  // NOTE: We can only do this when matching operations which won't modify the
  // least Log2(EltSize) significant bits and not a general funnel shift.
  //
  // MaskLoBits stays 0 when using condition [B]; it is set to Log2(EltSize)
  // when condition [A] applies.
  unsigned MaskLoBits = 0;
  if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
    unsigned Bits = Log2_64(EltSize);
    unsigned NegBits = Neg.getScalarValueSizeInBits();
    if (NegBits >= Bits) {
      APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
      if (SDValue Inner =
        Neg = Inner;
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is the result of operation on Pos' that won't
  // affect Mask's demanded bits, just replace Pos with Pos'. These operations
  // are redundant for the purpose of the equality.
  if (MaskLoBits) {
    unsigned PosBits = Pos.getScalarValueSizeInBits();
    if (PosBits >= MaskLoBits) {
      APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
      if (SDValue Inner =
        Pos = Inner;
      }
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  //
  // Width collects the value that must equal EltSize (modulo Mask) for the
  // rotate/funnel match to be valid.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
8880
8881// A subroutine of MatchRotate used once we have found an OR of two opposite
8882// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8883// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8884// former being preferred if supported. InnerPos and InnerNeg are Pos and
8885// Neg with outer conversions stripped away.
8886SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8887 SDValue Neg, SDValue InnerPos,
8888 SDValue InnerNeg, bool FromAdd,
8889 bool HasPos, unsigned PosOpcode,
8890 unsigned NegOpcode, const SDLoc &DL) {
8891 // fold (or/add (shl x, (*ext y)),
8892 // (srl x, (*ext (sub 32, y)))) ->
8893 // (rotl x, y) or (rotr x, (sub 32, y))
8894 //
8895 // fold (or/add (shl x, (*ext (sub 32, y))),
8896 // (srl x, (*ext y))) ->
8897 // (rotr x, y) or (rotl x, (sub 32, y))
8898 EVT VT = Shifted.getValueType();
8899 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8900 /*IsRotate*/ true, FromAdd))
8901 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8902 HasPos ? Pos : Neg);
8903
8904 return SDValue();
8905}
8906
8907// A subroutine of MatchRotate used once we have found an OR of two opposite
8908// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8909// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8910// former being preferred if supported. InnerPos and InnerNeg are Pos and
8911// Neg with outer conversions stripped away.
8912// TODO: Merge with MatchRotatePosNeg.
8913SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8914 SDValue Neg, SDValue InnerPos,
8915 SDValue InnerNeg, bool FromAdd,
8916 bool HasPos, unsigned PosOpcode,
8917 unsigned NegOpcode, const SDLoc &DL) {
8918 EVT VT = N0.getValueType();
8919 unsigned EltBits = VT.getScalarSizeInBits();
8920
8921 // fold (or/add (shl x0, (*ext y)),
8922 // (srl x1, (*ext (sub 32, y)))) ->
8923 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8924 //
8925 // fold (or/add (shl x0, (*ext (sub 32, y))),
8926 // (srl x1, (*ext y))) ->
8927 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8928 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8929 FromAdd))
8930 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8931 HasPos ? Pos : Neg);
8932
8933 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8934 // so for now just use the PosOpcode case if its legal.
8935 // TODO: When can we use the NegOpcode case?
8936 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8937 SDValue X;
8938 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8939 // -> (fshl x0, x1, y)
8940 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8941 sd_match(InnerNeg,
8942 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8944 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8945 }
8946
8947 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8948 // -> (fshr x0, x1, y)
8949 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8950 sd_match(InnerPos,
8951 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8953 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8954 }
8955
8956 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8957 // -> (fshr x0, x1, y)
8958 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8959 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8960 sd_match(InnerPos,
8961 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8963 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8964 }
8965 }
8966
8967 return SDValue();
8968}
8969
// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
// many idioms for rotate, and if the target supports rotation instructions,
// generate a rot[lr]. This also matches funnel shift patterns, similar to
// rotation but with different shifted sources. Returns the replacement node,
// or a null SDValue if no rotate/funnel-shift pattern was matched.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
                                 bool FromAdd) {
  EVT VT = LHS.getValueType();

  // The target must have at least one rotate/funnel flavor.
  // We still try to match rotate by constant pre-legalization.
  // TODO: Support pre-legalization funnel-shift by constant.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  bool HasFSHL = hasOperation(ISD::FSHL, VT);
  bool HasFSHR = hasOperation(ISD::FSHR, VT);

  // If the type is going to be promoted and the target has enabled custom
  // lowering for rotate, allow matching rotate by non-constants. Only allow
  // this for scalar types.
  if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
  }

  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // Check for truncated rotate: recurse on the wide operands and truncate the
  // resulting rotate back down.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot =
            MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift; // The shift.
  SDValue LHSMask;  // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift; // The shift.
  SDValue RHSMask;  // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract. We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  // Something has gone wrong - we've lost the shl/srl pair - bail.
  if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
    return SDValue();

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // True iff the two constant shift amounts sum exactly to the element width,
  // i.e. the shl/srl pair covers all bits with no gap or overlap.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };

  // Re-apply any AND masks that were matched on either shifted operand to the
  // combined rotate/funnel result.
  auto ApplyMasks = [&](SDValue Res) {
    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
    }

    return Res;
  };

  // TODO: Support pre-legalization funnel-shift by constant.
  bool IsRotate = LHSShiftArg == RHSShiftArg;
  if (!IsRotate && !(HasFSHL || HasFSHR)) {
    if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
        ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
      // Look for a disguised rotate by constant.
      // The common shifted operand X may be hidden inside another 'or'.
      SDValue X, Y;
      auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
        if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
          return false;
        if (CommonOp == Or.getOperand(0)) {
          X = CommonOp;
          Y = Or.getOperand(1);
          return true;
        }
        if (CommonOp == Or.getOperand(1)) {
          X = CommonOp;
          Y = Or.getOperand(0);
          return true;
        }
        return false;
      };

      SDValue Res;
      if (matchOr(LHSShiftArg, RHSShiftArg)) {
        // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
      } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
        // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
      } else {
        return SDValue();
      }

      return ApplyMasks(Res);
    }

    return SDValue(); // Requires funnel shift support.
  }

  // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
  // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
  // iff C1+C2 == EltSizeInBits
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Res;
    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
      bool UseROTL = !LegalOperations || HasROTL;
      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                        UseROTL ? LHSShiftAmt : RHSShiftAmt);
    } else {
      bool UseFSHL = !LegalOperations || HasFSHL;
      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
    }

    return ApplyMasks(Res);
  }

  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
  // shift.
  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try matching a rotate first (same shifted value on both sides), in both
  // directions, then fall back to the more general funnel-shift forms.
  if (IsRotate && (HasROTL || HasROTR)) {
    if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                         LExtOp0, RExtOp0, FromAdd, HasROTL,
      return TryL;

    if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                         RExtOp0, LExtOp0, FromAdd, HasROTR,
      return TryR;
  }

  if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
                                       RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
                                       HasFSHL, ISD::FSHL, ISD::FSHR, DL))
    return TryL;

  if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
                                       LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
                                       HasFSHR, ISD::FSHR, ISD::FSHL, DL))
    return TryR;

  return SDValue();
}
9207
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns std::nullopt if the provider can't be
/// calculated.
///
/// For all the values except the root of the expression, we verify that the
/// value has exactly one use and if not then return std::nullopt. This way if
/// the origin of the byte is returned it's guaranteed that the values which
/// contribute to the byte are not used outside of this expression.

/// However, there is a special case when dealing with vector loads -- we allow
/// more than one use if the load is a vector type. Since the values that
/// contribute to the byte ultimately come from the ExtractVectorElements of the
/// Load, we don't care if the Load has uses other than ExtractVectorElements,
/// because those operations are independent from the pattern to be combined.
/// For vector loads, we simply care that the ByteProviders are adjacent
/// positions of the same vector, and their index matches the byte that is being
/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
/// byte position we are trying to provide for the LoadCombine. If these do
/// not match, then we can not combine the vector loads. \p Index uses the
/// byte position we are trying to provide for and is matched against the
/// shl and load size. The \p Index algorithm ensures the requested byte is
/// provided for by the pattern, and the pattern does not over provide bytes.
///
///
/// The supported LoadCombine pattern for vector loads is as follows
///                              or
///                            /   \
///                           or   shl
///                         /  \    |
///                        or  shl zext
///                      /  \   |    |
///                    shl zext zext EVE*
///                     |   |    |    |
///                    zext EVE* EVE* LOAD
///                     |   |    |
///                    EVE* LOAD LOAD
///                     |
///                    LOAD
///
/// *ExtractVectorElement

static std::optional<SDByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      std::optional<uint64_t> VectorIndex,
                      unsigned StartingIndex = 0) {

  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return std::nullopt;

  // Only allow multiple uses if the instruction is a vector load (in which
  // case we will use the load for every ExtractVectorElement)
  if (Depth && !Op.hasOneUse() &&
      (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
    return std::nullopt;

  // Fail to combine if we have encountered anything but a LOAD after handling
  // an ExtractVectorElement.
  if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
    return std::nullopt;

  // Only byte-sized scalars can be decomposed into byte providers.
  unsigned BitWidth = Op.getScalarValueSizeInBits();
  if (BitWidth % 8 != 0)
    return std::nullopt;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // Each OR operand must provide either the requested byte or a known zero;
    // otherwise the byte's origin is ambiguous.
    auto LHS =
        calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
    if (!LHS)
      return std::nullopt;
    auto RHS =
        calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
    if (!RHS)
      return std::nullopt;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return std::nullopt;
  }
  case ISD::SHL: {
    // Only constant shifts by a whole number of bytes can be looked through.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return std::nullopt;

    uint64_t BitShift = ShiftOp->getZExtValue();

    if (BitShift % 8 != 0)
      return std::nullopt;
    uint64_t ByteShift = BitShift / 8;

    // If we are shifting by an amount greater than the index we are trying to
    // provide, then do not provide anything. Otherwise, subtract the index by
    // the amount we shifted by.
    return Index < ByteShift
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1, VectorIndex, Index);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return std::nullopt;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes above the narrow source are zero for ZERO_EXTEND and
    // undefined/unknown for the other extensions.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? std::optional<SDByteProvider>(
                 : std::nullopt;
    return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
                                 StartingIndex);
  }
  case ISD::BSWAP:
    // BSWAP mirrors the byte index within the value.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1, VectorIndex, StartingIndex);
    auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!OffsetOp)
      return std::nullopt;

    VectorIndex = OffsetOp->getZExtValue();

    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return std::nullopt;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;
    // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
    // type, leaving the high bits undefined.
    if (Index >= NarrowByteWidth)
      return std::nullopt;

    // Check to see if the position of the element in the vector corresponds
    // with the byte we are trying to provide for. In the case of a vector of
    // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
    // the element will provide a range of bytes. For example, if we have a
    // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
    // 3).
    if (*VectorIndex * NarrowByteWidth > StartingIndex)
      return std::nullopt;
    if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
      return std::nullopt;

    return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
                                 VectorIndex, StartingIndex);
  }
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    // Volatile/atomic or pre/post-indexed loads cannot be combined safely.
    if (!L->isSimple() || L->isIndexed())
      return std::nullopt;

    unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return std::nullopt;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // If the width of the load does not reach byte we are trying to provide for
    // and it is not a ZEXTLOAD, then the load does not provide for the byte in
    // question
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? std::optional<SDByteProvider>(
                 : std::nullopt;

    unsigned BPVectorIndex = VectorIndex.value_or(0U);
    return SDByteProvider::getSrc(L, Index, BPVectorIndex);
  }
  }

  return std::nullopt;
}
9391
// Position of logical byte \p Index within a little-endian value of
// \p ByteWidth bytes: bytes are laid out in increasing significance, so the
// mapping is the identity.
static unsigned littleEndianByteAt(unsigned ByteWidth, unsigned Index) {
  (void)ByteWidth; // Width does not affect little-endian byte positions.
  return Index;
}
9395
// Position of logical byte \p Index within a big-endian value of
// \p ByteWidth bytes: the layout is the reverse of little-endian.
static unsigned bigEndianByteAt(unsigned ByteWidth, unsigned Index) {
  return (ByteWidth - 1) - Index;
}
9399
9400// Check if the bytes offsets we are looking at match with either big or
9401// little endian value loaded. Return true for big endian, false for little
9402// endian, and std::nullopt if match failed.
9403static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
9404 int64_t FirstOffset) {
9405 // The endian can be decided only when it is 2 bytes at least.
9406 unsigned Width = ByteOffsets.size();
9407 if (Width < 2)
9408 return std::nullopt;
9409
9410 bool BigEndian = true, LittleEndian = true;
9411 for (unsigned i = 0; i < Width; i++) {
9412 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9413 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9414 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9415 if (!BigEndian && !LittleEndian)
9416 return std::nullopt;
9417 }
9418
9419 assert((BigEndian != LittleEndian) && "It should be either big endian or"
9420 "little endian");
9421 return BigEndian;
9422}
9423
// Look through one layer of truncate or extend: if Value is a TRUNCATE or any
// flavor of integer extension, return its source operand; otherwise return a
// null SDValue.
  switch (Value.getOpcode()) {
  case ISD::TRUNCATE:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
    return Value.getOperand(0);
  }
  // Not a conversion node: nothing to strip.
  return SDValue();
}
9435
9436/// Match a pattern where a wide type scalar value is stored by several narrow
9437/// stores. Fold it into a single store or a BSWAP and a store if the targets
9438/// supports it.
9439///
9440/// Assuming little endian target:
9441/// i8 *p = ...
9442/// i32 val = ...
9443/// p[0] = (val >> 0) & 0xFF;
9444/// p[1] = (val >> 8) & 0xFF;
9445/// p[2] = (val >> 16) & 0xFF;
9446/// p[3] = (val >> 24) & 0xFF;
9447/// =>
9448/// *((i32)p) = val;
9449///
9450/// i8 *p = ...
9451/// i32 val = ...
9452/// p[0] = (val >> 24) & 0xFF;
9453/// p[1] = (val >> 16) & 0xFF;
9454/// p[2] = (val >> 8) & 0xFF;
9455/// p[3] = (val >> 0) & 0xFF;
9456/// =>
9457/// *((i32)p) = BSWAP(val);
9458SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9459 // The matching looks for "store (trunc x)" patterns that appear early but are
9460 // likely to be replaced by truncating store nodes during combining.
9461 // TODO: If there is evidence that running this later would help, this
9462 // limitation could be removed. Legality checks may need to be added
9463 // for the created store and optional bswap/rotate.
9464 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9465 return SDValue();
9466
9467 // We only handle merging simple stores of 1-4 bytes.
9468 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9469 EVT MemVT = N->getMemoryVT();
9470 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9471 !N->isSimple() || N->isIndexed())
9472 return SDValue();
9473
9474 // Collect all of the stores in the chain, upto the maximum store width (i64).
9475 SDValue Chain = N->getChain();
9477 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9478 unsigned MaxWideNumBits = 64;
9479 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9480 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9481 // All stores must be the same size to ensure that we are writing all of the
9482 // bytes in the wide value.
9483 // This store should have exactly one use as a chain operand for another
9484 // store in the merging set. If there are other chain uses, then the
9485 // transform may not be safe because order of loads/stores outside of this
9486 // set may not be preserved.
9487 // TODO: We could allow multiple sizes by tracking each stored byte.
9488 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9489 Store->isIndexed() || !Store->hasOneUse())
9490 return SDValue();
9491 Stores.push_back(Store);
9492 Chain = Store->getChain();
9493 if (MaxStores < Stores.size())
9494 return SDValue();
9495 }
9496 // There is no reason to continue if we do not have at least a pair of stores.
9497 if (Stores.size() < 2)
9498 return SDValue();
9499
9500 // Handle simple types only.
9501 LLVMContext &Context = *DAG.getContext();
9502 unsigned NumStores = Stores.size();
9503 unsigned WideNumBits = NumStores * NarrowNumBits;
9504 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9505 return SDValue();
9506
9507 // Check if all bytes of the source value that we are looking at are stored
9508 // to the same base address. Collect offsets from Base address into OffsetMap.
9509 SDValue SourceValue;
9510 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9511 int64_t FirstOffset = INT64_MAX;
9512 StoreSDNode *FirstStore = nullptr;
9513 std::optional<BaseIndexOffset> Base;
9514 for (auto *Store : Stores) {
9515 // All the stores store different parts of the CombinedValue. A truncate is
9516 // required to get the partial value.
9517 SDValue Trunc = Store->getValue();
9518 if (Trunc.getOpcode() != ISD::TRUNCATE)
9519 return SDValue();
9520 // Other than the first/last part, a shift operation is required to get the
9521 // offset.
9522 int64_t Offset = 0;
9523 SDValue WideVal = Trunc.getOperand(0);
9524 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9525 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9526 // The shift amount must be a constant multiple of the narrow type.
9527 // It is translated to the offset address in the wide source value "y".
9528 //
9529 // x = srl y, ShiftAmtC
9530 // i8 z = trunc x
9531 // store z, ...
9532 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9533 if (ShiftAmtC % NarrowNumBits != 0)
9534 return SDValue();
9535
9536 // Make sure we aren't reading bits that are shifted in.
9537 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9538 return SDValue();
9539
9540 Offset = ShiftAmtC / NarrowNumBits;
9541 WideVal = WideVal.getOperand(0);
9542 }
9543
9544 // Stores must share the same source value with different offsets.
9545 if (!SourceValue)
9546 SourceValue = WideVal;
9547 else if (SourceValue != WideVal) {
9548 // Truncate and extends can be stripped to see if the values are related.
9549 if (stripTruncAndExt(SourceValue) != WideVal &&
9550 stripTruncAndExt(WideVal) != SourceValue)
9551 return SDValue();
9552
9553 if (WideVal.getScalarValueSizeInBits() >
9554 SourceValue.getScalarValueSizeInBits())
9555 SourceValue = WideVal;
9556
9557 // Give up if the source value type is smaller than the store size.
9558 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9559 return SDValue();
9560 }
9561
9562 // Stores must share the same base address.
9563 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9564 int64_t ByteOffsetFromBase = 0;
9565 if (!Base)
9566 Base = Ptr;
9567 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9568 return SDValue();
9569
9570 // Remember the first store.
9571 if (ByteOffsetFromBase < FirstOffset) {
9572 FirstStore = Store;
9573 FirstOffset = ByteOffsetFromBase;
9574 }
9575 // Map the offset in the store and the offset in the combined value, and
9576 // early return if it has been set before.
9577 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9578 return SDValue();
9579 OffsetMap[Offset] = ByteOffsetFromBase;
9580 }
9581
9582 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9583
9584 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9585 assert(FirstStore && "First store must be set");
9586
9587 // Check that a store of the wide type is both allowed and fast on the target
9588 const DataLayout &Layout = DAG.getDataLayout();
9589 unsigned Fast = 0;
9590 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9591 *FirstStore->getMemOperand(), &Fast);
9592 if (!Allowed || !Fast)
9593 return SDValue();
9594
9595 // Check if the pieces of the value are going to the expected places in memory
9596 // to merge the stores.
9597 auto checkOffsets = [&](bool MatchLittleEndian) {
9598 if (MatchLittleEndian) {
9599 for (unsigned i = 0; i != NumStores; ++i)
9600 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9601 return false;
9602 } else { // MatchBigEndian by reversing loop counter.
9603 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9604 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9605 return false;
9606 }
9607 return true;
9608 };
9609
9610 // Check if the offsets line up for the native data layout of this target.
9611 bool NeedBswap = false;
9612 bool NeedRotate = false;
9613 if (!checkOffsets(Layout.isLittleEndian())) {
9614 // Special-case: check if byte offsets line up for the opposite endian.
9615 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9616 NeedBswap = true;
9617 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9618 NeedRotate = true;
9619 else
9620 return SDValue();
9621 }
9622
9623 SDLoc DL(N);
9624 if (WideVT != SourceValue.getValueType()) {
9625 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9626 "Unexpected store value to merge");
9627 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9628 }
9629
9630 // Before legalize we can introduce illegal bswaps/rotates which will be later
9631 // converted to an explicit bswap sequence. This way we end up with a single
9632 // store and byte shuffling instead of several stores and byte shuffling.
9633 if (NeedBswap) {
9634 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9635 } else if (NeedRotate) {
9636 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9637 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9638 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9639 }
9640
9641 SDValue NewStore =
9642 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9643 FirstStore->getPointerInfo(), FirstStore->getAlign());
9644
9645 // Rely on other DAG combine rules to remove the other individual stores.
9646 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9647 return NewStore;
9648}
9649
9650/// Match a pattern where a wide type scalar value is loaded by several narrow
9651/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
9653///
9654/// Assuming little endian target:
9655/// i8 *a = ...
9656/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9657/// =>
9658/// i32 val = *((i32)a)
9659///
9660/// i8 *a = ...
9661/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9662/// =>
9663/// i32 val = BSWAP(*((i32)a))
9664///
9665/// TODO: This rule matches complex patterns with OR node roots and doesn't
9666/// interact well with the worklist mechanism. When a part of the pattern is
9667/// updated (e.g. one of the loads) its direct users are put into the worklist,
9668/// but the root node of the pattern which triggers the load combine is not
9669/// necessarily a direct user of the changed node. For example, once the address
9670/// of t28 load is reassociated load combine won't be triggered:
9671/// t25: i32 = add t4, Constant:i32<2>
9672/// t26: i64 = sign_extend t25
9673/// t27: i64 = add t2, t26
9674/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9675/// t29: i32 = zero_extend t28
9676/// t32: i32 = shl t29, Constant:i8<8>
9677/// t33: i32 = or t23, t32
9678/// As a possible fix visitLoad can check if the load can be a part of a load
9679/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Map a byte provider to the in-memory byte offset (within its own load)
  // that it reads, accounting for the target's endianness.
  auto MemoryByteOffset = [&](SDByteProvider P) {
    assert(P.hasSrc() && "Must be a memory byte provider");
    auto *Load = cast<LoadSDNode>(P.Src.value());

    unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();

    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
                             : littleEndianByteAt(LoadByteWidth, P.DestOffset);
  };

  std::optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  std::optional<SDByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
  unsigned ZeroExtendedBytes = 0;
  // Walk from the most-significant byte down so that a run of constant-zero
  // bytes is only accepted as a contiguous prefix at the top (which can be
  // folded into a zero-extending load).
  for (int i = ByteWidth - 1; i >= 0; --i) {
    auto P =
        calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
                              /*StartingIndex*/ i);
    if (!P)
      return SDValue();

    if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0, we can just
      // zero-extend the load.
      if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
        return SDValue();
      continue;
    }
    assert(P->hasSrc() && "provenance should either be memory or zero");
    auto *L = cast<LoadSDNode>(P->Src.value());

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;

    // For vector loads, the expected load combine pattern will have an
    // ExtractElement for each index in the vector. While each of these
    // ExtractElements will be accessing the same base address as determined
    // by the load instruction, the actual bytes they interact with will differ
    // due to different ExtractElement indices. To accurately determine the
    // byte position of an ExtractElement, we offset the base load ptr with
    // the index multiplied by the byte size of each element in the vector.
    if (L->getMemoryVT().isVector()) {
      unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
      if (LoadWidthInBit % 8 != 0)
        return SDValue();
      unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
      Ptr.addToOffset(ByteOffsetFromVector);
    }

    if (!Base)
      Base = Ptr;

    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }

  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Any zero high bytes mean the combined load must be zero-extended from a
  // narrower memory type.
  bool NeedsZext = ZeroExtendedBytes > 0;

  EVT MemVT =
      EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);

  if (!MemVT.isSimple())
    return SDValue();

  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations &&
      !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
                          MemVT))
    return SDValue();

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  std::optional<bool> IsBigEndian = isBigEndian(
      ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
  if (!IsBigEndian)
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single (possibly zero-extended) load and bswap + shift if
  // needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  // We do not introduce illegal bswaps when zero-extending as this tends to
  // introduce too many arithmetic instructions.
  if (NeedsBswap && (LegalOperations || NeedsZext) &&
      !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // If we need to bswap and zero extend, we have to insert a shift. Check that
  // it is legal.
  if (NeedsBswap && NeedsZext && LegalOperations &&
      !TLI.isOperationLegal(ISD::SHL, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  unsigned Fast = 0;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                             *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
                     Chain, FirstLoad->getBasePtr(),
                     FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.makeEquivalentMemoryOrdering(L, NewLoad);

  if (!NeedsBswap)
    return NewLoad;

  // When both bswap and zext are needed, shift the loaded value up first so
  // the byte swap puts the loaded bytes in the right positions.
  SDValue ShiftedLoad =
      NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
                              DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
                                                         VT, SDLoc(N)))
                : NewLoad;
  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
9862
9863// If the target has andn, bsl, or a similar bit-select instruction,
9864// we want to unfold masked merge, with canonical pattern of:
9865// | A | |B|
9866// ((x ^ y) & m) ^ y
9867// | D |
9868// Into:
9869// (x & m) | (y & ~m)
9870// If y is a constant, m is not a 'not', and the 'andn' does not work with
9871// immediates, we unfold into a different pattern:
9872// ~(~x & m) & (m | y)
9873// If x is a constant, m is a 'not', and the 'andn' does not work with
9874// immediates, we unfold into a different pattern:
9875// (x | ~m) & ~(~m & ~y)
9876// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9877// the very least that breaks andnpd / andnps patterns, and because those
9878// patterns are simplified in IR and shouldn't be created in the DAG
9879SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9880 assert(N->getOpcode() == ISD::XOR);
9881
9882 // Don't touch 'not' (i.e. where y = -1).
9883 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9884 return SDValue();
9885
9886 EVT VT = N->getValueType(0);
9887
9888 // There are 3 commutable operators in the pattern,
9889 // so we have to deal with 8 possible variants of the basic pattern.
9890 SDValue X, Y, M;
9891 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9892 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9893 return false;
9894 SDValue Xor = And.getOperand(XorIdx);
9895 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9896 return false;
9897 SDValue Xor0 = Xor.getOperand(0);
9898 SDValue Xor1 = Xor.getOperand(1);
9899 // Don't touch 'not' (i.e. where y = -1).
9900 if (isAllOnesOrAllOnesSplat(Xor1))
9901 return false;
9902 if (Other == Xor0)
9903 std::swap(Xor0, Xor1);
9904 if (Other != Xor1)
9905 return false;
9906 X = Xor0;
9907 Y = Xor1;
9908 M = And.getOperand(XorIdx ? 0 : 1);
9909 return true;
9910 };
9911
9912 SDValue N0 = N->getOperand(0);
9913 SDValue N1 = N->getOperand(1);
9914 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9915 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9916 return SDValue();
9917
9918 // Don't do anything if the mask is constant. This should not be reachable.
9919 // InstCombine should have already unfolded this pattern, and DAGCombiner
9920 // probably shouldn't produce it, too.
9921 if (isa<ConstantSDNode>(M.getNode()))
9922 return SDValue();
9923
9924 // We can transform if the target has AndNot
9925 if (!TLI.hasAndNot(M))
9926 return SDValue();
9927
9928 SDLoc DL(N);
9929
9930 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9931 // a bitwise not that would already allow ANDN to be used.
9932 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9933 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9934 // If not, we need to do a bit more work to make sure andn is still used.
9935 SDValue NotX = DAG.getNOT(DL, X, VT);
9936 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9937 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9938 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9939 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9940 }
9941
9942 // If X is a constant and M is a bitwise not, check that 'andn' works with
9943 // immediates.
9944 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9945 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9946 // If not, we need to do a bit more work to make sure andn is still used.
9947 SDValue NotM = M.getOperand(0);
9948 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9949 SDValue NotY = DAG.getNOT(DL, Y, VT);
9950 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9951 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9952 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9953 }
9954
9955 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9956 SDValue NotM = DAG.getNOT(DL, M, VT);
9957 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9958
9959 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9960}
9961
/// Fold and canonicalize an ISD::XOR node; returns the replacement value or
/// an empty SDValue when no fold applies.
/// NOTE(review): a few guard-condition lines in this function appear truncated
/// by extraction (marked below) — verify against the upstream file.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (xor c1, c2) -> c1^c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  // NOTE(review): the guard condition for this swap is missing here
  // (extraction artifact) — confirm against upstream.
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    // NOTE(review): the all-zeros-splat guard is missing here (extraction
    // artifact).
      return N0;
  }

  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
  if (SDValue SD =
          reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
    return SD;

  // fold (a^b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);

  // look for 'add-like' folds:
  // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  // fold not (setcc x, y, cc) -> setcc x y !cc
  // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1) &&
      isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
      !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
        N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
    // NOTE(review): the declaration of NotCC (the inverted condition code)
    // is missing here (extraction artifact).
                                  LHS.getValueType());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      case ISD::STRICT_FSETCC:
      case ISD::STRICT_FSETCCS: {
        if (N0.hasOneUse()) {
          // FIXME Can we handle multiple uses? Could we token factor the chain
          // results from the new/old setcc?
          SDValue SetCC =
              DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
                           N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
          CombineTo(N, SetCC);
          DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
          recursivelyDeleteUnusedNodes(N0.getNode());
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
        break;
      }
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }

  // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
  if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
    SDValue Y = N0.getOperand(0);
    SDValue X = N0.getOperand(1);

    if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
      APInt NotYValue = ~YConst->getAPIntValue();
      SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
      return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
    }
  }

  // fold (not (add X, -1)) -> (neg X)
  // NOTE(review): the check on N0's second operand is missing here
  // (extraction artifact).
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
    return DAG.getNegative(N0.getOperand(0), DL, VT);
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x == 14
  // i16 shl == 1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  //   A rotate left of ~1 is a nice way of achieving the desired result.
  // NOTE(review): part of this guard (the all-ones/one-constant operand
  // checks) is missing here (extraction artifact).
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
    return R;
  if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
    return R;
  if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
    return R;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  // NOTE(review): the SimplifyDemandedBits call guarding this return is
  // missing here (extraction artifact).
    return SDValue(N, 0);

  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
    return Combined;

  // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
  // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
  // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
  // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
  SDValue Op0;
  if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
                                    m_SMax(m_Value(Op0), m_Specific(N1)),
                                    m_UMin(m_Value(Op0), m_Specific(N1)),
                                    m_UMax(m_Value(Op0), m_Specific(N1)))))) {

    if (isa<ConstantSDNode>(N1) ||
      // For vectors, only optimize when the constant is zero or all-ones to
      // avoid generating more instructions
      if (VT.isVector()) {
        ConstantSDNode *N1C = isConstOrConstSplat(N1);
        if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
          return SDValue();
      }

      // Avoid the fold if the minmax operation is legal and select is expensive
      // NOTE(review): the second half of this condition is missing here
      // (extraction artifact).
      if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
        return SDValue();

      EVT CCVT = getSetCCResultType(VT);
      ISD::CondCode CC;
      switch (N0.getOpcode()) {
      case ISD::SMIN:
        CC = ISD::SETLT;
        break;
      case ISD::SMAX:
        CC = ISD::SETGT;
        break;
      case ISD::UMIN:
        CC = ISD::SETULT;
        break;
      case ISD::UMAX:
        CC = ISD::SETUGT;
        break;
      }
      SDValue FN1 = DAG.getFreeze(N1);
      SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
      SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
      SDValue Zero = DAG.getConstant(0, DL, VT);
      return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
    }
  }

  return SDValue();
}
10244
10245/// If we have a shift-by-constant of a bitwise logic op that itself has a
10246/// shift-by-constant operand with identical opcode, we may be able to convert
10247/// that into 2 independent shifts followed by the logic op. This is a
10248/// throughput improvement.
  // Match a one-use bitwise logic op.
  SDValue LogicOp = Shift->getOperand(0);
  if (!LogicOp.hasOneUse())
    return SDValue();

  // Only AND/OR/XOR distribute over both shifts; anything else bails out.
  unsigned LogicOpcode = LogicOp.getOpcode();
  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
      LogicOpcode != ISD::XOR)
    return SDValue();

  // Find a matching one-use shift by constant.
  unsigned ShiftOpcode = Shift->getOpcode();
  SDValue C1 = Shift->getOperand(1);
  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
  assert(C1Node && "Expected a shift with constant operand");
  const APInt &C1Val = C1Node->getAPIntValue();
  // Returns true and sets ShiftOp/ShiftAmtVal when V is a one-use shift of the
  // same opcode by a constant whose amount can be safely added to C1.
  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
                             const APInt *&ShiftAmtVal) {
    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
      return false;

    ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
    if (!ShiftCNode)
      return false;

    // Capture the shifted operand and shift amount value.
    ShiftOp = V.getOperand(0);
    ShiftAmtVal = &ShiftCNode->getAPIntValue();

    // Shift amount types do not have to match their operand type, so check that
    // the constants are the same width.
    if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
      return false;

    // The fold is not valid if the sum of the shift values doesn't fit in the
    // given shift amount type.
    bool Overflow = false;
    APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
    if (Overflow)
      return false;

    // The fold is not valid if the sum of the shift values exceeds bitwidth.
    if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
      return false;

    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  SDValue X, Y;
  const APInt *C0Val;
  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
    Y = LogicOp.getOperand(1);
  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
    Y = LogicOp.getOperand(0);
  else
    return SDValue();

  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
  SDLoc DL(Shift);
  EVT VT = Shift->getValueType(0);
  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
                     LogicOp->getFlags());
}
10318
10319/// Handle transforms common to the three shifts, when the shift amount is a
10320/// constant.
10321/// We are looking for: (shift being one of shl/sra/srl)
10322/// shift (binop X, C0), C1
10323/// And want to transform into:
10324/// binop (shift X, C1), (shift C0, C1)
10325SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10326 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10327
10328 // Do not turn a 'not' into a regular xor.
10329 if (isBitwiseNot(N->getOperand(0)))
10330 return SDValue();
10331
10332 // The inner binop must be one-use, since we want to replace it.
10333 SDValue LHS = N->getOperand(0);
10334 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10335 return SDValue();
10336
10337 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10338 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10339 return R;
10340
10341 // We want to pull some binops through shifts, so that we have (and (shift))
10342 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10343 // thing happens with address calculations, so it's important to canonicalize
10344 // it.
10345 switch (LHS.getOpcode()) {
10346 default:
10347 return SDValue();
10348 case ISD::OR:
10349 case ISD::XOR:
10350 case ISD::AND:
10351 break;
10352 case ISD::ADD:
10353 if (N->getOpcode() != ISD::SHL)
10354 return SDValue(); // only shl(add) not sr[al](add).
10355 break;
10356 }
10357
10358 // FIXME: disable this unless the input to the binop is a shift by a constant
10359 // or is copy/select. Enable this in other cases when figure out it's exactly
10360 // profitable.
10361 SDValue BinOpLHSVal = LHS.getOperand(0);
10362 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10363 BinOpLHSVal.getOpcode() == ISD::SRA ||
10364 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10365 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10366 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10367 BinOpLHSVal.getOpcode() == ISD::SELECT;
10368
10369 if (!IsShiftByConstant && !IsCopyOrSelect)
10370 return SDValue();
10371
10372 if (IsCopyOrSelect && N->hasOneUse())
10373 return SDValue();
10374
10375 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10376 SDLoc DL(N);
10377 EVT VT = N->getValueType(0);
10378 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10379 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10380 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10381 N->getOperand(1));
10382 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10383 }
10384
10385 return SDValue();
10386}
10387
10388SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10389 assert(N->getOpcode() == ISD::TRUNCATE);
10390 assert(N->getOperand(0).getOpcode() == ISD::AND);
10391
10392 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10393 EVT TruncVT = N->getValueType(0);
10394 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10395 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10396 SDValue N01 = N->getOperand(0).getOperand(1);
10397 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10398 SDLoc DL(N);
10399 SDValue N00 = N->getOperand(0).getOperand(0);
10400 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10401 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10402 AddToWorklist(Trunc00.getNode());
10403 AddToWorklist(Trunc01.getNode());
10404 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10405 }
10406 }
10407
10408 return SDValue();
10409}
10410
// Combine ISD::ROTL / ISD::ROTR nodes (N0 = value, N1 = rotate amount).
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  // For a power-of-2 bitwidth only the low log2(Bitsize) bits of the amount
  // matter, so the rotate is a no-op when those bits are all known zero.
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  bool OutOfRange = false;
  auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
    OutOfRange |= C->getAPIntValue().uge(Bitsize);
    return true; // Always succeed so every (vector) element is inspected.
  };
  if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
    EVT AmtVT = N1.getValueType();
    SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
    if (SDValue Amt =
            DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
  }

  // rot i16 X, 8 --> bswap X
  auto *RotAmtC = isConstOrConstSplat(N1);
  if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
      VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
    return DAG.getNode(ISD::BSWAP, dl, VT, N0);

  // Simplify the operands using demanded-bits information.
    return SDValue(N, 0);

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();

  // fold (rot* (rot* x, c2), c1)
  // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
      EVT ShiftVT = N1.getValueType();
      // Same rotate direction => amounts add; opposite directions => subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
      SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
                                                 {N1, BitsizeC});
      SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
                                                 {N0.getOperand(1), BitsizeC});
      if (Norm1 && Norm2)
        if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
                CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
          // Bias by bitsize before the final modulo so a SUB result that
          // wrapped "negative" still normalizes into [0, bitsize).
          CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
                                                     {CombinedShift, BitsizeC});
          SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
              ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
          return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                             CombinedShiftNorm);
        }
    }
  }
  return SDValue();
}
10490
// Combine ISD::SHL nodes (N0 = value, N1 = shift amount).
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  SDLoc DL(N);
  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
          if (SDValue C =
                  DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
            return DAG.getNode(ISD::AND, DL, VT, N00, C);
        }
      }
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
    return DAG.getConstant(0, DL, VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
  }

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // c1 + c2 may overflow the shift-amount type; widen by one bit first.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, DL, VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form. This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, DL, VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }

  if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
    // True iff both amounts are in range and LHS amount <= RHS amount.
    auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
                                           ConstantSDNode *RHS) {
      const APInt &LHSC = LHS->getAPIntValue();
      const APInt &RHSC = RHS->getAPIntValue();
      return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
             LHSC.getZExtValue() <= RHSC.getZExtValue();
    };

    // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
    // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
    if (N0->getFlags().hasExact()) {
      if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
                                    /*AllowUndefs*/ false,
                                    /*AllowTypeMismatch*/ true)) {
        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
      }
      if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
                                    /*AllowUndefs*/ false,
                                    /*AllowTypeMismatch*/ true)) {
        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
        return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
      }
    }

    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    //                               (and (srl x, (sub c1, c2)), MASK)
    // Only fold this if the inner shift has no other uses -- if it does,
    // folding this will increase the total number of instructions.
    if (N0.getOpcode() == ISD::SRL &&
        (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
      if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
                                    /*AllowUndefs*/ false,
                                    /*AllowTypeMismatch*/ true)) {
        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
        Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
      }
      if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
                                    /*AllowUndefs*/ false,
                                    /*AllowTypeMismatch*/ true)) {
        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
        Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
        SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue N01 = N0.getOperand(1);
    if (SDValue Shl1 =
            DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
      SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(Shl0.getNode());
      SDNodeFlags Flags;
      // Preserve the disjoint flag for Or.
      if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
      return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
    }
  }

  // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
  // TODO: Add zext/add_nuw variant with suitable test coverage
  // TODO: Should we limit this with isLegalAddImmediate?
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getOpcode() == ISD::ADD &&
      N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Add = N0.getOperand(0);
    SDLoc DL(N0);
    if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
                                                  {Add.getOperand(1)})) {
      if (SDValue ShlC =
              DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
        SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
        SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
        return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
      }
    }
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
    SDValue N01 = N0.getOperand(1);
    if (SDValue Shl =
            DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
      return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;

  // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
  // target.
  if (((N1.getOpcode() == ISD::CTTZ &&
        VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
      N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
    SDValue Y = N1.getOperand(0);
    SDLoc DL(N);
    // (Y & -Y) isolates the lowest set bit, i.e. 1 << cttz(Y).
    SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
    SDValue And =
        DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
    return DAG.getNode(ISD::MUL, DL, VT, And, N0);
  }

    return SDValue(N, 0);

  // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
  if (N0.getOpcode() == ISD::VSCALE && N1C) {
    const APInt &C0 = N0.getConstantOperandAPInt(0);
    const APInt &C1 = N1C->getAPIntValue();
    return DAG.getVScale(DL, VT, C0 << C1);
  }

  SDValue X;
  APInt VS0;

  // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
  if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
    SDNodeFlags Flags;
    // NUW on the result only if both the shift and the mul were NUW.
    Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
                            N0->getFlags().hasNoUnsignedWrap());

    SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
    return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
  }

  // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
  APInt ShlVal;
  if (N0.getOpcode() == ISD::STEP_VECTOR &&
      ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
    const APInt &C0 = N0.getConstantOperandAPInt(0);
    if (ShlVal.ult(C0.getBitWidth())) {
      APInt NewStep = C0 << ShlVal;
      return DAG.getStepVector(DL, VT, NewStep);
    }
  }

  return SDValue();
}
10810
// Transform a right shift of a multiply into a multiply-high.
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
                                const TargetLowering &TLI) {
  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");

  // Check the shift amount. Proceed with the transformation if the shift
  // amount is constant.
  ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
  if (!ShiftAmtSrc)
    return SDValue();

  // The operation feeding into the shift must be a multiply.
  SDValue ShiftOperand = N->getOperand(0);
  if (ShiftOperand.getOpcode() != ISD::MUL)
    return SDValue();

  // Both operands must be equivalent extend nodes.
  SDValue LeftOp = ShiftOperand.getOperand(0);
  SDValue RightOp = ShiftOperand.getOperand(1);

  bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
  bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;

  if (!IsSignExt && !IsZeroExt)
    return SDValue();

  EVT NarrowVT = LeftOp.getOperand(0).getValueType();
  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();

  // return true if U may use the lower bits of its operands
  auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
    if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
      return true;
    }
    ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
    if (!UShiftAmtSrc) {
      return true;
    }
    unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
    return UShiftAmt < NarrowVTSize;
  };

  // If the lower part of the MUL is also used and MUL_LOHI is supported
  // do not introduce the MULH in favor of MUL_LOHI
  unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  if (!ShiftOperand.hasOneUse() &&
      TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
      llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
    return SDValue();
  }

  SDValue MulhRightOp;
    // RHS is a constant: it must fit into the narrow type after truncation.
    unsigned ActiveBits = IsSignExt
                              ? Constant->getAPIntValue().getSignificantBits()
                              : Constant->getAPIntValue().getActiveBits();
    if (ActiveBits > NarrowVTSize)
      return SDValue();
    MulhRightOp = DAG.getConstant(
        Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
        NarrowVT);
  } else {
    if (LeftOp.getOpcode() != RightOp.getOpcode())
      return SDValue();
    // Check that the two extend nodes are the same type.
    if (NarrowVT != RightOp.getOperand(0).getValueType())
      return SDValue();
    MulhRightOp = RightOp.getOperand(0);
  }

  EVT WideVT = LeftOp.getValueType();
  // Proceed with the transformation if the wide types match.
  assert((WideVT == RightOp.getValueType()) &&
         "Cannot have a multiply node with two different operand types.");

  // Proceed with the transformation if the wide type is twice as large
  // as the narrow type.
  if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
    return SDValue();

  // Check the shift amount with the narrow type size.
  // Proceed with the transformation if the shift amount is the width
  // of the narrow type.
  unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
  if (ShiftAmt != NarrowVTSize)
    return SDValue();

  // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
  unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;

  // Combine to mulh if mulh is legal/custom for the narrow type on the target
  // or if it is a vector type then we could transform to an acceptable type and
  // rely on legalization to split/combine the result.
  EVT TransformVT = NarrowVT;
  if (NarrowVT.isVector()) {
    TransformVT = TLI.getLegalTypeToTransformTo(*DAG.getContext(), NarrowVT);
    if (TransformVT.getScalarType() != NarrowVT.getScalarType())
      return SDValue();
  }
  if (!TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
    return SDValue();

  SDValue Result =
      DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
  bool IsSigned = N->getOpcode() == ISD::SRA;
  return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
}
10923
// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
// This helper function accepts SDNode with opcode ISD::BSWAP and
// ISD::BITREVERSE only.
  unsigned Opcode = N->getOpcode();
  if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue X, Y;

  // If both operands are bswap/bitreverse, ignore the multiuse
  // restriction: both reorders cancel and no new node is created for them.
                            m_UnaryOp(Opcode, m_Value(Y))))))
    return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);

  // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
                        m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
    SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
    return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
  }

  return SDValue();
}
10950
// Combine ISD::SRA nodes (N0 = value, N1 = shift amount).
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  SDLoc DL(N);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold (sra c1, c2) -> c1 >> c2 (arithmetic)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
    return C;

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Per-element: sum the amounts, clamping to OpSizeInBits-1 (SRA saturates).
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (N1.getOpcode() == ISD::BUILD_VECTOR)
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
        assert(ShiftValues.size() == 1 &&
               "Expected matchBinaryPredicate to return one element for "
               "SPLAT_VECTORs");
        ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
      } else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
  // This allows merging two arithmetic shifts even when there's a NOT in
  // between.
  SDValue X;
  APInt C1;
  if (N1C && sd_match(N0, m_OneUse(m_Not(
                              m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
    APInt C2 = N1C->getAPIntValue();
    zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
    APInt Sum = C1 + C2;
    unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
    SDValue NewShift = DAG.getNode(
        ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
    return DAG.getNOT(DL, NewShift, VT);
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
  //   sra (add (shl X, N1C), AddC), N1C -->
  //   sext (add (trunc X to (width - N1C)), AddC')
  //   sra (sub AddC, (shl X, N1C)), N1C -->
  //   sext (sub AddC1',(trunc X to (width - N1C)))
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
      N0.hasOneUse()) {
    bool IsAdd = N0.getOpcode() == ISD::ADD;
    SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
    if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
        Shl.hasOneUse()) {
      // TODO: AddC does not need to be a splat.
      if (ConstantSDNode *AddC =
              isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
        // Determine what the truncate's type would be and ask the target if
        // that is a free operation.
        LLVMContext &Ctx = *DAG.getContext();
        unsigned ShiftAmt = N1C->getZExtValue();
        EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
        if (VT.isVector())
          TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());

        // TODO: The simple type check probably belongs in the default hook
        //       implementation and/or target-specific overrides (because
        //       non-simple types likely require masking when legalized), but
        //       that restriction may conflict with other transforms.
        if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
            TLI.isTruncateFree(VT, TruncVT)) {
          SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
          SDValue ShiftC =
              DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
                                  TruncVT.getScalarSizeInBits()),
                              DL, TruncVT);
          SDValue Add;
          if (IsAdd)
            Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
          else
            Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
          return DAG.getSExtOrTrunc(Add, DL, VT);
        }
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
  }

  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // TODO - support non-uniform vector shift amounts.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      EVT LargeVT = N0Op0.getValueType();
      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
      if (LargeShift->getAPIntValue() == TruncBits) {
        EVT LargeShiftVT = getShiftAmountTy(LargeVT);
        SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
        Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
                          DAG.getConstant(TruncBits, DL, LargeShiftVT));
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, DL, VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N))
      return NewSRA;

  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
    return MULH;

  // Attempt to convert a sra of a load into a narrower sign-extending load.
  if (SDValue NarrowLoad = reduceLoadWidth(N))
    return NarrowLoad;

  if (SDValue AVG = foldShiftToAvg(N, DL))
    return AVG;

  return SDValue();
}
11169
/// Visit an ISD::SRL (logical shift right) node and attempt the full set of
/// srl-specific folds. Returns the replacement SDValue on success, or an
/// empty SDValue when no combine applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Generic shift simplifications shared by all shift opcodes.
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  SDLoc DL(N);
  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold (srl c1, c2) -> c1 >>u c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N1C &&
      DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
    return DAG.getConstant(0, DL, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // Widen both amounts by one bit before adding so c1+c2 itself cannot
    // wrap when testing against the bit width.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, DL, VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // Look through a truncate of an inner srl.
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue InnerShift = N0.getOperand(0);
    // TODO - support non-uniform vector shift amounts.
    if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = InnerShift.getValueType();
      EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
      }
      // In the more general case, we can clear the high bits after the shift:
      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
      if (N0.hasOneUse() && InnerShift.hasOneUse() &&
          c1 + c2 < InnerShiftSize) {
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
                                                            OpSizeInBits - c2),
                                       DL, InnerShiftVT);
        SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
      }
    }
  }

  if (N0.getOpcode() == ISD::SHL) {
    // fold (srl (shl nuw x, c), c) -> x
    if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
      return N0.getOperand(0);

    // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
    // (and (srl x, (sub c2, c1), MASK)
    // NOTE(review): the continuation of this condition (original line 11268)
    // is elided in this listing — verify against upstream before editing.
    if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
      // Both amounts must be in range, with the first no larger than the
      // second, so the SUB below cannot underflow.
      auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
                                             ConstantSDNode *RHS) {
        const APInt &LHSC = LHS->getAPIntValue();
        const APInt &RHSC = RHS->getAPIntValue();
        return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
               LHSC.getZExtValue() <= RHSC.getZExtValue();
      };
      if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
                                    /*AllowUndefs*/ false,
                                    /*AllowTypeMismatch*/ true)) {
        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
        Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
        SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
      }
      if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
                                    /*AllowUndefs*/ false,
                                    /*AllowTypeMismatch*/ true)) {
        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
      }
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift =
          DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
                      DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that would have been shifted in as undef.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
  // of two bitwidth. The "5" represents (log2 (bitwidth x)).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      isPowerOf2_32(OpSizeInBits) &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countr_zero();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        // Move the unknown bit down to bit 0 before inverting it.
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getShiftAmountConstant(ShAmt, VT, DL));
        AddToWorklist(Op.getNode());
      }
      return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
  }

  // fold (srl (logic_op x, (shl (zext y), c1)), c1)
  // -> (logic_op (srl x, c1), (zext y))
  // c1 <= leadingzeros(zext(y))
  // NOTE(review): two lines of this sd_match pattern (originals 11378-11379,
  // which presumably capture ZExtY) and the tail of the NumLeadingZeros
  // computation (original 11382) are elided in this listing — verify against
  // upstream before editing.
  SDValue X, ZExtY;
  if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
                              m_Value(X),
                              m_Specific(N1))))))) {
    unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
    if (N1C->getZExtValue() <= NumLeadingZeros)
      return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                         DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  // NOTE(review): the guarding condition (original line 11390, presumably a
  // SimplifyDemandedBits call) is elided in this listing.
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = reduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  //
  // The also tends to happen for binary operations when SimplifyDemandedBits
  // is involved.
  //
  // FIXME: This is unecessary if we process the DAG in topological order,
  // which we plan to do. This workaround can be removed once the DAG is
  // processed in topological order.
  if (N->hasOneUse()) {
    SDNode *User = *N->user_begin();

    // Look pass the truncate.
    if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
      User = *User->user_begin();

    if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
        User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
      AddToWorklist(User);
  }

  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
    return MULH;

  // Try to form an averaging (AVGFLOOR*) node from srl-of-add.
  if (SDValue AVG = foldShiftToAvg(N, DL))
    return AVG;

  return SDValue();
}
11447
/// Visit an ISD::FSHL or ISD::FSHR (funnel shift) node and attempt the
/// funnel-shift-specific folds. Returns the replacement value, or an empty
/// SDValue if nothing applied.
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();
  SDLoc DL(N);

  // fold (fshl/fshr C0, C1, C2) -> C3
  if (SDValue C =
          DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
    return C;

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  // NOTE(review): one line between this comment and the call below (original
  // line 11464) is elided in this listing — verify against upstream.
  if (DAG.MaskedValueIsZero(
          N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
    return IsFSHL ? N0 : N1;

  // Treat undef and zero operands identically for the folds below.
  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
                         DAG.getConstant(RotAmt, DL, ShAmtTy));
    }

    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(
          ISD::SRL, DL, VT, N1,
          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(
          ISD::SHL, DL, VT, N0,
          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));

    // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
    // TODO - permit LHS EXTLOAD if extensions are shifted out.
    if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
        !DAG.getDataLayout().isBigEndian()) {
      auto *LHS = dyn_cast<LoadSDNode>(N0);
      auto *RHS = dyn_cast<LoadSDNode>(N1);
      // NOTE(review): the final clause of this condition (original line
      // 11513) is elided in this listing — verify against upstream.
      if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
          LHS->getAddressSpace() == RHS->getAddressSpace() &&
          (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
        if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
          SDLoc DL(RHS);
          // Byte offset of the part of the combined value this fsh selects.
          uint64_t PtrOff =
              IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
          Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
          unsigned Fast = 0;
          // Only fold when the target says the (possibly misaligned) wide
          // access is both allowed and fast.
          if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                     RHS->getAddressSpace(), NewAlign,
                                     RHS->getMemOperand()->getFlags(), &Fast) &&
              Fast) {
            SDValue NewPtr = DAG.getMemBasePlusOffset(
                RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
            AddToWorklist(NewPtr.getNode());
            SDValue Load = DAG.getLoad(
                VT, DL, RHS->getChain(), NewPtr,
                RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
            // Preserve memory ordering relative to both original loads.
            DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
            DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
            return Load;
          }
        }
      }
    }
  }

  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff We know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal.
  // If funnel shift is legal as well we might be better off avoiding
  // non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, DL, VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  // NOTE(review): the guarding condition (original line 11562, presumably a
  // SimplifyDemandedBits call) is elided in this listing.
    return SDValue(N, 0);

  return SDValue();
}
11567
/// Visit a saturating shift-left node (ISD::SSHLSAT / ISD::USHLSAT) and try
/// to fold it to a constant or a plain SHL when saturation cannot occur.
SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Generic shift simplifications shared by all shift opcodes.
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  SDLoc DL(N);
  EVT VT = N0.getValueType();

  // fold (*shlsat c1, c2) -> c1<<c2
  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
    return C;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
    // fold (sshlsat x, c) -> (shl x, c)
    // Safe when the shift amount is less than the number of redundant sign
    // bits, so no signed saturation can happen.
    if (N->getOpcode() == ISD::SSHLSAT && N1C &&
        N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
      return DAG.getNode(ISD::SHL, DL, VT, N0, N1);

    // fold (ushlsat x, c) -> (shl x, c)
    // NOTE(review): the continuation of this condition (original line 11592,
    // presumably a known-leading-zeros bound) is elided in this listing —
    // verify against upstream before editing.
    if (N->getOpcode() == ISD::USHLSAT && N1C &&
        N1C->getAPIntValue().ule(
      return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
  }

  return SDValue();
}
11598
// Given a ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates UABD/SABD instruction.
//
// Returns the ABD-based replacement (extended/truncated back to the caller's
// type), or an empty SDValue if no ABD pattern applies.
SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
  // SrcVT is the type the caller ultimately needs, possibly narrower than the
  // abs itself when the abs is wrapped in a truncate.
  EVT SrcVT = N->getValueType(0);

  if (N->getOpcode() == ISD::TRUNCATE)
    N = N->getOperand(0).getNode();

  EVT VT = N->getValueType(0);
  SDValue Op0, Op1;

  // Require abs(sub(Op0, Op1)).
  if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
    return SDValue();

  SDValue AbsOp0 = N->getOperand(0);
  unsigned Opc0 = Op0.getOpcode();

  // Check if the operands of the sub are (zero|sign)-extended, otherwise
  // fallback to ValueTracking.
  if (Opc0 != Op1.getOpcode() ||
      (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
       Opc0 != ISD::SIGN_EXTEND_INREG)) {
    // fold (abs (sub nsw x, y)) -> abds(x, y)
    // Don't fold this for unsupported types as we lose the NSW handling.
    if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
        (AbsOp0->getFlags().hasNoSignedWrap() ||
         DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
      SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
      return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
    }
    // fold (abs (sub x, y)) -> abdu(x, y)
    // Both operands non-negative: the subtraction behaves as unsigned.
    if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
        DAG.SignBitIsZero(Op1)) {
      SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
      return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
    }
    return SDValue();
  }

  // Determine the pre-extension types of the two operands.
  EVT VT0, VT1;
  if (Opc0 == ISD::SIGN_EXTEND_INREG) {
    VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
    VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
  } else {
    VT0 = Op0.getOperand(0).getValueType();
    VT1 = Op1.getOperand(0).getValueType();
  }
  // Zero-extends imply an unsigned difference; sign-extends a signed one.
  unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;

  // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
  // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
  // Prefer computing the ABD at the narrower (max of the two source) width.
  EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
  if ((VT0 == MaxVT || Op0->hasOneUse()) &&
      (VT1 == MaxVT || Op1->hasOneUse()) &&
      (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
    SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
                              DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
                              DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
    ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
    return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
  }

  // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
  // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
  if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
    SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
    return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
  }

  return SDValue();
}
11672
11673SDValue DAGCombiner::visitABS(SDNode *N) {
11674 SDValue N0 = N->getOperand(0);
11675 EVT VT = N->getValueType(0);
11676 SDLoc DL(N);
11677
11678 // fold (abs c1) -> c2
11679 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11680 return C;
11681 // fold (abs (abs x)) -> (abs x)
11682 if (N0.getOpcode() == ISD::ABS)
11683 return N0;
11684 // fold (abs x) -> x iff not-negative
11685 if (DAG.SignBitIsZero(N0))
11686 return N0;
11687
11688 if (SDValue ABD = foldABSToABD(N, DL))
11689 return ABD;
11690
11691 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11692 // iff zero_extend/truncate are free.
11693 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11694 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11695 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11696 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11697 hasOperation(ISD::ABS, ExtVT)) {
11698 return DAG.getNode(
11699 ISD::ZERO_EXTEND, DL, VT,
11700 DAG.getNode(ISD::ABS, DL, ExtVT,
11701 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11702 }
11703 }
11704
11705 return SDValue();
11706}
11707
/// Visit an ISD::BSWAP node and attempt the bswap-specific folds.
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (bswap c1) -> c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
    return C;
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0.getOperand(0);

  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
  // isn't supported, it will be expanded to bswap followed by a manual reversal
  // of bits in each byte. By placing bswaps before bitreverse, we can remove
  // the two bswaps if the bitreverse gets expanded.
  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
  }

  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff x >= bw/2 (i.e. lower half is known zero)
  unsigned BW = VT.getScalarSizeInBits();
  if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
    // Shift amount must be >= BW/2 (low half zero) and a multiple of 16 so
    // the half-width bswap lines up on byte-pair boundaries.
    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
        ShAmt->getZExtValue() >= (BW / 2) &&
        (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
        TLI.isTruncateFree(VT, HalfVT) &&
        (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
      SDValue Res = N0.getOperand(0);
      // Keep whatever part of the shift exceeds BW/2.
      if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
        Res = DAG.getNode(ISD::SHL, DL, VT, Res,
                          DAG.getShiftAmountConstant(NewShAmt, VT, DL));
      Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
      Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
      return DAG.getZExtOrTrunc(Res, DL, VT);
    }
  }

  // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
  // inverse-shift-of-bswap:
  // bswap (X u<< C) --> (bswap X) u>> C
  // bswap (X u>> C) --> (bswap X) u<< C
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      N0.hasOneUse()) {
    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Only byte-aligned shifts commute with a byte swap.
    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
        ShAmt->getZExtValue() % 8 == 0) {
      SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
      unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
      return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
    }
  }

  // Push the byte reversal through bitwise logic where profitable.
  if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
    return V;

  return SDValue();
}
11770
/// Visit an ISD::BITREVERSE node and attempt the bitreverse-specific folds.
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (bitreverse c1) -> c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
    return C;

  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);

  SDValue X, Y;

  // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
  // NOTE(review): the sd_match clause of this condition (original line 11788)
  // is elided in this listing — verify against upstream before editing.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
    return DAG.getNode(ISD::SHL, DL, VT, X, Y);

  // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
  // NOTE(review): the sd_match clause of this condition (original line 11793)
  // is elided in this listing — verify against upstream before editing.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
    return DAG.getNode(ISD::SRL, DL, VT, X, Y);

  return SDValue();
}
11798
11799SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11800 SDValue N0 = N->getOperand(0);
11801 EVT VT = N->getValueType(0);
11802 SDLoc DL(N);
11803
11804 // fold (ctlz c1) -> c2
11805 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11806 return C;
11807
11808 // If the value is known never to be zero, switch to the undef version.
11809 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11810 if (DAG.isKnownNeverZero(N0))
11811 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11812
11813 return SDValue();
11814}
11815
/// Visit an ISD::CTLZ_ZERO_UNDEF node: constant folding only.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (ctlz_zero_undef c1) -> c2
  // NOTE(review): the folded call (original line 11823, presumably
  // DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0})) is elided
  // in this listing — verify against upstream before editing.
  if (SDValue C =
    return C;
  return SDValue();
}
11827
11828SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11829 SDValue N0 = N->getOperand(0);
11830 EVT VT = N->getValueType(0);
11831 SDLoc DL(N);
11832
11833 // fold (cttz c1) -> c2
11834 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11835 return C;
11836
11837 // If the value is known never to be zero, switch to the undef version.
11838 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11839 if (DAG.isKnownNeverZero(N0))
11840 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11841
11842 return SDValue();
11843}
11844
/// Visit an ISD::CTTZ_ZERO_UNDEF node: constant folding only.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (cttz_zero_undef c1) -> c2
  // NOTE(review): the folded call (original line 11852, presumably
  // DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0})) is elided
  // in this listing — verify against upstream before editing.
  if (SDValue C =
    return C;
  return SDValue();
}
11856
/// Visit an ISD::CTPOP (population count) node and attempt the
/// popcount-specific folds.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  unsigned NumBits = VT.getScalarSizeInBits();
  SDLoc DL(N);

  // fold (ctpop c1) -> c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
    return C;

  // If the source is being shifted, but doesn't affect any active bits,
  // then we can call CTPOP on the shift source directly.
  if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
      const APInt &Amt = AmtC->getAPIntValue();
      if (Amt.ult(NumBits)) {
        KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
        // A srl only drops known-zero trailing bits, a shl only drops
        // known-zero leading bits — either way the popcount is unchanged.
        if ((N0.getOpcode() == ISD::SRL &&
             Amt.ule(KnownSrc.countMinTrailingZeros())) ||
            (N0.getOpcode() == ISD::SHL &&
             Amt.ule(KnownSrc.countMinLeadingZeros()))) {
          return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
        }
      }
    }
  }

  // If the upper bits are known to be zero, then see if its profitable to
  // only count the lower bits.
  if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
    EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
    if (hasOperation(ISD::CTPOP, HalfVT) &&
        TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
        TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
      APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
      if (DAG.MaskedValueIsZero(N0, UpperBits)) {
        SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
                                     DAG.getZExtOrTrunc(N0, DL, HalfVT));
        return DAG.getZExtOrTrunc(PopCnt, DL, VT);
      }
    }
  }

  return SDValue();
}
11902
// Static helper used by the min/max combining below: decides whether an FP
// select may be turned into an FMIN*/FMAX*-style node.
// NOTE(review): the opening line of this function's signature (original line
// 11903, carrying its name and leading parameters) is elided in this
// listing — verify against upstream before editing.
                            SDValue RHS, const SDNodeFlags Flags,
                            const TargetLowering &TLI) {
  EVT VT = LHS.getValueType();
  // Only floating-point compares/selects are candidates for fmin/fmax.
  if (!VT.isFloatingPoint())
    return false;

  // Requires no-signed-zeros, and NaNs must be either impossible by flag or
  // disproved by value tracking on both operands.
  // NOTE(review): one clause of this conjunction (original line 11911) is
  // elided in this listing.
  return Flags.hasNoSignedZeros() &&
         (Flags.hasNoNaNs() ||
          (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
}
11915
// Map a compare+select of LHS/RHS onto an FMINNUM/FMAXNUM (or the IEEE
// variant) node, based on the condition code and which compare operand is
// selected on the true edge. Call sites below name this helper
// combineMinNumMaxNumImpl.
// NOTE(review): the opening line of the signature (original line 11916) is
// elided in this listing — verify against upstream before editing.
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC,
                                   const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  // "Less than" flavors: selecting LHS on true means a minimum.
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  // "Greater than" flavors: mirror image — selecting LHS on true is a max.
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    // Equality/unordered condition codes do not form a min/max.
    return SDValue();
  }
}
11960
// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
// A right-shift-by-one of a non-wrapping add is exactly a floor average.
SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
  const unsigned Opcode = N->getOpcode();
  // Only arithmetic/logical right shifts can form an average.
  if (Opcode != ISD::SRA && Opcode != ISD::SRL)
    return SDValue();

  EVT VT = N->getValueType(0);
  // srl pairs with the unsigned average, sra with the signed one.
  bool IsUnsigned = Opcode == ISD::SRL;

  // Captured values.
  SDValue A, B, Add;

  // Match floor average as it is common to both floor/ceil avgs.
  // NOTE(review): part of this sd_match pattern (original line 11975, which
  // presumably captures the add node into Add and its operands into A/B) is
  // elided in this listing — verify against upstream before editing.
  if (sd_match(N, m_BinOp(Opcode,
                          m_One()))) {
    // Decide whether signed or unsigned.
    unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
    if (!hasOperation(FloorISD, VT))
      return SDValue();

    // Can't optimize adds that may wrap.
    if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
        (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
      return SDValue();

    return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
  }

  return SDValue();
}
11992
// Rewrite a bitwise op whose second operand involves a negation into the
// equivalent NOT-of-sum / NOT-of-difference form.
// NOTE(review): both sd_match pattern heads (original lines 11996-11997 and
// 12001-12002) are partially elided in this listing, so the exact matched
// shapes cannot be confirmed here — verify against upstream before editing.
SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
  unsigned Opc = N->getOpcode();
  SDValue X, Y, Z;
  // First pattern: rebuild as X op ~(Y - Z).
  if (sd_match(
    return DAG.getNode(Opc, DL, VT, X,
                       DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));

  // Second pattern: rebuild as X op ~(Y + Z).
      m_Value(Z)))))
    return DAG.getNode(Opc, DL, VT, X,
                       DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));

  return SDValue();
}
12008
/// Generate Min/Max node
/// Entry point: try a direct LHS/RHS <-> True/False match first, then retry
/// with an fneg pulled out of the select's true value.
SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                         SDValue RHS, SDValue True,
                                         SDValue False, ISD::CondCode CC) {
  // Direct match: the select operands are exactly the compare operands.
  if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
    return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);

  // If we can't directly match this, try to see if we can pull an fneg out of
  // the select.
  // NOTE(review): the line computing NegTrue (original 12018, presumably a
  // TLI.getNegatedExpression(...) call) is elided in this listing — verify
  // against upstream before editing.
      True, DAG, LegalOperations, ForCodeSize);
  if (!NegTrue)
    return SDValue();

  // Keep NegTrue alive across further DAG mutation.
  HandleSDNode NegTrueHandle(NegTrue);

  // Try to unfold an fneg from the select if we are comparing the negated
  // constant.
  //
  // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
  //
  // TODO: Handle fabs
  if (LHS == NegTrue) {
    // If we can't directly match this, try to see if we can pull an fneg out of
    // the select.
    // NOTE(review): the line computing NegRHS (original 12034) is elided in
    // this listing — verify against upstream before editing.
        RHS, DAG, LegalOperations, ForCodeSize);
    if (NegRHS) {
      HandleSDNode NegRHSHandle(NegRHS);
      if (NegRHS == False) {
        SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
                                                   False, CC, TLI, DAG);
        // Negate the combined result to undo the fnegs we pulled out.
        if (Combined)
          return DAG.getNode(ISD::FNEG, DL, VT, Combined);
      }
    }
  }

  return SDValue();
}
12049
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
// NOTE(review): the opening line of this static helper's signature (original
// line 12052, carrying its name and leading parameters) is elided in this
// listing — verify against upstream before editing.
                                             SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);
  SDValue C1 = N->getOperand(1);
  SDValue C2 = N->getOperand(2);
  // NOTE(review): the guard condition preceding this early-out (original
  // line 12057) is elided in this listing.
    return SDValue();

  EVT VT = N->getValueType(0);
  // Require a single-use setcc whose compared type matches the select type,
  // so the sra below operates at the right width.
  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
      VT != Cond.getOperand(0).getValueType())
    return SDValue();

  // The inverted-condition + commuted-select variants of these patterns are
  // canonicalized to these forms in IR.
  SDValue X = Cond.getOperand(0);
  SDValue CondC = Cond.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  // NOTE(review): the final clause of this condition (original line 12071)
  // is elided in this listing.
  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
  }
  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
  }
  return SDValue();
}
12085
// Decide whether a select between two constants should be replaced with
// integer math on its condition. Always true for non-setcc / multi-use
// conditions; for a single-use setcc, only sign-bit tests ("X s< 0" or
// "X s> -1") are considered cheap enough, unless the target opts in globally.
// NOTE(review): the function signature line and one condition line are
// missing from this rendering — confirm against the upstream file.
12087 const TargetLowering &TLI) {
// Target opt-out: keep the select form.
12088 if (!TLI.convertSelectOfConstantsToMath(VT))
12089 return false;
12090
12091 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12092 return true;
12094 return true;
12095
// Sign-bit tests convert cheaply via an SRA-generated mask.
12096 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12097 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12098 return true;
12099 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12100 return true;
12101
12102 return false;
12103}
12104
// Fold (select Cond, C1, C2) for integer constants C1/C2 into zext/sext/add/
// shift/or forms of the boolean condition. Pre-legalization i1 conditions get
// the full set of folds; otherwise only the 0/1 -> xor fold is attempted.
// NOTE(review): four source lines are missing from this rendering (the
// BooleanContent enumerators at 12133/12135 and the guards at 12177/12218) —
// confirm against the upstream file before editing.
12105SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12106 SDValue Cond = N->getOperand(0);
12107 SDValue N1 = N->getOperand(1);
12108 SDValue N2 = N->getOperand(2);
12109 EVT VT = N->getValueType(0);
12110 EVT CondVT = Cond.getValueType();
12111 SDLoc DL(N);
12112
// Only integer selects of constant arms are handled here.
12113 if (!VT.isInteger())
12114 return SDValue();
12115
12116 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12117 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12118 if (!C1 || !C2)
12119 return SDValue();
12120
12121 if (CondVT != MVT::i1 || LegalOperations) {
12122 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12123 // We can't do this reliably if integer based booleans have different contents
12124 // to floating point based booleans. This is because we can't tell whether we
12125 // have an integer-based boolean or a floating-point-based boolean unless we
12126 // can find the SETCC that produced it and inspect its operands. This is
12127 // fairly easy if C is the SETCC node, but it can potentially be
12128 // undiscoverable (or not reasonably discoverable). For example, it could be
12129 // in another basic block or it could require searching a complicated
12130 // expression.
12131 if (CondVT.isInteger() &&
12132 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12134 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12136 C1->isZero() && C2->isOne()) {
12137 SDValue NotCond =
12138 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12139 if (VT.bitsEq(CondVT))
12140 return NotCond;
12141 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12142 }
12143
12144 return SDValue();
12145 }
12146
12147 // Only do this before legalization to avoid conflicting with target-specific
12148 // transforms in the other direction (create a select from a zext/sext). There
12149 // is also a target-independent combine here in DAGCombiner in the other
12150 // direction for (select Cond, -1, 0) when the condition is not i1.
12151 assert(CondVT == MVT::i1 && !LegalOperations);
12152
12153 // select Cond, 1, 0 --> zext (Cond)
12154 if (C1->isOne() && C2->isZero())
12155 return DAG.getZExtOrTrunc(Cond, DL, VT);
12156
12157 // select Cond, -1, 0 --> sext (Cond)
12158 if (C1->isAllOnes() && C2->isZero())
12159 return DAG.getSExtOrTrunc(Cond, DL, VT);
12160
12161 // select Cond, 0, 1 --> zext (!Cond)
12162 if (C1->isZero() && C2->isOne()) {
12163 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12164 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12165 return NotCond;
12166 }
12167
12168 // select Cond, 0, -1 --> sext (!Cond)
12169 if (C1->isZero() && C2->isAllOnes()) {
12170 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12171 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12172 return NotCond;
12173 }
12174
12175 // Use a target hook because some targets may prefer to transform in the
12176 // other direction.
// NOTE(review): the hook call itself (line 12177) is missing here.
12178 return SDValue();
12179
12180 // For any constants that differ by 1, we can transform the select into
12181 // an extend and add.
12182 const APInt &C1Val = C1->getAPIntValue();
12183 const APInt &C2Val = C2->getAPIntValue();
12184
12185 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12186 if (C1Val - 1 == C2Val) {
12187 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12188 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12189 }
12190
12191 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12192 if (C1Val + 1 == C2Val) {
12193 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12194 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12195 }
12196
12197 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12198 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12199 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12200 SDValue ShAmtC =
12201 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12202 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12203 }
12204
12205 // select Cond, -1, C --> or (sext Cond), C
12206 if (C1->isAllOnes()) {
12207 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12208 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12209 }
12210
12211 // select Cond, C, -1 --> or (sext (not Cond)), C
12212 if (C2->isAllOnes()) {
12213 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12214 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12215 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12216 }
12217
// Last resort: smear a sign-bit-test condition into a mask (line 12218 with
// the call to foldSelectOfConstantsUsingSra is missing from this rendering).
12219 return V;
12220
12221 return SDValue();
12222}
12223
// Fold a (v)(vp.)select whose condition, arms and result are all i1 (or an
// i1 vector) into plain AND/OR/XOR logic. The MatchContextClass abstracts
// over plain DAG nodes vs. VP nodes so one implementation serves both.
// The non-chosen arm is frozen because select only propagates poison from
// the selected operand, whereas and/or propagate it from both.
// NOTE(review): the signature line (12225) is missing from this rendering.
12224template <class MatchContextClass>
12226 SelectionDAG &DAG) {
12227 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12228 N->getOpcode() == ISD::VP_SELECT) &&
12229 "Expected a (v)(vp.)select");
12230 SDValue Cond = N->getOperand(0);
12231 SDValue T = N->getOperand(1), F = N->getOperand(2);
12232 EVT VT = N->getValueType(0);
12233 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12234 MatchContextClass matcher(DAG, TLI, N);
12235
// Only boolean-typed selects where condition and result types agree.
12236 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12237 return SDValue();
12238
12239 // select Cond, Cond, F --> or Cond, freeze(F)
12240 // select Cond, 1, F --> or Cond, freeze(F)
12241 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12242 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12243
12244 // select Cond, T, Cond --> and Cond, freeze(T)
12245 // select Cond, T, 0 --> and Cond, freeze(T)
12246 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12247 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12248
12249 // select Cond, T, 1 --> or (not Cond), freeze(T)
12250 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12251 SDValue NotCond =
12252 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12253 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12254 }
12255
12256 // select Cond, 0, F --> and (not Cond), freeze(F)
12257 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12258 SDValue NotCond =
12259 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12260 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12261 }
12262
12263 return SDValue();
12264}
12265
// Fold a select whose condition is a sign-bit test of a same-width value into
// an arithmetic-shift-generated all-zeros/all-ones mask combined with AND/OR:
//   (Cond0 s< 0) ? N1 : 0  --> (Cond0 s>> BW-1) & freeze(N1)
//   (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
// plus an AND-NOT variant when the target has a cheap 'andnot'.
// NOTE(review): the function signature line (12266) is missing from this
// rendering — confirm the exact name/parameters against the upstream file.
12267 SDValue N0 = N->getOperand(0);
12268 SDValue N1 = N->getOperand(1);
12269 SDValue N2 = N->getOperand(2);
12270 EVT VT = N->getValueType(0);
12271 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12272
12273 SDValue Cond0, Cond1;
12274 ISD::CondCode CC;
// The condition must be a single-use setcc whose compared value has the same
// type as the select result, so the shifted mask is the right width.
12275 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12276 m_CondCode(CC)))) ||
12277 VT != Cond0.getValueType())
12278 return SDValue();
12279
12280 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12281 // compare is inverted from that pattern ("Cond0 s> -1").
12282 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12283 ; // This is the pattern we are looking for.
12284 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12285 std::swap(N1, N2);
12286 else
12287 return SDValue();
12288
12289 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12290 if (isNullOrNullSplat(N2)) {
12291 SDLoc DL(N);
12292 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12293 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12294 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12295 }
12296
12297 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12298 if (isAllOnesOrAllOnesSplat(N1)) {
12299 SDLoc DL(N);
12300 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12301 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12302 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12303 }
12304
12305 // If we have to invert the sign bit mask, only do that transform if the
12306 // target has a bitwise 'and not' instruction (the invert is free).
12307 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12308 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12309 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12310 SDLoc DL(N);
12311 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12312 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12313 SDValue Not = DAG.getNOT(DL, Sra, VT);
12314 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12315 }
12316
12317 // TODO: There's another pattern in this family, but it may require
12318 // implementing hasOrNot() to check for profitability:
12319 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12320
12321 return SDValue();
12322}
12323
12324// Match SELECTs with absolute difference patterns.
12325// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12326// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12327// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12328// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
// The signedness of CC selects ABDS vs ABDU. The "swapped arms" variants are
// folded to a negated ABD instead.
// NOTE(review): two continuation lines of the sd_match conditions (12345 and
// 12357) are missing from this rendering — confirm upstream.
12329SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12330 SDValue False, ISD::CondCode CC,
12331 const SDLoc &DL) {
12332 bool IsSigned = isSignedIntSetCC(CC);
12333 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12334 EVT VT = LHS.getValueType();
12335
// Post-legalization we may only form ABD nodes the target supports.
12336 if (LegalOperations && !hasOperation(ABDOpc, VT))
12337 return SDValue();
12338
12339 switch (CC) {
12340 case ISD::SETGT:
12341 case ISD::SETGE:
12342 case ISD::SETUGT:
12343 case ISD::SETUGE:
12344 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12346 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12347 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12348 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12349 hasOperation(ABDOpc, VT))
12350 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12351 break;
12352 case ISD::SETLT:
12353 case ISD::SETLE:
12354 case ISD::SETULT:
12355 case ISD::SETULE:
12356 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12358 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12359 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12360 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12361 hasOperation(ABDOpc, VT))
12362 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12363 break;
12364 default:
12365 break;
12366 }
12367
12368 return SDValue();
12369}
12370
12371// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12372// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12373SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12374 SDValue False, ISD::CondCode CC,
12375 const SDLoc &DL) {
12376 APInt C;
12377 EVT VT = True.getValueType();
12378 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12379 if (CC == ISD::SETUGT && LHS == False &&
12380 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12381 SDValue AddC = DAG.getConstant(~C, DL, VT);
12382 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12383 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12384 }
12385 if (CC == ISD::SETULT && LHS == True &&
12386 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12387 SDValue AddC = DAG.getConstant(-C, DL, VT);
12388 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12389 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12390 }
12391 }
12392 return SDValue();
12393}
12394
// Main combiner entry point for ISD::SELECT. Tries, in order: generic select
// simplification, boolean-logic folds, condition flips, constant-arm folds,
// i1 select-sequence normalization, min/max/abs-style setcc folds, and
// SELECT_CC formation.
// NOTE(review): four source lines are missing from this rendering (12407,
// 12431, 12520 — the CC extraction from the setcc — and 12562); confirm
// against the upstream file before editing.
12395SDValue DAGCombiner::visitSELECT(SDNode *N) {
12396 SDValue N0 = N->getOperand(0);
12397 SDValue N1 = N->getOperand(1);
12398 SDValue N2 = N->getOperand(2);
12399 EVT VT = N->getValueType(0);
12400 EVT VT0 = N0.getValueType();
12401 SDLoc DL(N);
12402 SDNodeFlags Flags = N->getFlags();
12403
12404 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12405 return V;
12406
12408 return V;
12409
12410 // select (not Cond), N1, N2 -> select Cond, N2, N1
12411 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12412 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12413
12414 if (SDValue V = foldSelectOfConstants(N))
12415 return V;
12416
12417 // If we can fold this based on the true/false value, do so.
12418 if (SimplifySelectOps(N, N1, N2))
12419 return SDValue(N, 0); // Don't revisit N.
12420
12421 if (VT0 == MVT::i1) {
12422 // The code in this block deals with the following 2 equivalences:
12423 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12424 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12425 // The target can specify its preferred form with the
12426 // shouldNormalizeToSelectSequence() callback. However we always transform
12427 // to the right anyway if we find the inner select exists in the DAG anyway
12428 // and we always transform to the left side if we know that we can further
12429 // optimize the combination of the conditions.
12430 bool normalizeToSequence =
12432 // select (and Cond0, Cond1), X, Y
12433 // -> select Cond0, (select Cond1, X, Y), Y
12434 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12435 SDValue Cond0 = N0->getOperand(0);
12436 SDValue Cond1 = N0->getOperand(1);
12437 SDValue InnerSelect =
12438 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12439 if (normalizeToSequence || !InnerSelect.use_empty())
12440 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12441 InnerSelect, N2, Flags);
12442 // Cleanup on failure.
12443 if (InnerSelect.use_empty())
12444 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12445 }
12446 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12447 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12448 SDValue Cond0 = N0->getOperand(0);
12449 SDValue Cond1 = N0->getOperand(1);
12450 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12451 Cond1, N1, N2, Flags);
12452 if (normalizeToSequence || !InnerSelect.use_empty())
12453 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12454 InnerSelect, Flags);
12455 // Cleanup on failure.
12456 if (InnerSelect.use_empty())
12457 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12458 }
12459
12460 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12461 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12462 SDValue N1_0 = N1->getOperand(0);
12463 SDValue N1_1 = N1->getOperand(1);
12464 SDValue N1_2 = N1->getOperand(2);
12465 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12466 // Create the actual and node if we can generate good code for it.
12467 if (!normalizeToSequence) {
12468 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12469 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12470 N2, Flags);
12471 }
12472 // Otherwise see if we can optimize the "and" to a better pattern.
12473 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12474 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12475 N2, Flags);
12476 }
12477 }
12478 }
12479 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12480 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12481 SDValue N2_0 = N2->getOperand(0);
12482 SDValue N2_1 = N2->getOperand(1);
12483 SDValue N2_2 = N2->getOperand(2);
12484 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12485 // Create the actual or node if we can generate good code for it.
12486 if (!normalizeToSequence) {
12487 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12488 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12489 N2_2, Flags);
12490 }
12491 // Otherwise see if we can optimize to a better pattern.
12492 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12493 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12494 N2_2, Flags);
12495 }
12496 }
12497
12498 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12499 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12500 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12501 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12502 N2.getOperand(1) == N1.getOperand(0) &&
12503 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12504 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12505
12506 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12507 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12508 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12509 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12510 N2.getOperand(1) == N1.getOperand(0) &&
12511 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12512 return DAG.getNegative(
12513 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12514 DL, VT);
12515 }
12516
12517 // Fold selects based on a setcc into other things, such as min/max/abs.
12518 if (N0.getOpcode() == ISD::SETCC) {
12519 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12521
12522 // select (fcmp lt x, y), x, y -> fminnum x, y
12523 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12524 //
12525 // This is OK if we don't care what happens if either operand is a NaN.
12526 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12527 if (SDValue FMinMax =
12528 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12529 return FMinMax;
12530
12531 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12532 // This is conservatively limited to pre-legal-operations to give targets
12533 // a chance to reverse the transform if they want to do that. Also, it is
12534 // unlikely that the pattern would be formed late, so it's probably not
12535 // worth going through the other checks.
12536 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12537 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12538 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12539 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12540 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12541 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12542 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12543 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12544 //
12545 // The IR equivalent of this transform would have this form:
12546 // %a = add %x, C
12547 // %c = icmp ugt %x, ~C
12548 // %r = select %c, -1, %a
12549 // =>
12550 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12551 // %u0 = extractvalue %u, 0
12552 // %u1 = extractvalue %u, 1
12553 // %r = select %u1, -1, %u0
12554 SDVTList VTs = DAG.getVTList(VT, VT0);
12555 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12556 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12557 }
12558 }
12559
12560 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12561 (!LegalOperations &&
12563 // Any flags available in a select/setcc fold will be on the setcc as they
12564 // migrated from fcmp
12565 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12566 N0.getOperand(2), N0->getFlags());
12567 }
12568
12569 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12570 return ABD;
12571
12572 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12573 return NewSel;
12574
12575 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12576 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12577 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12578 return UMin;
12579 }
12580
12581 if (!VT.isVector())
12582 if (SDValue BinOp = foldSelectOfBinops(N))
12583 return BinOp;
12584
12585 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12586 return R;
12587
12588 return SDValue();
12589}
12590
12591// This function assumes all the vselect's arguments are CONCAT_VECTOR
12592// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// If each half of the condition build_vector is uniform (ignoring undefs),
// the vselect reduces to a concat of one operand from LHS/RHS per half.
// NOTE(review): the function signature line (12593) and the first argument
// line of the final getNode call (12641, presumably ISD::CONCAT_VECTORS,
// DL, VT) are missing from this rendering.
12594 SDLoc DL(N);
12595 SDValue Cond = N->getOperand(0);
12596 SDValue LHS = N->getOperand(1);
12597 SDValue RHS = N->getOperand(2);
12598 EVT VT = N->getValueType(0);
12599 int NumElems = VT.getVectorNumElements();
12600 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12601 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12602 Cond.getOpcode() == ISD::BUILD_VECTOR);
12603
12604 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12605 // binary ones here.
12606 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12607 return SDValue();
12608
12609 // We're sure we have an even number of elements due to the
12610 // concat_vectors we have as arguments to vselect.
12611 // Skip BV elements until we find one that's not an UNDEF
12612 // After we find an UNDEF element, keep looping until we get to half the
12613 // length of the BV and see if all the non-undef nodes are the same.
12614 ConstantSDNode *BottomHalf = nullptr;
12615 for (int i = 0; i < NumElems / 2; ++i) {
12616 if (Cond->getOperand(i)->isUndef())
12617 continue;
12618
12619 if (BottomHalf == nullptr)
12620 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12621 else if (Cond->getOperand(i).getNode() != BottomHalf)
// Mixed selector values in this half: the fold does not apply.
12622 return SDValue();
12623 }
12624
12625 // Do the same for the second half of the BuildVector
12626 ConstantSDNode *TopHalf = nullptr;
12627 for (int i = NumElems / 2; i < NumElems; ++i) {
12628 if (Cond->getOperand(i)->isUndef())
12629 continue;
12630
12631 if (TopHalf == nullptr)
12632 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12633 else if (Cond->getOperand(i).getNode() != TopHalf)
12634 return SDValue();
12635 }
12636
12637 assert(TopHalf && BottomHalf &&
12638 "One half of the selector was all UNDEFs and the other was all the "
12639 "same value. This should have been addressed before this function.");
// Zero selector picks the RHS half, non-zero picks the LHS half.
12640 return DAG.getNode(
12642 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12643 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12644}
12645
12646bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12647 SelectionDAG &DAG, const SDLoc &DL) {
12648
12649 // Only perform the transformation when existing operands can be reused.
12650 if (IndexIsScaled)
12651 return false;
12652
12653 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12654 return false;
12655
12656 EVT VT = BasePtr.getValueType();
12657
12658 if (SDValue SplatVal = DAG.getSplatValue(Index);
12659 SplatVal && !isNullConstant(SplatVal) &&
12660 SplatVal.getValueType() == VT) {
12661 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12662 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12663 return true;
12664 }
12665
12666 if (Index.getOpcode() != ISD::ADD)
12667 return false;
12668
12669 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12670 SplatVal && SplatVal.getValueType() == VT) {
12671 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12672 Index = Index.getOperand(1);
12673 return true;
12674 }
12675 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12676 SplatVal && SplatVal.getValueType() == VT) {
12677 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12678 Index = Index.getOperand(0);
12679 return true;
12680 }
12681 return false;
12682}
12683
12684// Fold sext/zext of index into index type.
12685bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12686 SelectionDAG &DAG) {
12687 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12688
12689 // It's always safe to look through zero extends.
12690 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12691 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12692 IndexType = ISD::UNSIGNED_SCALED;
12693 Index = Index.getOperand(0);
12694 return true;
12695 }
12696 if (ISD::isIndexTypeSigned(IndexType)) {
12697 IndexType = ISD::UNSIGNED_SCALED;
12698 return true;
12699 }
12700 }
12701
12702 // It's only safe to look through sign extends when Index is signed.
12703 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12704 ISD::isIndexTypeSigned(IndexType) &&
12705 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12706 Index = Index.getOperand(0);
12707 return true;
12708 }
12709
12710 return false;
12711}
12712
// Combine a VP scatter: drop it when the mask is all-zero, and otherwise try
// to refine the (base, index) pair and the index extension.
// NOTE(review): the all-zero-mask condition line (12726) is missing from this
// rendering — confirm against the upstream file.
12713SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12714 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12715 SDValue Mask = MSC->getMask();
12716 SDValue Chain = MSC->getChain();
12717 SDValue Index = MSC->getIndex();
12718 SDValue Scale = MSC->getScale();
12719 SDValue StoreVal = MSC->getValue();
12720 SDValue BasePtr = MSC->getBasePtr();
12721 SDValue VL = MSC->getVectorLength();
12722 ISD::MemIndexType IndexType = MSC->getIndexType();
12723 SDLoc DL(N);
12724
12725 // Zap scatters with a zero mask.
12727 return Chain;
12728
// Fold a uniform offset out of the index into the scalar base pointer.
12729 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12730 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12731 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12732 DL, Ops, MSC->getMemOperand(), IndexType);
12733 }
12734
// Strip/relabel a sext/zext wrapped around the index.
12735 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12736 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12737 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12738 DL, Ops, MSC->getMemOperand(), IndexType);
12739 }
12740
12741 return SDValue();
12742}
12743
// Combine a masked scatter: drop it when the mask is all-zero, and otherwise
// try to refine the (base, index) pair and the index extension — the masked
// counterpart of visitVPSCATTER above.
// NOTE(review): the all-zero-mask condition line (12756) is missing from this
// rendering — confirm against the upstream file.
12744SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12745 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12746 SDValue Mask = MSC->getMask();
12747 SDValue Chain = MSC->getChain();
12748 SDValue Index = MSC->getIndex();
12749 SDValue Scale = MSC->getScale();
12750 SDValue StoreVal = MSC->getValue();
12751 SDValue BasePtr = MSC->getBasePtr();
12752 ISD::MemIndexType IndexType = MSC->getIndexType();
12753 SDLoc DL(N);
12754
12755 // Zap scatters with a zero mask.
12757 return Chain;
12758
// Fold a uniform offset out of the index into the scalar base pointer.
12759 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12760 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12761 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12762 DL, Ops, MSC->getMemOperand(), IndexType,
12763 MSC->isTruncatingStore());
12764 }
12765
// Strip/relabel a sext/zext wrapped around the index.
12766 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12767 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12768 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12769 DL, Ops, MSC->getMemOperand(), IndexType,
12770 MSC->isTruncatingStore());
12771 }
12772
12773 return SDValue();
12774}
12775
// Combine a masked store: remove dead/shadowed stores, convert an all-ones
// mask to a plain store, try indexed forms, narrow truncating stores via
// demanded bits, and fold a TRUNC of the stored value into the store.
// NOTE(review): four source lines are missing from this rendering (the
// all-zero-mask check at 12784, one condition at 12794, and parts of the
// truncating-store condition at 12819/12823) — confirm upstream.
12776SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12777 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12778 SDValue Mask = MST->getMask();
12779 SDValue Chain = MST->getChain();
12780 SDValue Value = MST->getValue();
12781 SDValue Ptr = MST->getBasePtr();
12782
12783 // Zap masked stores with a zero mask.
12785 return Chain;
12786
12787 // Remove a masked store if base pointers and masks are equal.
12788 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12789 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12790 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12791 !MST->getBasePtr().isUndef() &&
12792 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12793 MST1->getMemoryVT().getStoreSize()) ||
12795 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12796 MST->getMemoryVT().getStoreSize())) {
// The earlier store is fully overwritten by this one: bypass it on the chain.
12797 CombineTo(MST1, MST1->getChain());
12798 if (N->getOpcode() != ISD::DELETED_NODE)
12799 AddToWorklist(N);
12800 return SDValue(N, 0);
12801 }
12802 }
12803
12804 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12805 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12806 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12807 !MST->isCompressingStore() && !MST->isTruncatingStore())
12808 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12809 MST->getBasePtr(), MST->getPointerInfo(),
12810 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12811 MST->getAAInfo());
12812
12813 // Try transforming N to an indexed store.
12814 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12815 return SDValue(N, 0);
12816
// For a truncating store, only the low (memory-width) bits of Value matter;
// let SimplifyDemandedBits shrink the value computation accordingly.
12817 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12818 Value.getValueType().isInteger() &&
12820 !cast<ConstantSDNode>(Value)->isOpaque())) {
12821 APInt TruncDemandedBits =
12822 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12824
12825 // See if we can simplify the operation with
12826 // SimplifyDemandedBits, which only works if the value has a single use.
12827 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12828 // Re-visit the store if anything changed and the store hasn't been merged
12829 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12830 // node back to the worklist if necessary, but we also need to re-visit
12831 // the Store node itself.
12832 if (N->getOpcode() != ISD::DELETED_NODE)
12833 AddToWorklist(N);
12834 return SDValue(N, 0);
12835 }
12836 }
12837
12838 // If this is a TRUNC followed by a masked store, fold this into a masked
12839 // truncating store. We can do this even if this is already a masked
12840 // truncstore.
12841 // TODO: Try combine to masked compress store if possible.
12842 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12843 MST->isUnindexed() && !MST->isCompressingStore() &&
12844 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12845 MST->getMemoryVT(), LegalOperations)) {
// The mask must be widened to match the pre-truncation value type.
12846 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12847 Value.getOperand(0).getValueType());
12848 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12849 MST->getOffset(), Mask, MST->getMemoryVT(),
12850 MST->getMemOperand(), MST->getAddressingMode(),
12851 /*IsTruncating=*/true);
12852 }
12853
12854 return SDValue();
12855}
12856
12857SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12858 auto *SST = cast<VPStridedStoreSDNode>(N);
12859 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12860 // Combine strided stores with unit-stride to a regular VP store.
12861 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12862 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12863 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12864 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12865 SST->getVectorLength(), SST->getMemoryVT(),
12866 SST->getMemOperand(), SST->getAddressingMode(),
12867 SST->isTruncatingStore(), SST->isCompressingStore());
12868 }
12869 return SDValue();
12870}
12871
12872SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12873 SDLoc DL(N);
12874 SDValue Vec = N->getOperand(0);
12875 SDValue Mask = N->getOperand(1);
12876 SDValue Passthru = N->getOperand(2);
12877 EVT VecVT = Vec.getValueType();
12878
12879 bool HasPassthru = !Passthru.isUndef();
12880
12881 APInt SplatVal;
12882 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12883 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12884
12885 if (Vec.isUndef() || Mask.isUndef())
12886 return Passthru;
12887
12888 // No need for potentially expensive compress if the mask is constant.
12891 EVT ScalarVT = VecVT.getVectorElementType();
12892 unsigned NumSelected = 0;
12893 unsigned NumElmts = VecVT.getVectorNumElements();
12894 for (unsigned I = 0; I < NumElmts; ++I) {
12895 SDValue MaskI = Mask.getOperand(I);
12896 // We treat undef mask entries as "false".
12897 if (MaskI.isUndef())
12898 continue;
12899
12900 if (TLI.isConstTrueVal(MaskI)) {
12901 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12902 DAG.getVectorIdxConstant(I, DL));
12903 Ops.push_back(VecI);
12904 NumSelected++;
12905 }
12906 }
12907 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12908 SDValue Val =
12909 HasPassthru
12910 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12911 DAG.getVectorIdxConstant(Rest, DL))
12912 : DAG.getUNDEF(ScalarVT);
12913 Ops.push_back(Val);
12914 }
12915 return DAG.getBuildVector(VecVT, DL, Ops);
12916 }
12917
12918 return SDValue();
12919}
12920
12921SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12922 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12923 SDValue Mask = MGT->getMask();
12924 SDValue Chain = MGT->getChain();
12925 SDValue Index = MGT->getIndex();
12926 SDValue Scale = MGT->getScale();
12927 SDValue BasePtr = MGT->getBasePtr();
12928 SDValue VL = MGT->getVectorLength();
12929 ISD::MemIndexType IndexType = MGT->getIndexType();
12930 SDLoc DL(N);
12931
12932 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12933 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12934 return DAG.getGatherVP(
12935 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12936 Ops, MGT->getMemOperand(), IndexType);
12937 }
12938
12939 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12940 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12941 return DAG.getGatherVP(
12942 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12943 Ops, MGT->getMemOperand(), IndexType);
12944 }
12945
12946 return SDValue();
12947}
12948
12949SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12950 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12951 SDValue Mask = MGT->getMask();
12952 SDValue Chain = MGT->getChain();
12953 SDValue Index = MGT->getIndex();
12954 SDValue Scale = MGT->getScale();
12955 SDValue PassThru = MGT->getPassThru();
12956 SDValue BasePtr = MGT->getBasePtr();
12957 ISD::MemIndexType IndexType = MGT->getIndexType();
12958 SDLoc DL(N);
12959
12960 // Zap gathers with a zero mask.
12962 return CombineTo(N, PassThru, MGT->getChain());
12963
12964 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12965 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12966 return DAG.getMaskedGather(
12967 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12968 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12969 }
12970
12971 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12972 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12973 return DAG.getMaskedGather(
12974 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12975 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12976 }
12977
12978 return SDValue();
12979}
12980
12981SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12982 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12983 SDValue Mask = MLD->getMask();
12984
12985 // Zap masked loads with a zero mask.
12987 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12988
12989 // If this is a masked load with an all ones mask, we can use a unmasked load.
12990 // FIXME: Can we do this for indexed, expanding, or extending loads?
12991 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12992 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12993 SDValue NewLd = DAG.getLoad(
12994 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12995 MLD->getPointerInfo(), MLD->getBaseAlign(),
12996 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12997 return CombineTo(N, NewLd, NewLd.getValue(1));
12998 }
12999
13000 // Try transforming N to an indexed load.
13001 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13002 return SDValue(N, 0);
13003
13004 return SDValue();
13005}
13006
13007SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
13008 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
13009 SDValue Chain = HG->getChain();
13010 SDValue Inc = HG->getInc();
13011 SDValue Mask = HG->getMask();
13012 SDValue BasePtr = HG->getBasePtr();
13013 SDValue Index = HG->getIndex();
13014 SDLoc DL(HG);
13015
13016 EVT MemVT = HG->getMemoryVT();
13017 EVT DataVT = Index.getValueType();
13018 MachineMemOperand *MMO = HG->getMemOperand();
13019 ISD::MemIndexType IndexType = HG->getIndexType();
13020
13022 return Chain;
13023
13024 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
13025 refineIndexType(Index, IndexType, DataVT, DAG)) {
13026 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
13027 HG->getScale(), HG->getIntID()};
13028 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
13029 MMO, IndexType);
13030 }
13031
13032 return SDValue();
13033}
13034
13035SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
13036 if (SDValue Res = foldPartialReduceMLAMulOp(N))
13037 return Res;
13038 if (SDValue Res = foldPartialReduceAdd(N))
13039 return Res;
13040 return SDValue();
13041}
13042
13043// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1))
13044// -> partial_reduce_*mla(acc, a, b)
13045//
13046// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1))
13047// -> partial_reduce_*mla(acc, x, splat(C))
13048//
13049// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1))
13050// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b)
13051//
13052// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1))
13053// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C))
SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
  SDLoc DL(N);
  auto *Context = DAG.getContext();
  // PARTIAL_REDUCE_*MLA operands: accumulator, multiplicand, multiplier.
  SDValue Acc = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Op2 = N->getOperand(2);
  unsigned Opc = Op1->getOpcode();

  // Handle predication by moving the SELECT into the operand of the MUL.
  SDValue Pred;
  if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
                              isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
    Pred = Op1->getOperand(0);
    Op1 = Op1->getOperand(1);
    Opc = Op1->getOpcode();
  }

  if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
    return SDValue();

  SDValue LHS = Op1->getOperand(0);
  SDValue RHS = Op1->getOperand(1);

  // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
  if (Opc == ISD::SHL) {
    APInt C;
    if (!ISD::isConstantSplatVector(RHS.getNode(), C))
      return SDValue();

    RHS =
        DAG.getSplatVector(RHS.getValueType(), DL,
                           DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
                                           RHS.getValueType().getScalarType()));
    Opc = ISD::MUL;
  }

  // Require the multiplier operand (Op2) to be a splat of one.
  // NOTE(review): the second conjunct of this condition (presumably the FMUL
  // equivalent of the splat-one check) appears to be missing from this
  // listing — confirm against upstream.
  if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) &&
    return SDValue();

  // An operand is foldable here if it is an integer or FP extend.
  auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
    return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
  };

  unsigned LHSOpcode = LHS->getOpcode();
  if (!IsIntOrFPExtOpcode(LHSOpcode))
    return SDValue();

  SDValue LHSExtOp = LHS->getOperand(0);
  EVT LHSExtOpVT = LHSExtOp.getValueType();

  // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze
  // OtherOp to keep the same semantics when moving the selects into the MUL
  // operands.
  auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) {
    if (Pred) {
      EVT OpVT = Op.getValueType();
      SDValue Zero = OpVT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, OpVT)
                                            : DAG.getConstant(0, DL, OpVT);
      Op = DAG.getSelect(DL, OpVT, Pred, Op, Zero);
      OtherOp = DAG.getFreeze(OtherOp);
    }
  };

  // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
  // -> partial_reduce_*mla(acc, x, C)
  APInt C;
  if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
    // TODO: Make use of partial_reduce_sumla here
    // The constant must survive a round-trip through the narrow element type
    // under the same extension the LHS uses.
    APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
    unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
    if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
        (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
      return SDValue();

    // NOTE(review): the ternary arms selecting the signed/unsigned
    // PARTIAL_REDUCE opcode appear to be missing from this listing —
    // confirm against upstream.
    unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND

    // Only perform these combines if the target supports folding
    // the extends into the operation.
    // NOTE(review): the opening of this legality check (an
    // `if (!TLI....(`-style call header) appears to be missing from this
    // listing — confirm against upstream.
        NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
        TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
      return SDValue();

    // Note: this SDValue `C` intentionally shadows the APInt `C` above.
    SDValue C = DAG.getConstant(CTrunc, DL, LHSExtOpVT);
    ApplyPredicate(C, LHSExtOp);
    return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp, C);
  }

  unsigned RHSOpcode = RHS->getOpcode();
  if (!IsIntOrFPExtOpcode(RHSOpcode))
    return SDValue();

  SDValue RHSExtOp = RHS->getOperand(0);
  if (LHSExtOpVT != RHSExtOp.getValueType())
    return SDValue();

  // Pick the result opcode from the combination of extend kinds.
  // NOTE(review): the assignments for the mixed sext/zext cases appear to be
  // missing from this listing — confirm against upstream.
  unsigned NewOpc;
  if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
    NewOpc = ISD::PARTIAL_REDUCE_SMLA;
  else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
    NewOpc = ISD::PARTIAL_REDUCE_UMLA;
  else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
  else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
    std::swap(LHSExtOp, RHSExtOp);
  } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
    NewOpc = ISD::PARTIAL_REDUCE_FMLA;
  } else
    return SDValue();
  // For a 2-stage extend the signedness of both of the extends must match
  // If the mul has the same type, there is no outer extend, and thus we
  // can simply use the inner extends to pick the result node.
  // TODO: extend to handle nonneg zext as sext
  EVT AccElemVT = Acc.getValueType().getVectorElementType();
  if (Op1.getValueType().getVectorElementType() != AccElemVT &&
      NewOpc != N->getOpcode())
    return SDValue();

  // Only perform these combines if the target supports folding
  // the extends into the operation.
  // NOTE(review): the opening of this legality check appears to be missing
  // from this listing — confirm against upstream.
      NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
      TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
    return SDValue();

  ApplyPredicate(RHSExtOp, LHSExtOp);
  return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
}
13186
13187// partial.reduce.*mla(acc, *ext(op), splat(1))
13188// -> partial.reduce.*mla(acc, op, splat(trunc(1)))
13189// partial.reduce.sumla(acc, sext(op), splat(1))
13190// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13191//
13192// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1))
13193// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1)))
SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
  SDLoc DL(N);
  // PARTIAL_REDUCE_*MLA operands: accumulator, multiplicand, multiplier.
  SDValue Acc = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Op2 = N->getOperand(2);

  // NOTE(review): the guard for this early return (presumably requiring Op2
  // to be a splat of one) appears to be missing from this listing — confirm
  // against upstream.
    return SDValue();

  // Handle predication by peeling a select-with-zero off Op1; the predicate
  // is reapplied to the constant multiplier below.
  SDValue Pred;
  unsigned Op1Opcode = Op1.getOpcode();
  if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
                                    isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
    Pred = Op1->getOperand(0);
    Op1 = Op1->getOperand(1);
    Op1Opcode = Op1->getOpcode();
  }

  if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
    return SDValue();

  // The extend's signedness must agree with the node's, unless the element
  // types already match (no outer extend to worry about).
  bool Op1IsSigned =
      Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
  bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
  EVT AccElemVT = Acc.getValueType().getVectorElementType();
  if (Op1IsSigned != NodeIsSigned &&
      Op1.getValueType().getVectorElementType() != AccElemVT)
    return SDValue();

  // NOTE(review): two arms of this conditional expression appear to be
  // missing from this listing — confirm against upstream.
  unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
                       : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA

  SDValue UnextOp1 = Op1.getOperand(0);
  EVT UnextOp1VT = UnextOp1.getValueType();
  auto *Context = DAG.getContext();
  // Only combine when the target can fold the extend into the operation.
  // NOTE(review): the opening of this legality check appears to be missing
  // from this listing — confirm against upstream.
      NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
      TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
    return SDValue();

  // The replacement multiplier: splat(1) in the unextended type (FP or int).
  SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
                         ? DAG.getConstantFP(1, DL, UnextOp1VT)
                         : DAG.getConstant(1, DL, UnextOp1VT);

  // Reapply the peeled predicate: select(p, 1, 0) as the multiplier keeps
  // masked-off lanes contributing zero.
  if (Pred) {
    SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
                       ? DAG.getConstantFP(0, DL, UnextOp1VT)
                       : DAG.getConstant(0, DL, UnextOp1VT);
    Constant = DAG.getSelect(DL, UnextOp1VT, Pred, Constant, Zero);
  }
  return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
                     Constant);
}
13249
13250SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13251 auto *SLD = cast<VPStridedLoadSDNode>(N);
13252 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13253 // Combine strided loads with unit-stride to a regular VP load.
13254 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13255 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13256 SDValue NewLd = DAG.getLoadVP(
13257 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13258 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13259 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13260 SLD->getMemOperand(), SLD->isExpandingLoad());
13261 return CombineTo(N, NewLd, NewLd.getValue(1));
13262 }
13263 return SDValue();
13264}
13265
13266/// A vector select of 2 constant vectors can be simplified to math/logic to
13267/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  // Only boolean-element conditions with a single use are profitable here.
  // NOTE(review): the rest of this condition (presumably checks that N1/N2
  // are constant build vectors, as the per-element loop below requires)
  // appears to be missing from this listing — confirm against upstream.
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    if (N1Elt.isUndef())
      continue;
    // N2 should not contain undef values since it will be reused in the fold.
    if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
      AllAddOne = false;
      AllSubOne = false;
      break;
    }

    // Track whether every lane satisfies C1 == C2+1 (add) or C1 == C2-1 (sub).
    const APInt &C1 = N1Elt->getAsAPIntVal();
    const APInt &C2 = N2Elt->getAsAPIntVal();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
  APInt Pow2C;
  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
      isNullOrNullSplat(N2)) {
    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
  }

  // NOTE(review): the guard for this early return (an `if (SDValue V = ...)`
  // style fold) appears to be missing from this listing — confirm upstream.
    return V;

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}
13334
SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition
  SDValue N1 = N->getOperand(1); // true arm
  SDValue N2 = N->getOperand(2); // false arm
  SDLoc DL(N);

  // Generic select simplifications (e.g. equal arms, constant condition).
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // NOTE(review): the guard for this early return appears to be missing from
  // this listing — confirm against upstream.
    return V;

  return SDValue();
}
13349
// NOTE(review): the first line of this function's signature (its name and
// leading parameters, presumably `Cond` and `TVal`) appears to be missing
// from this listing — confirm against upstream.
                                                SDValue FVal,
                                                const TargetLowering &TLI,
                                                SelectionDAG &DAG,
                                                const SDLoc &DL) {
  // Only operate on legal result types so the logic-op forms below are valid.
  EVT VT = TVal.getValueType();
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  EVT CondVT = Cond.getValueType();
  assert(CondVT.isVector() && "Vector select expects a vector selector!");

  // Classify each arm as an all-zeros or all-ones constant splat.
  bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
  bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
  bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
  bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());

  // no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
  if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
    return SDValue();

  // select Cond, 0, 0 → 0
  if (IsTAllZero && IsFAllZero) {
    return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
                                : DAG.getConstant(0, DL, VT);
  }

  // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
  // NOTE(review): one more conjunct of this condition appears to be missing
  // from this listing — confirm against upstream.
  APInt TValAPInt;
  if (Cond.getOpcode() == ISD::SETCC &&
      Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
      Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
      ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
      TValAPInt.isOne() &&
      ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
    return SDValue();
  }

  // To use the condition operand as a bitwise mask, it must have elements that
  // are the same size as the select elements. i.e, the condition operand must
  // have already been promoted from the IR select condition type <N x i1>.
  // Don't check if the types themselves are equal because that excludes
  // vector floating-point selects.
  if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // Cond value must be 'sign splat' to be converted to a logical op.
  if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
    return SDValue();

  // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
  if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
      Cond.getOpcode() == ISD::SETCC &&
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
          CondVT) {
    if (IsTAllZero || IsFAllOne) {
      SDValue CC = Cond.getOperand(2);
      // NOTE(review): the line declaring/computing InverseCC appears to be
      // missing from this listing — confirm against upstream.
          cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
      Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
                          InverseCC);
      std::swap(TVal, FVal);
      std::swap(IsTAllOne, IsFAllOne);
      std::swap(IsTAllZero, IsFAllZero);
    }
  }

  // NOTE(review): the assert condition preceding this message string appears
  // to be missing from this listing — confirm against upstream.
         "Select condition no longer all-sign bits");

  // select Cond, -1, 0 → bitcast Cond
  if (IsTAllOne && IsFAllZero)
    return DAG.getBitcast(VT, Cond);

  // select Cond, -1, x → or Cond, x
  if (IsTAllOne) {
    SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
    SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
    return DAG.getBitcast(VT, Or);
  }

  // select Cond, x, 0 → and Cond, x
  if (IsFAllZero) {
    SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
    SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
    return DAG.getBitcast(VT, And);
  }

  // select Cond, 0, x -> and not(Cond), x
  // NOTE(review): the second conjunct of this condition appears to be missing
  // from this listing — confirm against upstream.
  if (IsTAllZero &&
    SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
    SDValue And =
        DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
    return DAG.getBitcast(VT, And);
  }

  return SDValue();
}
13450
// Combine a VSELECT node: generic select simplifications, boolean-flip
// canonicalization, integer-abs canonicalization, min/max and saturating
// add/sub pattern matching, and constant-arm folds.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition vector
  SDValue N1 = N->getOperand(1); // true arm
  SDValue N2 = N->getOperand(2); // false arm
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // NOTE(review): the guard for this early return appears to be missing from
  // this listing — confirm against upstream.
    return V;

  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
  if (!TLI.isTargetCanonicalSelect(N))
    if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
      return DAG.getSelect(DL, VT, F, N2, N1);

  // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
  // NOTE(review): parts of this condition appear to be missing from this
  // listing — confirm against upstream.
  if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
      TLI.getBooleanContents(N0.getValueType()) ==
    return DAG.getNode(
        ISD::ADD, DL, N1.getValueType(), N2,
        DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
  }

  // Canonicalize integer abs.
  // vselect (setg[te] X, 0), X, -X ->
  // vselect (setgt X, -1), X, -X ->
  // vselect (setl[te] X, 0), -X, X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    // NOTE(review): the declaration of CC (the setcc condition code) and the
    // `isAbs = true;` branch bodies below appear to be missing from this
    // listing — confirm against upstream.
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))

    if (isAbs) {
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // vselect x, y (fcmp lt x, y) -> fminnum x, y
    // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //
    if (N0.hasOneUse() &&
        isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
        return FMinMax;
    }

    if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
      return S;
    if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
      return S;

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    // requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      // NOTE(review): the declaration of WideVT appears to be missing from
      // this listing — confirm against upstream.
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        // vselect (ext (setcc load(X), C)), N1, N2 -->
        // vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }

    if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
      return ABD;

    // Match VSELECTs into add with unsigned saturation.
    if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is vector with all bits set.
      // If it's on the left side invert the predicate to simplify logic below.
      SDValue Other;
      ISD::CondCode SatCC = CC;
      // NOTE(review): the guard opening this first branch appears to be
      // missing from this listing — confirm against upstream.
        Other = N2;
        SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
      } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
        Other = N1;
      }

      if (Other && Other.getOpcode() == ISD::ADD) {
        SDValue CondLHS = LHS, CondRHS = RHS;
        SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

        // Canonicalize condition operands.
        if (SatCC == ISD::SETUGE) {
          std::swap(CondLHS, CondRHS);
          SatCC = ISD::SETULE;
        }

        // We can test against either of the addition operands.
        // x <= x+y ? x+y : ~0 --> uaddsat x, y
        // x+y >= x ? x+y : ~0 --> uaddsat x, y
        if (SatCC == ISD::SETULE && Other == CondRHS &&
            (OpLHS == CondLHS || OpRHS == CondLHS))
          return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);

        if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
            (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
             OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
            CondLHS == OpLHS) {
          // If the RHS is a constant we have to reverse the const
          // canonicalization.
          // x >= ~C ? x+C : ~0 --> uaddsat x, C
          auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
            return Cond->getAPIntValue() == ~Op->getAPIntValue();
          };
          if (SatCC == ISD::SETULE &&
              ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
            return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
        }
      }
    }

    // Match VSELECTs into sub with unsigned saturation.
    if (hasOperation(ISD::USUBSAT, VT)) {
      // Check if one of the arms of the VSELECT is a zero vector. If it's on
      // the left side invert the predicate to simplify logic below.
      SDValue Other;
      ISD::CondCode SatCC = CC;
      // NOTE(review): the guards for these two branches appear to be missing
      // from this listing — confirm against upstream.
        Other = N2;
        SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
        Other = N1;
      }

      // zext(x) >= y ? trunc(zext(x) - y) : 0
      // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
      // zext(x) > y ? trunc(zext(x) - y) : 0
      // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
      if (Other && Other.getOpcode() == ISD::TRUNCATE &&
          Other.getOperand(0).getOpcode() == ISD::SUB &&
          (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
        SDValue OpLHS = Other.getOperand(0).getOperand(0);
        SDValue OpRHS = Other.getOperand(0).getOperand(1);
        if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
          if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
                                              DAG, DL))
            return R;
      }

      if (Other && Other.getNumOperands() == 2) {
        SDValue CondRHS = RHS;
        SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

        if (OpLHS == LHS) {
          // Look for a general sub with unsigned saturation first.
          // x >= y ? x-y : 0 --> usubsat x, y
          // x > y ? x-y : 0 --> usubsat x, y
          if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
              Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
            return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);

          if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
            if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
                CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
              // If the RHS is a constant we have to reverse the const
              // canonicalization.
              // x > C-1 ? x+-C : 0 --> usubsat x, C
              auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
                return (!Op && !Cond) ||
                       (Op && Cond &&
                        Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
              };
              if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
                  ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
                                            /*AllowUndefs*/ true)) {
                OpRHS = DAG.getNegative(OpRHS, DL, VT);
                return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
              }

              // Another special case: If C was a sign bit, the sub has been
              // canonicalized into a xor.
              // FIXME: Would it be better to use computeKnownBits to
              // determine whether it's safe to decanonicalize the xor?
              // x s< 0 ? x^C : 0 --> usubsat x, C
              // NOTE(review): one conjunct of this condition appears to be
              // missing from this listing — confirm against upstream.
              APInt SplatValue;
              if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
                  ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
                  SplatValue.isSignMask()) {
                // Note that we have to rebuild the RHS constant here to
                // ensure we don't rely on particular values of undef lanes.
                OpRHS = DAG.getConstant(SplatValue, DL, VT);
                return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
              }
            }
          }
        }
      }
    }

    // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
    // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
    if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
      return UMin;
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  // Fold (vselect all_ones, N1, N2) -> N1
  // NOTE(review): the guards for these two folds appear to be missing from
  // this listing — confirm against upstream.
    return N1;
  // Fold (vselect all_zeros, N1, N2) -> N2
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  // NOTE(review): the remainder of this condition appears to be missing from
  // this listing — confirm against upstream.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  // NOTE(review): the guarded fold under this hasOperation check, and the
  // guard for the subsequent `return SDValue(N, 0);`, appear to be missing
  // from this listing — confirm against upstream.
  if (hasOperation(ISD::SRA, VT))
    return V;

    return SDValue(N, 0);

  if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
    return V;

  return SDValue();
}
13730
13731SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13732 SDValue N0 = N->getOperand(0);
13733 SDValue N1 = N->getOperand(1);
13734 SDValue N2 = N->getOperand(2);
13735 SDValue N3 = N->getOperand(3);
13736 SDValue N4 = N->getOperand(4);
13737 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13738 SDLoc DL(N);
13739
13740 // fold select_cc lhs, rhs, x, x, cc -> x
13741 if (N2 == N3)
13742 return N2;
13743
13744 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13745 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13746 isNullConstant(N1))
13747 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13748
13749 // Determine if the condition we're dealing with is constant
13750 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13751 CC, DL, false)) {
13752 AddToWorklist(SCC.getNode());
13753
13754 // cond always true -> true val
13755 // cond always false -> false val
13756 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13757 return SCCC->isZero() ? N3 : N2;
13758
13759 // When the condition is UNDEF, just return the first operand. This is
13760 // coherent the DAG creation, no setcc node is created in this case
13761 if (SCC->isUndef())
13762 return N2;
13763
13764 // Fold to a simpler select_cc
13765 if (SCC.getOpcode() == ISD::SETCC) {
13766 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13767 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13768 SCC.getOperand(2), SCC->getFlags());
13769 }
13770 }
13771
13772 // If we can fold this based on the true/false value, do so.
13773 if (SimplifySelectOps(N, N2, N3))
13774 return SDValue(N, 0); // Don't revisit N.
13775
13776 // fold select_cc into other things, such as min/max/abs
13777 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13778}
13779
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desired
  // we keep the argument to a brcond as a setcc as much as possible.
  bool PreferSetCC =
      N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;

  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  SDLoc DL(N);

  if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
    // If we prefer to have a setcc, and we don't, we'll try our best to
    // recreate one using rebuildSetCC.
    if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
      SDValue NewSetCC = rebuildSetCC(Combined);

      // We don't have anything interesting to combine to.
      if (NewSetCC.getNode() == N)
        return SDValue();

      if (NewSetCC)
        return NewSetCC;
    }
    return Combined;
  }

  // Optimize
  // 1) (icmp eq/ne (and X, C0), (shift X, C1))
  // or
  // 2) (icmp eq/ne X, (rotate X, C1))
  // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
  // remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
  // Then:
  // If C1 is a power of 2, then the rotate and shift+and versions are
  // equivalent, so we can interchange them depending on target preference.
  // Otherwise, if we have the shift+and version we can interchange srl/shl
  // which in turn affects the constant C0. We can use this to get better
  // constants again determined by target preference.
  if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
    // Matches (and X, C0) compared against (srl/shl X, C1) with the same X.
    auto IsAndWithShift = [](SDValue A, SDValue B) {
      return A.getOpcode() == ISD::AND &&
             (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
             A.getOperand(0) == B.getOperand(0);
    };
    // Matches X compared against (rotl/rotr X, C1).
    auto IsRotateWithOp = [](SDValue A, SDValue B) {
      return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
             B.getOperand(0) == A;
    };
    SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
    bool IsRotate = false;

    // Find either shift+and or rotate pattern.
    if (IsAndWithShift(N0, N1)) {
      AndOrOp = N0;
      ShiftOrRotate = N1;
    } else if (IsAndWithShift(N1, N0)) {
      AndOrOp = N1;
      ShiftOrRotate = N0;
    } else if (IsRotateWithOp(N0, N1)) {
      IsRotate = true;
      AndOrOp = N0;
      ShiftOrRotate = N1;
    } else if (IsRotateWithOp(N1, N0)) {
      IsRotate = true;
      AndOrOp = N1;
      ShiftOrRotate = N0;
    }

    if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
        (IsRotate || AndOrOp.hasOneUse())) {
      EVT OpVT = N0.getValueType();
      // Get constant shift/rotate amount and possibly mask (if its shift+and
      // variant).
      auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
        ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
                                                    /*AllowTrunc*/ false);
        if (CNode == nullptr)
          return std::nullopt;
        return CNode->getAPIntValue();
      };
      std::optional<APInt> AndCMask =
          IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
      std::optional<APInt> ShiftCAmt =
          GetAPIntValue(ShiftOrRotate.getOperand(1));
      unsigned NumBits = OpVT.getScalarSizeInBits();

      // We found constants.
      if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
        unsigned ShiftOpc = ShiftOrRotate.getOpcode();
        // Check that the constants meet the constraints.
        bool CanTransform = IsRotate;
        if (!CanTransform) {
          // Check that mask and shift complement each other.
          CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
          // Check that we are comparing all bits.
          CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
          // Check that the and mask is correct for the shift.
          CanTransform &=
              ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
        }

        // See if target prefers another shift/rotate opcode.
        unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
            OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
        // Transform is valid and we have a new preference.
        if (CanTransform && NewShiftOpc != ShiftOpc) {
          SDValue NewShiftOrRotate =
              DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
                          ShiftOrRotate.getOperand(1));
          SDValue NewAndOrOp = SDValue();

          if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
            // Recompute the mask for the new shift direction: SHL keeps the
            // high bits of the comparison, SRL keeps the low bits.
            APInt NewMask =
                NewShiftOpc == ISD::SHL
                    ? APInt::getHighBitsSet(NumBits,
                                            NumBits - ShiftCAmt->getZExtValue())
                    : APInt::getLowBitsSet(NumBits,
                                           NumBits - ShiftCAmt->getZExtValue());
            NewAndOrOp =
                DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
                            DAG.getConstant(NewMask, DL, OpVT));
          } else {
            // The rotate form needs no mask; compare against the raw operand.
            NewAndOrOp = ShiftOrRotate.getOperand(0);
          }

          return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
        }
      }
    }
  }
  return SDValue();
}
13914
13915SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13916 SDValue LHS = N->getOperand(0);
13917 SDValue RHS = N->getOperand(1);
13918 SDValue Carry = N->getOperand(2);
13919 SDValue Cond = N->getOperand(3);
13920
13921 // If Carry is false, fold to a regular SETCC.
13922 if (isNullConstant(Carry))
13923 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13924
13925 return SDValue();
13926}
13927
13928/// Check if N satisfies:
13929/// N is used once.
13930/// N is a Load.
13931/// The load is compatible with ExtOpcode. It means
13932/// If load has explicit zero/sign extension, ExpOpcode must have the same
13933/// extension.
13934/// Otherwise returns true.
13935static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13936 if (!N.hasOneUse())
13937 return false;
13938
13939 if (!isa<LoadSDNode>(N))
13940 return false;
13941
13942 LoadSDNode *Load = cast<LoadSDNode>(N);
13943 ISD::LoadExtType LoadExt = Load->getExtensionType();
13944 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13945 return true;
13946
13947 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13948 // extension.
13949 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13950 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13951 return false;
13952
13953 return true;
13954}
13955
/// Fold
/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
                                        SelectionDAG &DAG, const SDLoc &DL,
                                        CombineLevel Level) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
          Opcode == ISD::ANY_EXTEND) &&
         "Expected EXTEND dag node in input!");

  // Match a select feeding the extend; bail out if N0 is not a select.
  SDValue Cond, Op1, Op2;
                             m_Value(Op2)))))
    return SDValue();

  // Both select arms must be single-use loads whose extension kind (if any)
  // agrees with the extend opcode being folded in.
  if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
    return SDValue();

  // Map the extend opcode to the corresponding extending-load flavor;
  // ANY_EXTEND maps to the unconstrained EXTLOAD.
  auto ExtLoadOpcode = ISD::EXTLOAD;
  if (Opcode == ISD::SIGN_EXTEND)
    ExtLoadOpcode = ISD::SEXTLOAD;
  else if (Opcode == ISD::ZERO_EXTEND)
    ExtLoadOpcode = ISD::ZEXTLOAD;

  // Illegal VSELECT may ISel fail if happen after legalization (DAG
  // Combine2), so we should conservatively check the OperationAction.
  LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
  LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
  if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
      !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
      (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
    return SDValue();

  // Extend each arm and re-create the select in the wider type.
  SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
  SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
  return DAG.getSelect(DL, VT, Cond, Ext1, Ext2);
}
14000
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
                                         const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
         "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // getNode constant-folds the extend of a ConstantSDNode directly.
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, DL, VT, N0);

  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  if (N0->getOpcode() == ISD::SELECT) {
    SDValue Op1 = N0->getOperand(1);
    SDValue Op2 = N0->getOperand(2);
    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg.i.e.
      //
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
      // t2: i64 = any_extend t1
      // -->
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
      // -->
      // t4: i64 = sign_extend_inreg t3
      unsigned FoldOpc = Opcode;
      if (FoldOpc == ISD::ANY_EXTEND)
        FoldOpc = ISD::SIGN_EXTEND;
      return DAG.getSelect(DL, VT, N0->getOperand(0),
                           DAG.getNode(FoldOpc, DL, VT, Op1),
                           DAG.getNode(FoldOpc, DL, VT, Op2));
    }
  }

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  unsigned NumElts = VT.getVectorNumElements();

  // Extend each element individually, preserving undef lanes where the
  // extension kind permits (zext/sext of undef must become a defined zero).
  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.isUndef()) {
      if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
        Elts.push_back(DAG.getUNDEF(SVT));
      else
        Elts.push_back(DAG.getConstant(0, DL, SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}
14085
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable. Setcc users that can be rewritten
// to compare the extended value are collected in ExtendNodes.
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  // Classify every user of the load value (other than N itself).
  for (SDUse &Use : N0->uses()) {
    SDNode *User = Use.getUser();
    if (User == N)
      continue;
    if (Use.getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      // The non-load operand of the setcc must be a constant for the
      // comparison to be extendable.
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDUse &Use : N->uses()) {
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return !ExtendNodes.empty();
  }
  return true;
}
14145
14146void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14147 SDValue OrigLoad, SDValue ExtLoad,
14148 ISD::NodeType ExtType) {
14149 // Extend SetCC uses if necessary.
14150 SDLoc DL(ExtLoad);
14151 for (SDNode *SetCC : SetCCs) {
14153
14154 for (unsigned j = 0; j != 2; ++j) {
14155 SDValue SOp = SetCC->getOperand(j);
14156 if (SOp == OrigLoad)
14157 Ops.push_back(ExtLoad);
14158 else
14159 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
14160 }
14161
14162 Ops.push_back(SetCC->getOperand(2));
14163 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
14164 }
14165}
14166
14167// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
14168SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14169 SDValue N0 = N->getOperand(0);
14170 EVT DstVT = N->getValueType(0);
14171 EVT SrcVT = N0.getValueType();
14172
14173 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14174 N->getOpcode() == ISD::ZERO_EXTEND) &&
14175 "Unexpected node type (not an extend)!");
14176
14177 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14178 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14179 // (v8i32 (sext (v8i16 (load x))))
14180 // into:
14181 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14182 // (v4i32 (sextload (x + 16)))))
14183 // Where uses of the original load, i.e.:
14184 // (v8i16 (load x))
14185 // are replaced with:
14186 // (v8i16 (truncate
14187 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14188 // (v4i32 (sextload (x + 16)))))))
14189 //
14190 // This combine is only applicable to illegal, but splittable, vectors.
14191 // All legal types, and illegal non-vector types, are handled elsewhere.
14192 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14193 //
14194 if (N0->getOpcode() != ISD::LOAD)
14195 return SDValue();
14196
14197 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14198
14199 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14200 !N0.hasOneUse() || !LN0->isSimple() ||
14201 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14203 return SDValue();
14204
14206 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14207 return SDValue();
14208
14209 ISD::LoadExtType ExtType =
14210 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14211
14212 // Try to split the vector types to get down to legal types.
14213 EVT SplitSrcVT = SrcVT;
14214 EVT SplitDstVT = DstVT;
14215 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
14216 SplitSrcVT.getVectorNumElements() > 1) {
14217 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14218 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14219 }
14220
14221 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
14222 return SDValue();
14223
14224 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14225
14226 SDLoc DL(N);
14227 const unsigned NumSplits =
14228 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14229 const unsigned Stride = SplitSrcVT.getStoreSize();
14232
14233 SDValue BasePtr = LN0->getBasePtr();
14234 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14235 const unsigned Offset = Idx * Stride;
14236
14238 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14239 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14240 SplitSrcVT, LN0->getBaseAlign(),
14241 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14242
14243 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14244
14245 Loads.push_back(SplitLoad.getValue(0));
14246 Chains.push_back(SplitLoad.getValue(1));
14247 }
14248
14249 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14250 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14251
14252 // Simplify TF.
14253 AddToWorklist(NewChain.getNode());
14254
14255 CombineTo(N, NewValue);
14256
14257 // Replace uses of the original load (before extension)
14258 // with a truncate of the concatenated sextloaded vectors.
14259 SDValue Trunc =
14260 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14261 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14262 CombineTo(N0.getNode(), Trunc, NewChain);
14263 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14264}
14265
14266// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14267// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14268SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14269 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14270 EVT VT = N->getValueType(0);
14271 EVT OrigVT = N->getOperand(0).getValueType();
14272 if (TLI.isZExtFree(OrigVT, VT))
14273 return SDValue();
14274
14275 // and/or/xor
14276 SDValue N0 = N->getOperand(0);
14277 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14278 N0.getOperand(1).getOpcode() != ISD::Constant ||
14279 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14280 return SDValue();
14281
14282 // shl/shr
14283 SDValue N1 = N0->getOperand(0);
14284 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14285 N1.getOperand(1).getOpcode() != ISD::Constant ||
14286 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14287 return SDValue();
14288
14289 // load
14290 if (!isa<LoadSDNode>(N1.getOperand(0)))
14291 return SDValue();
14292 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14293 EVT MemVT = Load->getMemoryVT();
14294 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14295 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14296 return SDValue();
14297
14298
14299 // If the shift op is SHL, the logic op must be AND, otherwise the result
14300 // will be wrong.
14301 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14302 return SDValue();
14303
14304 if (!N0.hasOneUse() || !N1.hasOneUse())
14305 return SDValue();
14306
14308 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14309 ISD::ZERO_EXTEND, SetCCs, TLI))
14310 return SDValue();
14311
14312 // Actually do the transformation.
14313 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14314 Load->getChain(), Load->getBasePtr(),
14315 Load->getMemoryVT(), Load->getMemOperand());
14316
14317 SDLoc DL1(N1);
14318 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14319 N1.getOperand(1));
14320
14321 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14322 SDLoc DL0(N0);
14323 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14324 DAG.getConstant(Mask, DL0, VT));
14325
14326 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14327 CombineTo(N, And);
14328 if (SDValue(Load, 0).hasOneUse()) {
14329 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14330 } else {
14331 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14332 Load->getValueType(0), ExtLoad);
14333 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14334 }
14335
14336 // N0 is dead at this point.
14337 recursivelyDeleteUnusedNodes(N0.getNode());
14338
14339 return SDValue(N,0); // Return N so it doesn't get rechecked!
14340}
14341
14342/// If we're narrowing or widening the result of a vector select and the final
14343/// size is the same size as a setcc (compare) feeding the select, then try to
14344/// apply the cast operation to the select's operands because matching vector
14345/// sizes for a select condition and other operands should be more efficient.
14346SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14347 unsigned CastOpcode = Cast->getOpcode();
14348 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14349 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14350 CastOpcode == ISD::FP_ROUND) &&
14351 "Unexpected opcode for vector select narrowing/widening");
14352
14353 // We only do this transform before legal ops because the pattern may be
14354 // obfuscated by target-specific operations after legalization. Do not create
14355 // an illegal select op, however, because that may be difficult to lower.
14356 EVT VT = Cast->getValueType(0);
14357 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14358 return SDValue();
14359
14360 SDValue VSel = Cast->getOperand(0);
14361 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14362 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14363 return SDValue();
14364
14365 // Does the setcc have the same vector size as the casted select?
14366 SDValue SetCC = VSel.getOperand(0);
14367 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14368 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14369 return SDValue();
14370
14371 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14372 SDValue A = VSel.getOperand(1);
14373 SDValue B = VSel.getOperand(2);
14374 SDValue CastA, CastB;
14375 SDLoc DL(Cast);
14376 if (CastOpcode == ISD::FP_ROUND) {
14377 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14378 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14379 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14380 } else {
14381 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14382 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14383 }
14384 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14385}
14386
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  // Accept loads that already extend the requested way, or plain extloads
  // whose extension kind is unconstrained.
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  // The wide extending load must be legal unless we are pre-legalization
  // with a simple scalar load.
  if ((LegalOperations || !LN0->isSimple() ||
       VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  // Replace the extend with the widened load and forward the chain result.
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
14416
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target. NonNegZExt can be set to true if a zero
// extend has the nonneg flag to allow use of sextload if profitable.
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc,
                                  bool NonNegZExt = false) {
    return {};

  // If this is zext nneg, see if it would make sense to treat it as a sext.
  if (NonNegZExt) {
    assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
           "Unexpected load type or opcode");
    // A signed-compare user suggests the sign-extended form will fold better.
    for (SDNode *User : N0->users()) {
      if (User->getOpcode() == ISD::SETCC) {
        if (ISD::isSignedIntSetCC(CC)) {
          ExtLoadType = ISD::SEXTLOAD;
          ExtOpc = ISD::SIGN_EXTEND;
          break;
        }
      }
    }
  }

  // TODO: isFixedLengthVector() should be removed and any negative effects on
  // code generation being the result of that target's implementation of
  // isVectorLoadExtDesirable().
  if ((LegalOperations || VT.isFixedLengthVector() ||
       !cast<LoadSDNode>(N0)->isSimple()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
    return {};

  bool DoXform = true;
  // With multiple uses, only transform if the other users can be extended too.
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    // Other users remain: give them a truncate of the extended load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
14481
// fold ([s|z]ext (masked_load x)) -> ([s|z] extending masked_load x)
static SDValue
                         bool LegalOperations, SDNode *N, SDValue N0,
                         ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
  // Only a single-use, non-extending masked load can absorb the extend.
  if (!N0.hasOneUse())
    return SDValue();

  if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
      !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
    return SDValue();

  if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Build the extending masked load; the passthru is extended as well so the
  // masked-off lanes produce consistent wide values.
  SDLoc dl(Ld);
  SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
  SDValue NewLoad = DAG.getMaskedLoad(
      VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
      PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
      ExtLoadType, Ld->isExpandingLoad());
  // Forward the chain result to the new load's chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
  return NewLoad;
}
14509
// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
                                        const TargetLowering &TLI, EVT VT,
                                        SDValue N0,
                                        ISD::LoadExtType ExtLoadType) {
  auto *ALoad = dyn_cast<AtomicSDNode>(N0);
  if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
    return {};
  EVT MemoryVT = ALoad->getMemoryVT();
  if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
    return {};
  // Can't fold into ALoad if it is already extending differently.
  ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
  if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
      (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
    return {};

  EVT OrigVT = ALoad->getValueType(0);
  assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
  // Create the extending atomic load, then hand the old load's value users a
  // truncate back to the original narrow type.
  auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
      ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
      ALoad->getBasePtr(), ALoad->getMemOperand()));
      SDValue(ALoad, 0),
      DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
  // Update the chain uses.
  DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
  return SDValue(NewALoad, 0);
}
14539
                                    bool LegalOperations) {
  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");

  // Only handle a single-use i1 setcc feeding the extend, and only before
  // operation legalization.
  SDValue SetCC = N->getOperand(0);
  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
    return SDValue();

  SDValue X = SetCC.getOperand(0);
  SDValue Ones = SetCC.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  EVT VT = N->getValueType(0);
  EVT XVT = X.getValueType();
  // setge X, C is canonicalized to setgt, so we do not need to match that
  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
  // not require the 'not' op.
  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
    // Invert and smear/shift the sign bit:
    // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
    // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
    SDLoc DL(N);
    unsigned ShCt = VT.getSizeInBits() - 1;
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
      SDValue NotX = DAG.getNOT(DL, X, VT);
      SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
      // sext smears the (inverted) sign bit; zext isolates it.
      auto ShiftOpcode =
          N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
      return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
    }
  }
  return SDValue();
}
14575
// Try to simplify (sext (setcc ...)): either fold the sign-extend into a
// wider vector setcc, zext the compare operands instead, or lower to a
// select of constants. Returns an empty SDValue when no fold applies.
14576SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14577 SDValue N0 = N->getOperand(0);
14578 if (N0.getOpcode() != ISD::SETCC)
14579 return SDValue();
14580
14581 SDValue N00 = N0.getOperand(0);
14582 SDValue N01 = N0.getOperand(1);
// NOTE(review): original line 14583 is missing from this extraction —
// presumably the extraction of the condition code, e.g.
// "ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();"
// (CC is used throughout the folds below).
14584 EVT VT = N->getValueType(0);
14585 EVT N00VT = N00.getValueType();
14586 SDLoc DL(N);
14587
14588 // Propagate fast-math-flags.
14589 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14590
14591 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14592 // the same size as the compared operands. Try to optimize sext(setcc())
14593 // if this is the case.
14594 if (VT.isVector() && !LegalOperations &&
14595 TLI.getBooleanContents(N00VT) ==
// NOTE(review): original line 14596 is missing from this extraction —
// presumably "TargetLowering::ZeroOrNegativeOneBooleanContent) {" closing
// this condition.
14597 EVT SVT = getSetCCResultType(N00VT);
14598
14599 // If we already have the desired type, don't change it.
14600 if (SVT != N0.getValueType()) {
14601 // We know that the # elements of the results is the same as the
14602 // # elements of the compare (and the # elements of the compare result
14603 // for that matter). Check to see that they are the same size. If so,
14604 // we know that the element size of the sext'd result matches the
14605 // element size of the compare operands.
14606 if (VT.getSizeInBits() == SVT.getSizeInBits())
14607 return DAG.getSetCC(DL, VT, N00, N01, CC);
14608
14609 // If the desired elements are smaller or larger than the source
14610 // elements, we can use a matching integer vector type and then
14611 // truncate/sign extend.
14612 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14613 if (SVT == MatchingVecType) {
14614 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14615 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14616 }
14617 }
14618
14619 // Try to eliminate the sext of a setcc by zexting the compare operands.
14620 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
// NOTE(review): original line 14621 is missing from this extraction —
// presumably the second half of this condition, e.g.
// "!TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {".
14622 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14623 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14624 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14625
14626 // We have an unsupported narrow vector compare op that would be legal
14627 // if extended to the destination type. See if the compare operands
14628 // can be freely extended to the destination type.
14629 auto IsFreeToExtend = [&](SDValue V) {
14630 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14631 return true;
14632 // Match a simple, non-extended load that can be converted to a
14633 // legal {z/s}ext-load.
14634 // TODO: Allow widening of an existing {z/s}ext-load?
14635 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14636 ISD::isUNINDEXEDLoad(V.getNode()) &&
14637 cast<LoadSDNode>(V)->isSimple() &&
14638 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14639 return false;
14640
14641 // Non-chain users of this value must either be the setcc in this
14642 // sequence or extends that can be folded into the new {z/s}ext-load.
14643 for (SDUse &Use : V->uses()) {
14644 // Skip uses of the chain and the setcc.
14645 SDNode *User = Use.getUser();
14646 if (Use.getResNo() != 0 || User == N0.getNode())
14647 continue;
14648 // Extra users must have exactly the same cast we are about to create.
14649 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14650 // is enhanced similarly.
14651 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14652 return false;
14653 }
14654 return true;
14655 };
14656
// Both compare operands must be free to extend for the rewrite to pay off.
14657 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14658 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14659 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14660 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14661 }
14662 }
14663 }
14664
14665 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14666 // Here, T can be 1 or -1, depending on the type of the setcc and
14667 // getBooleanContents().
14668 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14669
14670 // To determine the "true" side of the select, we need to know the high bit
14671 // of the value returned by the setcc if it evaluates to true.
14672 // If the type of the setcc is i1, then the true case of the select is just
14673 // sext(i1 1), that is, -1.
14674 // If the type of the setcc is larger (say, i8) then the value of the high
14675 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14676 // of the appropriate width.
14677 SDValue ExtTrueVal = (SetCCWidth == 1)
14678 ? DAG.getAllOnesConstant(DL, VT)
14679 : DAG.getBoolConstant(true, DL, VT, N00VT)
14680 SDValue Zero = DAG.getConstant(0, DL, VT);
14681 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14682 return SCC;
14683
14684 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14685 EVT SetCCVT = getSetCCResultType(N00VT);
14686 // Don't do this transform for i1 because there's a select transform
14687 // that would reverse it.
14688 // TODO: We should not do this transform at all without a target hook
14689 // because a sext is likely cheaper than a select?
14690 if (SetCCVT.getScalarSizeInBits() != 1 &&
14691 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14692 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14693 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14694 }
14695 }
14696
14697 return SDValue();
14698}
14699
// Combine an ISD::SIGN_EXTEND node: constant folding, collapsing of nested
// extends, trunc/sext elimination, folding into (atomic/masked/plain)
// extending loads, sign-bit-test folds, and sext->zext when the sign bit is
// known zero. Returns an empty SDValue when no combine applies.
14700SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14701 SDValue N0 = N->getOperand(0);
14702 EVT VT = N->getValueType(0);
14703 SDLoc DL(N);
14704
14705 if (VT.isVector())
14706 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14707 return FoldedVOp;
14708
14709 // sext(undef) = 0 because the top bit will all be the same.
14710 if (N0.isUndef())
14711 return DAG.getConstant(0, DL, VT);
14712
14713 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14714 return Res;
14715
14716 // fold (sext (sext x)) -> (sext x)
14717 // fold (sext (aext x)) -> (sext x)
14718 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14719 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14720
14721 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14722 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
// NOTE(review): original lines 14723-14724 are missing from this extraction —
// presumably the guarding "if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG
// || N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)" condition for the
// return below.
14725 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14726 N0.getOperand(0));
14727
14728 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14729 SDValue N00 = N0.getOperand(0);
14730 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14731 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14732 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14733 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14734 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14735 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14736 }
14737
14738 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14739 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14740 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14741 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14742 N0->getOperand(1));
14743 }
14744 }
14745 }
14746
14747 if (N0.getOpcode() == ISD::TRUNCATE) {
14748 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14749 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14750 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14751 SDNode *oye = N0.getOperand(0).getNode();
14752 if (NarrowLoad.getNode() != N0.getNode()) {
14753 CombineTo(N0.getNode(), NarrowLoad);
14754 // CombineTo deleted the truncate, if needed, but not what's under it.
14755 AddToWorklist(oye);
14756 }
14757 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14758 }
14759
14760 // See if the value being truncated is already sign extended. If so, just
14761 // eliminate the trunc/sext pair.
14762 SDValue Op = N0.getOperand(0);
14763 unsigned OpBits = Op.getScalarValueSizeInBits();
14764 unsigned MidBits = N0.getScalarValueSizeInBits();
14765 unsigned DestBits = VT.getScalarSizeInBits();
14766
// Either the truncate is nsw or Op provably has enough sign bits that the
// trunc/sext round trip is a no-op on the value.
14767 if (N0->getFlags().hasNoSignedWrap() ||
14768 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14769 if (OpBits == DestBits) {
14770 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14771 // bits, it is already ready.
14772 return Op;
14773 }
14774
14775 if (OpBits < DestBits) {
14776 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14777 // bits, just sext from i32.
14778 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14779 }
14780
14781 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14782 // bits, just truncate to i32.
14783 SDNodeFlags Flags;
14784 Flags.setNoSignedWrap(true);
14785 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14786 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14787 }
14788
14789 // fold (sext (truncate x)) -> (sextinreg x).
14790 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14791 N0.getValueType())) {
14792 if (OpBits < DestBits)
14793 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14794 else if (OpBits > DestBits)
14795 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14796 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14797 DAG.getValueType(N0.getValueType()));
14798 }
14799 }
14800
14801 // Try to simplify (sext (load x)).
14802 if (SDValue foldedExt =
14803 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
// NOTE(review): original line 14804 is missing from this extraction —
// presumably the trailing arguments "ISD::SEXTLOAD, ISD::SIGN_EXTEND))".
14805 return foldedExt;
14806
14807 if (SDValue foldedExt =
14808 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
// NOTE(review): original line 14809 is missing from this extraction —
// presumably "ISD::SEXTLOAD, ISD::SIGN_EXTEND))" closing this call.
14810 return foldedExt;
14811
14812 // fold (sext (load x)) to multiple smaller sextloads.
14813 // Only on illegal but splittable vectors.
14814 if (SDValue ExtLoad = CombineExtLoad(N))
14815 return ExtLoad;
14816
14817 // Try to simplify (sext (sextload x)).
14818 if (SDValue foldedExt = tryToFoldExtOfExtload(
14819 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14820 return foldedExt;
14821
14822 // Try to simplify (sext (atomic_load x)).
14823 if (SDValue foldedExt =
14824 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14825 return foldedExt;
14826
14827 // fold (sext (and/or/xor (load x), cst)) ->
14828 // (and/or/xor (sextload x), (sext cst))
14829 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14830 isa<LoadSDNode>(N0.getOperand(0)) &&
14831 N0.getOperand(1).getOpcode() == ISD::Constant &&
14832 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14833 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14834 EVT MemVT = LN00->getMemoryVT();
14835 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14836 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
// NOTE(review): original line 14837 is missing from this extraction —
// presumably the declaration "SmallVector<SDNode *, 4> SetCCs;" used by
// ExtendUsesToFormExtLoad/ExtendSetCCUses below.
14838 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14839 ISD::SIGN_EXTEND, SetCCs, TLI);
14840 if (DoXform) {
14841 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14842 LN00->getChain(), LN00->getBasePtr(),
14843 LN00->getMemoryVT(),
14844 LN00->getMemOperand());
// Sign-extend the logic-op constant to the wide type to match the new load.
14845 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14846 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14847 ExtLoad, DAG.getConstant(Mask, DL, VT));
14848 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14849 bool NoReplaceTruncAnd = !N0.hasOneUse();
14850 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14851 CombineTo(N, And);
14852 // If N0 has multiple uses, change other uses as well.
14853 if (NoReplaceTruncAnd) {
14854 SDValue TruncAnd =
// NOTE(review): original line 14855 is missing from this extraction —
// presumably the initializer, e.g. "DAG.getNode(ISD::TRUNCATE, DL,
// N0.getValueType(), And);" feeding the CombineTo below.
14856 CombineTo(N0.getNode(), TruncAnd);
14857 }
14858 if (NoReplaceTrunc) {
14859 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14860 } else {
14861 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14862 LN00->getValueType(0), ExtLoad);
14863 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14864 }
14865 return SDValue(N,0); // Return N so it doesn't get rechecked!
14866 }
14867 }
14868 }
14869
14870 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14871 return V;
14872
14873 if (SDValue V = foldSextSetcc(N))
14874 return V;
14875
14876 // fold (sext x) -> (zext x) if the sign bit is known zero.
14877 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14878 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14879 DAG.SignBitIsZero(N0))
14880 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14881
14882 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14883 return NewVSel;
14884
14885 // Eliminate this sign extend by doing a negation in the destination type:
14886 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14887 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
// NOTE(review): original lines 14888-14890 are missing from this extraction —
// presumably the rest of this condition (the first SUB operand being zero
// and the second being a single-use zext) guarding the fold below.
14891 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14892 return DAG.getNegative(Zext, DL, VT);
14893 }
14894 // Eliminate this sign extend by doing a decrement in the destination type:
14895 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14896 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
// NOTE(review): original lines 14897-14899 are missing from this extraction —
// presumably the rest of this condition (the first ADD operand being a
// single-use zext and the second being all-ones) guarding the fold below.
14900 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14901 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14902 }
14903
14904 // fold sext (not i1 X) -> add (zext i1 X), -1
14905 // TODO: This could be extended to handle bool vectors.
14906 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14907 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14908 TLI.isOperationLegal(ISD::ADD, VT)))) {
14909 // If we can eliminate the 'not', the sext form should be better
14910 if (SDValue NewXor = visitXOR(N0.getNode())) {
14911 // Returning N0 is a form of in-visit replacement that may have
14912 // invalidated N0.
14913 if (NewXor.getNode() == N0.getNode()) {
14914 // Return SDValue here as the xor should have already been replaced in
14915 // this sext.
14916 return SDValue();
14917 }
14918
14919 // Return a new sext with the new xor.
14920 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14921 }
14922
14923 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14924 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14925 }
14926
14927 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14928 return Res;
14929
14930 return SDValue();
14931}
14932
14933/// Given an extending node with a pop-count operand, if the target does not
14934/// support a pop-count in the narrow source type but does support it in the
14935/// destination type, widen the pop-count to the destination type.
14936static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14937 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14938 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14939 "Expected extend op");
14940
// Require a single-use CTPOP so widening it does not duplicate work.
14941 SDValue CtPop = Extend->getOperand(0);
14942 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14943 return SDValue();
14944
14945 EVT VT = Extend->getValueType(0);
14946 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// NOTE(review): original lines 14947-14948 are missing from this extraction —
// presumably the legality condition for this early-out, e.g. bail when CTPOP
// is not legal/custom in the wide type VT or is already legal in the narrow
// source type.
14949 return SDValue();
14950
14951 // zext (ctpop X) --> ctpop (zext X)
14952 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14953 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14954}
14955
14956// If we have (zext (abs X)) where X is a type that will be promoted by type
14957// legalization, convert to (abs (sext X)). But don't extend past a legal type.
14958static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14959 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14960
// Scalar-only transform.
14961 EVT VT = Extend->getValueType(0);
14962 if (VT.isVector())
14963 return SDValue();
14964
// Require a single-use ABS so the widened form fully replaces it.
14965 SDValue Abs = Extend->getOperand(0);
14966 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14967 return SDValue();
14968
14969 EVT AbsVT = Abs.getValueType();
14970 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14971 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
// NOTE(review): original line 14972 is missing from this extraction —
// presumably "TargetLowering::TypePromoteInteger)" completing this check
// that AbsVT would be promoted by type legalization.
14973 return SDValue();
14974
// Widen only to the type legalization would pick, not all the way to VT.
14975 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14976
14977 SDValue SExt =
14978 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14979 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14980 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14981}
14982
// Combine an ISD::ZERO_EXTEND node: constant folding, collapsing nested
// extends, trunc/zext elimination via known-zero bits, folding into
// (atomic/masked/plain) zero-extending loads, setcc folds, shift folds,
// ctpop/abs widening, and CSE of zext-nneg with an existing sext.
// Returns an empty SDValue when no combine applies.
14983SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14984 SDValue N0 = N->getOperand(0);
14985 EVT VT = N->getValueType(0);
14986 SDLoc DL(N);
14987
14988 if (VT.isVector())
14989 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14990 return FoldedVOp;
14991
14992 // zext(undef) = 0
14993 if (N0.isUndef())
14994 return DAG.getConstant(0, DL, VT);
14995
14996 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14997 return Res;
14998
14999 // fold (zext (zext x)) -> (zext x)
15000 // fold (zext (aext x)) -> (zext x)
15001 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15002 SDNodeFlags Flags;
// Preserve the nneg flag only when collapsing zext-of-zext.
15003 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15004 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15005 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
15006 }
15007
15008 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15009 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
// NOTE(review): original lines 15010-15011 are missing from this extraction —
// presumably the guarding "if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG
// || N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)" condition for the
// return below.
15012 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
15013
15014 // fold (zext (truncate x)) -> (zext x) or
15015 // (zext (truncate x)) -> (truncate x)
15016 // This is valid when the truncated bits of x are already zero.
15017 SDValue Op;
15018 KnownBits Known;
15019 if (isTruncateOf(DAG, N0, Op, Known)) {
// Compute exactly which bits the trunc/zext pair would clear; if they are
// already known zero in Op, the pair is redundant.
15020 APInt TruncatedBits =
15021 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
15022 APInt(Op.getScalarValueSizeInBits(), 0) :
15023 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
15024 N0.getScalarValueSizeInBits(),
15025 std::min(Op.getScalarValueSizeInBits(),
15026 VT.getScalarSizeInBits()));
15027 if (TruncatedBits.isSubsetOf(Known.Zero)) {
15028 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15029 DAG.salvageDebugInfo(*N0.getNode());
15030
15031 return ZExtOrTrunc;
15032 }
15033 }
15034
15035 // fold (zext (truncate x)) -> (and x, mask)
15036 if (N0.getOpcode() == ISD::TRUNCATE) {
15037 // fold (zext (truncate (load x))) -> (zext (smaller load x))
15038 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
15039 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15040 SDNode *oye = N0.getOperand(0).getNode();
15041 if (NarrowLoad.getNode() != N0.getNode()) {
15042 CombineTo(N0.getNode(), NarrowLoad);
15043 // CombineTo deleted the truncate, if needed, but not what's under it.
15044 AddToWorklist(oye);
15045 }
15046 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15047 }
15048
15049 EVT SrcVT = N0.getOperand(0).getValueType();
15050 EVT MinVT = N0.getValueType();
15051
// With the nneg flag, a zext behaves like a sext, so reuse the
// already-sign-extended reasoning from visitSIGN_EXTEND here.
15052 if (N->getFlags().hasNonNeg()) {
15053 SDValue Op = N0.getOperand(0);
15054 unsigned OpBits = SrcVT.getScalarSizeInBits();
15055 unsigned MidBits = MinVT.getScalarSizeInBits();
15056 unsigned DestBits = VT.getScalarSizeInBits();
15057
15058 if (N0->getFlags().hasNoSignedWrap() ||
15059 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15060 if (OpBits == DestBits) {
15061 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15062 // bits, it is already ready.
15063 return Op;
15064 }
15065
15066 if (OpBits < DestBits) {
15067 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15068 // bits, just sext from i32.
15069 // FIXME: This can probably be ZERO_EXTEND nneg?
15070 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
15071 }
15072
15073 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15074 // bits, just truncate to i32.
15075 SDNodeFlags Flags;
15076 Flags.setNoSignedWrap(true);
15077 Flags.setNoUnsignedWrap(true);
15078 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
15079 }
15080 }
15081
15082 // Try to mask before the extension to avoid having to generate a larger mask,
15083 // possibly over several sub-vectors.
15084 if (SrcVT.bitsLT(VT) && VT.isVector()) {
15085 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
// NOTE(review): original line 15086 is missing from this extraction —
// presumably the rest of this legality condition (e.g. the zext being
// legal in VT) before the opening brace.
15087 SDValue Op = N0.getOperand(0);
15088 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
15089 AddToWorklist(Op.getNode());
15090 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15091 // Transfer the debug info; the new node is equivalent to N0.
15092 DAG.transferDbgValues(N0, ZExtOrTrunc);
15093 return ZExtOrTrunc;
15094 }
15095 }
15096
15097 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
15098 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15099 AddToWorklist(Op.getNode());
15100 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
15101 // We may safely transfer the debug info describing the truncate node over
15102 // to the equivalent and operation.
15103 DAG.transferDbgValues(N0, And);
15104 return And;
15105 }
15106 }
15107
15108 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
15109 // if either of the casts is not free.
15110 if (N0.getOpcode() == ISD::AND &&
15111 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15112 N0.getOperand(1).getOpcode() == ISD::Constant &&
15113 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
15114 !TLI.isZExtFree(N0.getValueType(), VT))) {
15115 SDValue X = N0.getOperand(0).getOperand(0);
15116 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
// Zero-extend the AND mask to the wide type; it clears the high bits the
// zext would have cleared.
15117 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15118 return DAG.getNode(ISD::AND, DL, VT,
15119 X, DAG.getConstant(Mask, DL, VT));
15120 }
15121
15122 // Try to simplify (zext (load x)).
15123 if (SDValue foldedExt = tryToFoldExtOfLoad(
15124 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
15125 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
15126 return foldedExt;
15127
15128 if (SDValue foldedExt =
15129 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
// NOTE(review): original line 15130 is missing from this extraction —
// presumably "ISD::ZEXTLOAD, ISD::ZERO_EXTEND))" closing this call.
15131 return foldedExt;
15132
15133 // fold (zext (load x)) to multiple smaller zextloads.
15134 // Only on illegal but splittable vectors.
15135 if (SDValue ExtLoad = CombineExtLoad(N))
15136 return ExtLoad;
15137
15138 // Try to simplify (zext (atomic_load x)).
15139 if (SDValue foldedExt =
15140 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
15141 return foldedExt;
15142
15143 // fold (zext (and/or/xor (load x), cst)) ->
15144 // (and/or/xor (zextload x), (zext cst))
15145 // Unless (and (load x) cst) will match as a zextload already and has
15146 // additional users, or the zext is already free.
15147 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15148 isa<LoadSDNode>(N0.getOperand(0)) &&
15149 N0.getOperand(1).getOpcode() == ISD::Constant &&
15150 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15151 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15152 EVT MemVT = LN00->getMemoryVT();
15153 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
15154 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15155 bool DoXform = true;
// NOTE(review): original line 15156 is missing from this extraction —
// presumably the declaration "SmallVector<SDNode *, 4> SetCCs;" used by
// ExtendUsesToFormExtLoad/ExtendSetCCUses below.
15157 if (!N0.hasOneUse()) {
// With extra users, skip the transform when the (and (load)) would already
// match as a zextload on its own.
15158 if (N0.getOpcode() == ISD::AND) {
15159 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15160 EVT LoadResultTy = AndC->getValueType(0);
15161 EVT ExtVT;
15162 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15163 DoXform = false;
15164 }
15165 }
15166 if (DoXform)
15167 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15168 ISD::ZERO_EXTEND, SetCCs, TLI);
15169 if (DoXform) {
15170 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15171 LN00->getChain(), LN00->getBasePtr(),
15172 LN00->getMemoryVT(),
15173 LN00->getMemOperand());
15174 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15175 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15176 ExtLoad, DAG.getConstant(Mask, DL, VT));
15177 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15178 bool NoReplaceTruncAnd = !N0.hasOneUse();
15179 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15180 CombineTo(N, And);
15181 // If N0 has multiple uses, change other uses as well.
15182 if (NoReplaceTruncAnd) {
15183 SDValue TruncAnd =
// NOTE(review): original line 15184 is missing from this extraction —
// presumably the initializer, e.g. "DAG.getNode(ISD::TRUNCATE, DL,
// N0.getValueType(), And);" feeding the CombineTo below.
15185 CombineTo(N0.getNode(), TruncAnd);
15186 }
15187 if (NoReplaceTrunc) {
15188 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15189 } else {
15190 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15191 LN00->getValueType(0), ExtLoad);
15192 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15193 }
15194 return SDValue(N,0); // Return N so it doesn't get rechecked!
15195 }
15196 }
15197 }
15198
15199 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15200 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15201 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15202 return ZExtLoad;
15203
15204 // Try to simplify (zext (zextload x)).
15205 if (SDValue foldedExt = tryToFoldExtOfExtload(
15206 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15207 return foldedExt;
15208
15209 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15210 return V;
15211
15212 if (N0.getOpcode() == ISD::SETCC) {
15213 // Propagate fast-math-flags.
15214 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15215
15216 // Only do this before legalize for now.
15217 if (!LegalOperations && VT.isVector() &&
15218 N0.getValueType().getVectorElementType() == MVT::i1) {
15219 EVT N00VT = N0.getOperand(0).getValueType();
15220 if (getSetCCResultType(N00VT) == N0.getValueType())
15221 return SDValue();
15222
15223 // We know that the # elements of the results is the same as the #
15224 // elements of the compare (and the # elements of the compare result for
15225 // that matter). Check to see that they are the same size. If so, we know
15226 // that the element size of the sext'd result matches the element size of
15227 // the compare operands.
15228 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15229 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15230 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15231 N0.getOperand(1), N0.getOperand(2));
15232 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15233 }
15234
15235 // If the desired elements are smaller or larger than the source
15236 // elements we can use a matching integer vector type and then
15237 // truncate/any extend followed by zext_in_reg.
15238 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15239 SDValue VsetCC =
15240 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15241 N0.getOperand(1), N0.getOperand(2));
15242 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15243 N0.getValueType());
15244 }
15245
15246 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15247 EVT N0VT = N0.getValueType();
15248 EVT N00VT = N0.getOperand(0).getValueType();
15249 if (SDValue SCC = SimplifySelectCC(
15250 DL, N0.getOperand(0), N0.getOperand(1),
15251 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15252 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15253 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15254 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15255 }
15256
15257 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15258 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15259 !TLI.isZExtFree(N0, VT)) {
15260 SDValue ShVal = N0.getOperand(0);
15261 SDValue ShAmt = N0.getOperand(1);
15262 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15263 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15264 if (N0.getOpcode() == ISD::SHL) {
15265 // If the original shl may be shifting out bits, do not perform this
15266 // transformation.
15267 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15268 ShVal.getOperand(0).getValueSizeInBits();
15269 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15270 // If the shift is too large, then see if we can deduce that the
15271 // shift is safe anyway.
15272
15273 // Check if the bits being shifted out are known to be zero.
15274 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15275 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15276 return SDValue();
15277 }
15278 }
15279
15280 // Ensure that the shift amount is wide enough for the shifted value.
15281 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15282 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15283
15284 return DAG.getNode(N0.getOpcode(), DL, VT,
15285 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15286 }
15287 }
15288 }
15289
15290 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15291 return NewVSel;
15292
15293 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15294 return NewCtPop;
15295
15296 if (SDValue V = widenAbs(N, DAG))
15297 return V;
15298
15299 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15300 return Res;
15301
15302 // CSE zext nneg with sext if the zext is not free.
15303 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15304 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15305 if (CSENode)
15306 return SDValue(CSENode, 0);
15307 }
15308
15309 return SDValue();
15310}
15311
/// Combine an ISD::ANY_EXTEND node: constant-fold it, collapse nested
/// extends, narrow loads feeding the extend, and rewrite extended setcc
/// patterns. Returns the replacement value, SDValue(N, 0) when N was updated
/// in place, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // aext(undef) = undef
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND) {
    SDNodeFlags Flags;
    // Preserve the nneg flag when collapsing aext(zext x) so later combines
    // can still exploit it on the surviving zext.
    if (N0.getOpcode() == ISD::ZERO_EXTEND)
      Flags.setNonNeg(N0->getFlags().hasNonNeg());
    return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
  }

  // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
  // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
  // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
    return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
    SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    // Extending a constant is expected to fold to a constant node.
    SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
    assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
    return DAG.getNode(ISD::AND, DL, VT, X, Y);
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction, so attempt to fold to zext instead.
  if (VT.isVector()) {
    // Try to simplify (zext (load x)).
    if (SDValue foldedExt =
            tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
      return foldedExt;
  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
    bool DoXform = true;
    if (!N0.hasOneUse())
      DoXform =
          ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        // The load has other users: hand them a truncate of the extended
        // value and rewire the chain result through the new load.
        SDValue Trunc =
            DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load directly at the wider type,
      // keeping its extension kind (zext/sext/ext) and memory operand.
      SDValue ExtLoad =
          DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
                         MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // Propagate fast-math-flags.
    SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());

    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC = DAG.getSetCC(
          DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
          cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
    return NewCtPop;

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
    return Res;

  return SDValue();
}
15479
/// Combine AssertSext/AssertZext nodes: drop redundant assertions, merge
/// nested assertions through truncates into a single stronger assertion, and
/// hoist AssertZext past AND masks when the mask makes that safe.
/// Returns the simplified value or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // N1 is a VTSDNode describing the asserted (already-extended-from) type.
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDLoc DL(N);
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    // The smaller asserted type implies the larger one, so keep only it.
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X. Just move the AssertZext in front of the truncate and drop the
  // AssertSExt.
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      Opcode == ISD::AssertZext) {
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    if (AssertVT.bitsLT(BigA_AssertVT)) {
      SDLoc DL(N);
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                      BigA.getOperand(0), N1);
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    }
  }

  if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
    const APInt &Mask = N0.getConstantOperandAPInt(1);

    // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
    // than X, and the And doesn't change the lower iX bits, we can move the
    // AssertZext in front of the And and drop the AssertSext.
    if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
      SDValue BigA = N0.getOperand(0);
      EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
      if (AssertVT.bitsLT(BigA_AssertVT) &&
          Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
        SDLoc DL(N);
        SDValue NewAssert =
            DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
        return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
                           N0.getOperand(1));
      }
    }

    // Remove AssertZext entirely if the mask guarantees the assertion cannot
    // fail.
    // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
    if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
      return N0;
  }

  return SDValue();
}
15553
15554SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15555 SDLoc DL(N);
15556
15557 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15558 SDValue N0 = N->getOperand(0);
15559
15560 // Fold (assertalign (assertalign x, AL0), AL1) ->
15561 // (assertalign x, max(AL0, AL1))
15562 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15563 return DAG.getAssertAlign(DL, N0.getOperand(0),
15564 std::max(AL, AAN->getAlign()));
15565
15566 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15567 // this assert down to source operands so that those arithmetic ops could be
15568 // exposed to the DAG combining.
15569 switch (N0.getOpcode()) {
15570 default:
15571 break;
15572 case ISD::ADD:
15573 case ISD::PTRADD:
15574 case ISD::SUB: {
15575 unsigned AlignShift = Log2(AL);
15576 SDValue LHS = N0.getOperand(0);
15577 SDValue RHS = N0.getOperand(1);
15578 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15579 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15580 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15581 if (LHSAlignShift < AlignShift)
15582 LHS = DAG.getAssertAlign(DL, LHS, AL);
15583 if (RHSAlignShift < AlignShift)
15584 RHS = DAG.getAssertAlign(DL, RHS, AL);
15585 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15586 }
15587 break;
15588 }
15589 }
15590
15591 return SDValue();
15592}
15593
15594/// If the result of a load is shifted/masked/truncated to an effectively
15595/// narrower type, try to transform the load to a narrower type and/or
15596/// use an extending load.
SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
  // N is the narrowing operation (sign_extend_inreg, srl/sra, and, or
  // truncate); we try to replace the load it consumes with a narrower one.
  // Returns the narrowed value, or SDValue() if the transform does not apply.
  unsigned Opc = N->getOpcode();

  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // The ShAmt variable is used to indicate that we've consumed a right
  // shift. I.e. we want to narrow the width of the load by skipping to load the
  // ShAmt least significant bits.
  unsigned ShAmt = 0;
  // A special case is when the least significant bits from the load are masked
  // away, but using an AND rather than a right shift. HasShiftedOffset is used
  // to indicate that the narrowed load should be left-shifted ShAmt bits to get
  // the result.
  unsigned ShiftedOffset = 0;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
    // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
    // value, or it may be shifting a higher subword, half or byte into the
    // lowest bits.

    // Only handle shift with constant shift amount, and the shiftee must be a
    // load.
    auto *LN = dyn_cast<LoadSDNode>(N0);
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!N1C || !LN)
      return SDValue();
    // If the shift amount is larger than the memory type then we're not
    // accessing any of the loaded bytes.
    ShAmt = N1C->getZExtValue();
    uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
    if (MemoryWidth <= ShAmt)
      return SDValue();
    // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
    ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    // If original load is a SEXTLOAD then we can't simply replace it by a
    // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
    // followed by a ZEXT, but that is not handled at the moment). Similarly if
    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
    if ((LN->getExtensionType() == ISD::SEXTLOAD ||
         LN->getExtensionType() == ISD::ZEXTLOAD) &&
        LN->getExtensionType() != ExtType)
      return SDValue();
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bits mask: plain truncate + zext.
      ActiveBits = Mask.countr_one();
    } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
      // Shifted mask: remember the offset so the result is shifted back left.
      ShiftedOffset = ShAmt;
    } else {
      return SDValue();
    }

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
  // a right shift. Here we redo some of those checks, to possibly adjust the
  // ExtVT even further based on "a masking AND". We could also end up here for
  // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
  // need to be done here as well.
  if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
    SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
    // Bail out when the SRL has more than one use. This is done for historical
    // (undocumented) reasons. Maybe intent was to guard the AND-masking below
    // check below? And maybe it could be non-profitable to do the transform in
    // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
    // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
    if (!SRL.hasOneUse())
      return SDValue();

    // Only handle shift with constant shift amount, and the shiftee must be a
    // load.
    auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
    auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
    if (!SRL1C || !LN)
      return SDValue();

    // If the shift amount is larger than the input type then we're not
    // accessing any of the loaded bytes. If the load was a zextload/extload
    // then the result of the shift+trunc is zero/undef (handled elsewhere).
    ShAmt = SRL1C->getZExtValue();
    uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
    if (ShAmt >= MemoryWidth)
      return SDValue();

    // Because a SRL must be assumed to *need* to zero-extend the high bits
    // (as opposed to anyext the high bits), we can't combine the zextload
    // lowering of SRL and an sextload.
    if (LN->getExtensionType() == ISD::SEXTLOAD)
      return SDValue();

    // Avoid reading outside the memory accessed by the original load (could
    // happened if we only adjust the load base pointer by ShAmt). Instead we
    // try to narrow the load even further. The typical scenario here is:
    // (i64 (truncate (i96 (srl (load x), 64)))) ->
    //   (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
    if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
      // Don't replace sextload by zextload.
      if (ExtType == ISD::SEXTLOAD)
        return SDValue();
      // Narrow the load.
      ExtType = ISD::ZEXTLOAD;
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    }

    // If the SRL is only used by a masking AND, we may be able to adjust
    // the ExtVT to make the AND redundant.
    SDNode *Mask = *(SRL->user_begin());
    if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(Mask->getOperand(1))) {
      unsigned Offset, ActiveBits;
      const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
      if (ShiftMask.isMask()) {
        EVT MaskedVT =
            EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
        // If the mask is smaller, recompute the type.
        if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
            TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
          ExtVT = MaskedVT;
      } else if (ExtType == ISD::ZEXTLOAD &&
                 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
                 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
        EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
        // If the mask is shifted we can use a narrower load and a shl to insert
        // the trailing zeros.
        if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
            TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
          ExtVT = MaskedVT;
          ShAmt = Offset + ShAmt;
          ShiftedOffset = Offset;
        }
      }
    }

    // From here on, operate on the value being shifted (the load candidate).
    N0 = SRL.getOperand(0);
  }

  // If the load is shifted left (and the result isn't shifted back right), we
  // can fold a truncate through the shift. The typical scenario is that N
  // points at a TRUNCATE here so the attempted fold is:
  //   (truncate (shl (load x), c))) -> (shl (narrow load x), c)
  // ShLeftAmt will indicate how much a narrowed load should be shifted left.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal. For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // On big-endian targets the "ShAmt low bits" live at the opposite end of
  // the in-memory value, so the byte offset has to be mirrored.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits =
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // We need to adjust the pointer to the load by ShAmt bits in order to load
  // the correct bytes.
  unsigned PtrAdjustmentInBits =
      DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;

  uint64_t PtrOff = PtrAdjustmentInBits / 8;
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDValue NewPtr =
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD) {
    const MDNode *OldRanges = LN0->getRanges();
    const MDNode *NewRanges = nullptr;
    // If LSBs are loaded and the truncated ConstantRange for the OldRanges
    // metadata is not the full-set for the new width then create a NewRanges
    // metadata for the truncated load
    if (ShAmt == 0 && OldRanges) {
      ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
      unsigned BitSize = VT.getScalarSizeInBits();

      // It is possible for an 8-bit extending load with 8-bit range
      // metadata to be narrowed to an 8-bit load. This guard is necessary to
      // ensure that truncation is strictly smaller.
      if (CR.getBitWidth() > BitSize) {
        ConstantRange TruncatedCR = CR.truncate(BitSize);
        if (!TruncatedCR.isFullSet()) {
          Metadata *Bounds[2] = {
                  ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
                  ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
          NewRanges = MDNode::get(*DAG.getContext(), Bounds);
        }
      } else if (CR.getBitWidth() == BitSize)
        NewRanges = OldRanges;
    }
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
                       LN0->getAAInfo(), NewRanges);
  } else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  if (ShLeftAmt != 0) {
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getScalarSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT, Result,
                           DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
  }

  if (ShiftedOffset != 0) {
    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
15866
/// Combine an ISD::SIGN_EXTEND_INREG node: drop it when the value is already
/// sign-extended, merge it with surrounding extends/shifts, and convert
/// loads/masked-loads/gathers feeding it into sign-extending memory ops.
/// Returns the replacement value, SDValue(N, 0) when N was updated in place,
/// or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // N1 names the narrow type we are sign-extending from.
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
  SDLoc DL(N);

  // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (sext_in_reg c1) -> c1
  if (SDValue C =
    return C;

  // If the input is already sign extended, just drop the extension.
  if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    if ((N00Bits <= ExtVTBits ||
         DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
    // For a zext_vector_inreg source the element widths must match exactly;
    // otherwise the sign-bit reasoning below also permits narrower sources.
    if ((N00Bits == ExtVTBits ||
         (!IsZext && (N00Bits < ExtVTBits ||
                      DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
        (!LegalOperations ||
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == ExtVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, DL, ExtVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = reduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
          return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                             N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
    auto *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad =
        DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                       LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
      N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
    auto *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad =
        DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                       LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
  // ignore it if the masked load is already sign extended
  if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
    if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
        Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
      SDValue ExtMaskedLoad = DAG.getMaskedLoad(
          VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
          Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
          Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
      CombineTo(N, ExtMaskedLoad);
      CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
    if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};

      SDValue ExtLoad = DAG.getMaskedGather(
          DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
          GN0->getIndexType(), ISD::SEXTLOAD);

      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      AddToWorklist(ExtLoad.getNode());
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
  }

  // Fold (iM_signext_inreg
  //        (extract_subvector (zext|anyext|sext iN_v to _) _)
  //        from iN)
  //      -> (extract_subvector (signext iN_v to iM))
  if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
    SDValue InnerExt = N0.getOperand(0);
    EVT InnerExtVT = InnerExt->getValueType(0);
    SDValue Extendee = InnerExt->getOperand(0);

    if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
      SDValue SignExtExtendee =
          DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
                         N0.getOperand(1));
    }
  }

  return SDValue();
}
16056
    SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
    bool LegalOperations) {
  // N is a *_EXTEND_VECTOR_INREG node; when its source is a one-use
  // CONCAT_VECTORS whose first operand already provides exactly the elements
  // being extended, replace it with the corresponding full *_EXTEND of that
  // operand. Returns the new node or SDValue() if the pattern doesn't match.
  unsigned InregOpcode = N->getOpcode();
  // Map the *_EXTEND_VECTOR_INREG opcode to its plain *_EXTEND counterpart.
  unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);

  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
                               Src.getValueType().getVectorElementType(),

  assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
         "Expected EXTEND_VECTOR_INREG dag node in input!");

  // Profitability check: our operand must be an one-use CONCAT_VECTORS.
  // FIXME: one-use check may be overly restrictive
  if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  // Profitability check: we must be extending exactly one of it's operands.
  // FIXME: this is probably overly restrictive.
  Src = Src.getOperand(0);
  if (Src.getValueType() != SrcVT)
    return SDValue();

  if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
    return SDValue();

  return DAG.getNode(Opcode, DL, VT, Src);
}
16088
/// Combine ANY/SIGN/ZERO_EXTEND_VECTOR_INREG nodes: fold undef and constant
/// sources, simplify based on demanded elements, and try folding to a full
/// extend of a concat subvector.
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (N0.isUndef()) {
    // aext_vector_inreg(undef) = undef because the top bits are undefined.
    // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
    return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
               ? DAG.getUNDEF(VT)
               : DAG.getConstant(0, DL, VT);
  }

  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
    return Res;

    return SDValue(N, 0);

                                                     LegalOperations))
    return R;

  return SDValue();
}
16114
/// Combine TRUNCATE_USAT_U: fold a saturating unsigned truncate of an
/// fptoui into a single FP_TO_UINT_SAT node.
SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);

  SDValue FPVal;
  // (truncate_usat_u (fp_to_uint x)) -> (fp_to_uint_sat x) saturating at VT.
  if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
          ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
    return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
                       DAG.getValueType(VT.getScalarType()));

  return SDValue();
}
16128
16129/// Detect patterns of truncation with unsigned saturation:
16130///
16131/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
16132/// Return the source value x to be truncated or SDValue() if the pattern was
16133/// not matched.
16134///
16136 unsigned NumDstBits = VT.getScalarSizeInBits();
16137 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16138 // Saturation with truncation. We truncate from InVT to VT.
16139 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16140
16141 SDValue Min;
16142 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16143 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
16144 return Min;
16145
16146 return SDValue();
16147}
16148
16149/// Detect patterns of truncation with signed saturation:
16150/// (truncate (smin (smax (x, signed_min_of_dest_type),
16151/// signed_max_of_dest_type)) to dest_type)
16152/// or:
16153/// (truncate (smax (smin (x, signed_max_of_dest_type),
16154/// signed_min_of_dest_type)) to dest_type).
16155///
16156/// Return the source value to be truncated or SDValue() if the pattern was not
16157/// matched.
16159 unsigned NumDstBits = VT.getScalarSizeInBits();
16160 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16161 // Saturation with truncation. We truncate from InVT to VT.
16162 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16163
16164 SDValue Val;
16165 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16166 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16167
16168 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
16169 m_SpecificInt(SignedMax))))
16170 return Val;
16171
16172 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
16173 m_SpecificInt(SignedMin))))
16174 return Val;
16175
16176 return SDValue();
16177}
16178
16179/// Detect patterns of truncation with unsigned saturation:
16181 const SDLoc &DL) {
16182 unsigned NumDstBits = VT.getScalarSizeInBits();
16183 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16184 // Saturation with truncation. We truncate from InVT to VT.
16185 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16186
16187 SDValue Val;
16188 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16189 // Min == 0, Max is unsigned max of destination type.
16190 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16191 m_Zero())))
16192 return Val;
16193
16194 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16195 m_SpecificInt(UnsignedMax))))
16196 return Val;
16197
16198 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16199 m_SpecificInt(UnsignedMax))))
16200 return Val;
16201
16202 return SDValue();
16203}
16204
16205static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16206 SDLoc &DL, const TargetLowering &TLI,
16207 SelectionDAG &DAG) {
16208 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16209 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16210 TLI.isTypeDesirableForOp(Opc, VT));
16211 };
16212
16213 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16214 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16215 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16216 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16217 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16218 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16219 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16220 } else if (Src.getOpcode() == ISD::UMIN) {
16221 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16222 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16223 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16224 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16225 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16226 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16227 }
16228
16229 return SDValue();
16230}
16231
16232SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16233 SDValue N0 = N->getOperand(0);
16234 EVT VT = N->getValueType(0);
16235 EVT SrcVT = N0.getValueType();
16236 bool isLE = DAG.getDataLayout().isLittleEndian();
16237 SDLoc DL(N);
16238
16239 // trunc(undef) = undef
16240 if (N0.isUndef())
16241 return DAG.getUNDEF(VT);
16242
16243 // fold (truncate (truncate x)) -> (truncate x)
16244 if (N0.getOpcode() == ISD::TRUNCATE)
16245 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16246
16247 // fold saturated truncate
16248 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16249 return SaturatedTR;
16250
16251 // fold (truncate c1) -> c1
16252 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16253 return C;
16254
16255 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16256 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16257 N0.getOpcode() == ISD::SIGN_EXTEND ||
16258 N0.getOpcode() == ISD::ANY_EXTEND) {
16259 // if the source is smaller than the dest, we still need an extend.
16260 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16261 SDNodeFlags Flags;
16262 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16263 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16264 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16265 }
16266 // if the source is larger than the dest, than we just need the truncate.
16267 if (N0.getOperand(0).getValueType().bitsGT(VT))
16268 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16269 // if the source and dest are the same type, we can drop both the extend
16270 // and the truncate.
16271 return N0.getOperand(0);
16272 }
16273
16274 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16275 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16276 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16277 N0.hasOneUse()) {
16278 SDValue X = N0.getOperand(0);
16279 SDValue ExtVal = N0.getOperand(1);
16280 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16281 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16282 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16283 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16284 }
16285 }
16286
16287 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16288 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16289 return SDValue();
16290
16291 // Fold extract-and-trunc into a narrow extract. For example:
16292 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16293 // i32 y = TRUNCATE(i64 x)
16294 // -- becomes --
16295 // v16i8 b = BITCAST (v2i64 val)
16296 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16297 //
16298 // Note: We only run this optimization after type legalization (which often
16299 // creates this pattern) and before operation legalization after which
16300 // we need to be more careful about the vector instructions that we generate.
16301 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16302 N0->hasOneUse()) {
16303 EVT TrTy = N->getValueType(0);
16304 SDValue Src = N0;
16305
16306 // Check for cases where we shift down an upper element before truncation.
16307 int EltOffset = 0;
16308 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16309 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16310 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16311 Src = Src.getOperand(0);
16312 EltOffset = *ShAmt / TrTy.getSizeInBits();
16313 }
16314 }
16315 }
16316
16317 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16318 EVT VecTy = Src.getOperand(0).getValueType();
16319 EVT ExTy = Src.getValueType();
16320
16321 auto EltCnt = VecTy.getVectorElementCount();
16322 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16323 auto NewEltCnt = EltCnt * SizeRatio;
16324
16325 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16326 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16327
16328 SDValue EltNo = Src->getOperand(1);
16329 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16330 int Elt = EltNo->getAsZExtVal();
16331 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16332 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16333 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16334 DAG.getBitcast(NVT, Src.getOperand(0)),
16335 DAG.getVectorIdxConstant(Index, DL));
16336 }
16337 }
16338 }
16339
16340 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16341 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16342 TLI.isTruncateFree(SrcVT, VT)) {
16343 if (!LegalOperations ||
16344 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16345 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16346 SDLoc SL(N0);
16347 SDValue Cond = N0.getOperand(0);
16348 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16349 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16350 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16351 }
16352 }
16353
16354 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16355 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16356 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16357 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16358 SDValue Amt = N0.getOperand(1);
16359 KnownBits Known = DAG.computeKnownBits(Amt);
16360 unsigned Size = VT.getScalarSizeInBits();
16361 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16362 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16363 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16364 if (AmtVT != Amt.getValueType()) {
16365 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16366 AddToWorklist(Amt.getNode());
16367 }
16368 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16369 }
16370 }
16371
16372 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16373 return V;
16374
16375 if (SDValue ABD = foldABSToABD(N, DL))
16376 return ABD;
16377
16378 // Attempt to pre-truncate BUILD_VECTOR sources.
16379 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16380 N0.hasOneUse() &&
16381 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16382 // Avoid creating illegal types if running after type legalizer.
16383 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16384 EVT SVT = VT.getScalarType();
16385 SmallVector<SDValue, 8> TruncOps;
16386 for (const SDValue &Op : N0->op_values()) {
16387 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16388 TruncOps.push_back(TruncOp);
16389 }
16390 return DAG.getBuildVector(VT, DL, TruncOps);
16391 }
16392
16393 // trunc (splat_vector x) -> splat_vector (trunc x)
16394 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16395 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16396 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16397 EVT SVT = VT.getScalarType();
16398 return DAG.getSplatVector(
16399 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16400 }
16401
16402 // Fold a series of buildvector, bitcast, and truncate if possible.
16403 // For example fold
16404 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16405 // (2xi32 (buildvector x, y)).
16406 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16407 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16409 N0.getOperand(0).hasOneUse()) {
16410 SDValue BuildVect = N0.getOperand(0);
16411 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16412 EVT TruncVecEltTy = VT.getVectorElementType();
16413
16414 // Check that the element types match.
16415 if (BuildVectEltTy == TruncVecEltTy) {
16416 // Now we only need to compute the offset of the truncated elements.
16417 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16418 unsigned TruncVecNumElts = VT.getVectorNumElements();
16419 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16420 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16421
16422 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16423 "Invalid number of elements");
16424
16426 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16427 i += TruncEltOffset)
16428 Opnds.push_back(BuildVect.getOperand(i));
16429
16430 return DAG.getBuildVector(VT, DL, Opnds);
16431 }
16432 }
16433
16434 // fold (truncate (load x)) -> (smaller load x)
16435 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16436 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16437 if (SDValue Reduced = reduceLoadWidth(N))
16438 return Reduced;
16439
16440 // Handle the case where the truncated result is at least as wide as the
16441 // loaded type.
16442 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16443 auto *LN0 = cast<LoadSDNode>(N0);
16444 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16445 SDValue NewLoad = DAG.getExtLoad(
16446 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16447 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16448 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16449 return NewLoad;
16450 }
16451 }
16452 }
16453
16454 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16455 // where ... are all 'undef'.
16456 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16458 SDValue V;
16459 unsigned Idx = 0;
16460 unsigned NumDefs = 0;
16461
16462 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16463 SDValue X = N0.getOperand(i);
16464 if (!X.isUndef()) {
16465 V = X;
16466 Idx = i;
16467 NumDefs++;
16468 }
16469 // Stop if more than one members are non-undef.
16470 if (NumDefs > 1)
16471 break;
16472
16475 X.getValueType().getVectorElementCount()));
16476 }
16477
16478 if (NumDefs == 0)
16479 return DAG.getUNDEF(VT);
16480
16481 if (NumDefs == 1) {
16482 assert(V.getNode() && "The single defined operand is empty!");
16484 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16485 if (i != Idx) {
16486 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16487 continue;
16488 }
16489 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16490 AddToWorklist(NV.getNode());
16491 Opnds.push_back(NV);
16492 }
16493 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16494 }
16495 }
16496
16497 // Fold truncate of a bitcast of a vector to an extract of the low vector
16498 // element.
16499 //
16500 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16501 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16502 SDValue VecSrc = N0.getOperand(0);
16503 EVT VecSrcVT = VecSrc.getValueType();
16504 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16505 (!LegalOperations ||
16506 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16507 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16508 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16509 DAG.getVectorIdxConstant(Idx, DL));
16510 }
16511 }
16512
16513 // Simplify the operands using demanded-bits information.
16515 return SDValue(N, 0);
16516
16517 // fold (truncate (extract_subvector(ext x))) ->
16518 // (extract_subvector x)
16519 // TODO: This can be generalized to cover cases where the truncate and extract
16520 // do not fully cancel each other out.
16521 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16522 SDValue N00 = N0.getOperand(0);
16523 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16524 N00.getOpcode() == ISD::ZERO_EXTEND ||
16525 N00.getOpcode() == ISD::ANY_EXTEND) {
16526 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16528 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16529 N00.getOperand(0), N0.getOperand(1));
16530 }
16531 }
16532
16533 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16534 return NewVSel;
16535
16536 // Narrow a suitable binary operation with a non-opaque constant operand by
16537 // moving it ahead of the truncate. This is limited to pre-legalization
16538 // because targets may prefer a wider type during later combines and invert
16539 // this transform.
16540 switch (N0.getOpcode()) {
16541 case ISD::ADD:
16542 case ISD::SUB:
16543 case ISD::MUL:
16544 case ISD::AND:
16545 case ISD::OR:
16546 case ISD::XOR:
16547 if (!LegalOperations && N0.hasOneUse() &&
16548 (N0.getOperand(0) == N0.getOperand(1) ||
16550 isConstantOrConstantVector(N0.getOperand(1), true))) {
16551 // TODO: We already restricted this to pre-legalization, but for vectors
16552 // we are extra cautious to not create an unsupported operation.
16553 // Target-specific changes are likely needed to avoid regressions here.
16554 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16555 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16556 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16557 SDNodeFlags Flags;
16558 // Propagate nuw for sub.
16559 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16561 N0->getOperand(0),
16563 VT.getScalarSizeInBits())))
16564 Flags.setNoUnsignedWrap(true);
16565 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16566 }
16567 }
16568 break;
16569 case ISD::ADDE:
16570 case ISD::UADDO_CARRY:
16571 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16572 // (trunc uaddo_carry(X, Y, Carry)) ->
16573 // (uaddo_carry trunc(X), trunc(Y), Carry)
16574 // When the adde's carry is not used.
16575 // We only do for uaddo_carry before legalize operation
16576 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16577 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16578 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16579 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16580 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16581 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16582 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16583 }
16584 break;
16585 case ISD::USUBSAT:
16586 // Truncate the USUBSAT only if LHS is a known zero-extension, its not
16587 // enough to know that the upper bits are zero we must ensure that we don't
16588 // introduce an extra truncate.
16589 if (!LegalOperations && N0.hasOneUse() &&
16592 VT.getScalarSizeInBits() &&
16593 hasOperation(N0.getOpcode(), VT)) {
16594 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16595 DAG, DL);
16596 }
16597 break;
16598 case ISD::AVGCEILS:
16599 case ISD::AVGCEILU:
16600 // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
16601 // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
16602 if (N0.hasOneUse()) {
16603 SDValue Op0 = N0.getOperand(0);
16604 SDValue Op1 = N0.getOperand(1);
16605 if (N0.getOpcode() == ISD::AVGCEILU) {
16607 Op0.getOpcode() == ISD::SIGN_EXTEND &&
16608 Op1.getOpcode() == ISD::SIGN_EXTEND &&
16609 Op0.getOperand(0).getValueType() == VT &&
16610 Op1.getOperand(0).getValueType() == VT)
16611 return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
16612 Op1.getOperand(0));
16613 } else {
16615 Op0.getOpcode() == ISD::ZERO_EXTEND &&
16616 Op1.getOpcode() == ISD::ZERO_EXTEND &&
16617 Op0.getOperand(0).getValueType() == VT &&
16618 Op1.getOperand(0).getValueType() == VT)
16619 return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
16620 Op1.getOperand(0));
16621 }
16622 }
16623 [[fallthrough]];
16624 case ISD::AVGFLOORS:
16625 case ISD::AVGFLOORU:
16626 case ISD::ABDS:
16627 case ISD::ABDU:
16628 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16629 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16630 if (!LegalOperations && N0.hasOneUse() &&
16631 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16632 EVT TruncVT = VT;
16633 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16634 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16635
16636 SDValue A = N0.getOperand(0);
16637 SDValue B = N0.getOperand(1);
16638 bool CanFold = false;
16639
16640 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16641 N0.getOpcode() == ISD::ABDU) {
16642 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16643 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16644 DAG.MaskedValueIsZero(A, UpperBits);
16645 } else {
16646 unsigned NeededBits = SrcBits - TruncBits;
16647 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16648 DAG.ComputeNumSignBits(A) > NeededBits;
16649 }
16650
16651 if (CanFold) {
16652 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16653 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16654 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16655 }
16656 }
16657 break;
16658 }
16659
16660 return SDValue();
16661}
16662
16663static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16664 SDValue Elt = N->getOperand(i);
16665 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16666 return Elt.getNode();
16667 return Elt.getOperand(Elt.getResNo()).getNode();
16668}
16669
16670/// build_pair (load, load) -> load
16671/// if load locations are consecutive.
16672SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16673 assert(N->getOpcode() == ISD::BUILD_PAIR);
16674
16675 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16676 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16677
16678 // A BUILD_PAIR is always having the least significant part in elt 0 and the
16679 // most significant part in elt 1. So when combining into one large load, we
16680 // need to consider the endianness.
16681 if (DAG.getDataLayout().isBigEndian())
16682 std::swap(LD1, LD2);
16683
16684 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16685 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16686 LD1->getAddressSpace() != LD2->getAddressSpace())
16687 return SDValue();
16688
16689 unsigned LD1Fast = 0;
16690 EVT LD1VT = LD1->getValueType(0);
16691 unsigned LD1Bytes = LD1VT.getStoreSize();
16692 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16693 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16694 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16695 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16696 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16697 LD1->getPointerInfo(), LD1->getAlign());
16698
16699 return SDValue();
16700}
16701
16702static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16703 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16704 // and Lo parts; on big-endian machines it doesn't.
16705 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16706}
16707
16708SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16709 const TargetLowering &TLI) {
16710 // If this is not a bitcast to an FP type or if the target doesn't have
16711 // IEEE754-compliant FP logic, we're done.
16712 EVT VT = N->getValueType(0);
16713 SDValue N0 = N->getOperand(0);
16714 EVT SourceVT = N0.getValueType();
16715
16716 if (!VT.isFloatingPoint())
16717 return SDValue();
16718
16719 // TODO: Handle cases where the integer constant is a different scalar
16720 // bitwidth to the FP.
16721 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16722 return SDValue();
16723
16724 unsigned FPOpcode;
16725 APInt SignMask;
16726 switch (N0.getOpcode()) {
16727 case ISD::AND:
16728 FPOpcode = ISD::FABS;
16729 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16730 break;
16731 case ISD::XOR:
16732 FPOpcode = ISD::FNEG;
16733 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16734 break;
16735 case ISD::OR:
16736 FPOpcode = ISD::FABS;
16737 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16738 break;
16739 default:
16740 return SDValue();
16741 }
16742
16743 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16744 return SDValue();
16745
16746 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16747 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16748 // removing this would require more changes.
16749 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16750 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16751 return true;
16752
16753 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16754 };
16755
16756 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16757 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16758 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16759 // fneg (fabs X)
16760 SDValue LogicOp0 = N0.getOperand(0);
16761 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16762 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16763 IsBitCastOrFree(LogicOp0, VT)) {
16764 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16765 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16766 NumFPLogicOpsConv++;
16767 if (N0.getOpcode() == ISD::OR)
16768 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16769 return FPOp;
16770 }
16771
16772 return SDValue();
16773}
16774
16775SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16776 SDValue N0 = N->getOperand(0);
16777 EVT VT = N->getValueType(0);
16778
16779 if (N0.isUndef())
16780 return DAG.getUNDEF(VT);
16781
16782 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16783 // Only do this before legalize types, unless both types are integer and the
16784 // scalar type is legal. Only do this before legalize ops, since the target
16785 // maybe depending on the bitcast.
16786 // First check to see if this is all constant.
16787 // TODO: Support FP bitcasts after legalize types.
16788 if (VT.isVector() &&
16789 (!LegalTypes ||
16790 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16791 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16792 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16793 cast<BuildVectorSDNode>(N0)->isConstant())
16794 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16796
16797 // If the input is a constant, let getNode fold it.
16798 if (isIntOrFPConstant(N0)) {
16799 // If we can't allow illegal operations, we need to check that this is just
16800 // a fp -> int or int -> conversion and that the resulting operation will
16801 // be legal.
16802 if (!LegalOperations ||
16803 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16805 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16806 TLI.isOperationLegal(ISD::Constant, VT))) {
16807 SDValue C = DAG.getBitcast(VT, N0);
16808 if (C.getNode() != N)
16809 return C;
16810 }
16811 }
16812
16813 // (conv (conv x, t1), t2) -> (conv x, t2)
16814 if (N0.getOpcode() == ISD::BITCAST)
16815 return DAG.getBitcast(VT, N0.getOperand(0));
16816
16817 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16818 // iff the current bitwise logicop type isn't legal
16819 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16820 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16821 auto IsFreeBitcast = [VT](SDValue V) {
16822 return (V.getOpcode() == ISD::BITCAST &&
16823 V.getOperand(0).getValueType() == VT) ||
16825 V->hasOneUse());
16826 };
16827 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16828 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16829 DAG.getBitcast(VT, N0.getOperand(0)),
16830 DAG.getBitcast(VT, N0.getOperand(1)));
16831 }
16832
16833 // fold (conv (load x)) -> (load (conv*)x)
16834 // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
16835 // If the resultant load doesn't need a higher alignment than the original!
16836 auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
16837 if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse())
16838 return SDValue();
16839
16840 // Do not remove the cast if the types differ in endian layout.
16843 return SDValue();
16844
16845 // If the load is volatile, we only want to change the load type if the
16846 // resulting load is legal. Otherwise we might increase the number of
16847 // memory accesses. We don't care if the original type was legal or not
16848 // as we assume software couldn't rely on the number of accesses of an
16849 // illegal type.
16850 auto *LN0 = cast<LoadSDNode>(N0);
16851 if ((LegalOperations || !LN0->isSimple()) &&
16852 !TLI.isOperationLegal(ISD::LOAD, VT))
16853 return SDValue();
16854
16855 if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16856 *LN0->getMemOperand()))
16857 return SDValue();
16858
16859 // If the range metadata type does not match the new memory
16860 // operation type, remove the range metadata.
16861 if (const MDNode *MD = LN0->getRanges()) {
16862 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16863 if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
16864 LN0->getMemOperand()->clearRanges();
16865 }
16866 }
16867 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
16868 LN0->getMemOperand());
16869 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16870 return Load;
16871 };
16872
16873 if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
16874 return NewLd;
16875
16876 if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
16877 if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N)))
16878 return DAG.getFreeze(NewLd);
16879
16880 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16881 return V;
16882
16883 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16884 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16885 //
16886 // For ppc_fp128:
16887 // fold (bitcast (fneg x)) ->
16888 // flipbit = signbit
16889 // (xor (bitcast x) (build_pair flipbit, flipbit))
16890 //
16891 // fold (bitcast (fabs x)) ->
16892 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16893 // (xor (bitcast x) (build_pair flipbit, flipbit))
16894 // This often reduces constant pool loads.
16895 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16896 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16897 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16898 !N0.getValueType().isVector()) {
16899 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16900 AddToWorklist(NewConv.getNode());
16901
16902 SDLoc DL(N);
16903 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16904 assert(VT.getSizeInBits() == 128);
16905 SDValue SignBit = DAG.getConstant(
16906 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16907 SDValue FlipBit;
16908 if (N0.getOpcode() == ISD::FNEG) {
16909 FlipBit = SignBit;
16910 AddToWorklist(FlipBit.getNode());
16911 } else {
16912 assert(N0.getOpcode() == ISD::FABS);
16913 SDValue Hi =
16914 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16916 SDLoc(NewConv)));
16917 AddToWorklist(Hi.getNode());
16918 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16919 AddToWorklist(FlipBit.getNode());
16920 }
16921 SDValue FlipBits =
16922 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16923 AddToWorklist(FlipBits.getNode());
16924 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16925 }
16926 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16927 if (N0.getOpcode() == ISD::FNEG)
16928 return DAG.getNode(ISD::XOR, DL, VT,
16929 NewConv, DAG.getConstant(SignBit, DL, VT));
16930 assert(N0.getOpcode() == ISD::FABS);
16931 return DAG.getNode(ISD::AND, DL, VT,
16932 NewConv, DAG.getConstant(~SignBit, DL, VT));
16933 }
16934
16935 // fold (bitconvert (fcopysign cst, x)) ->
16936 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16937 // Note that we don't handle (copysign x, cst) because this can always be
16938 // folded to an fneg or fabs.
16939 //
16940 // For ppc_fp128:
16941 // fold (bitcast (fcopysign cst, x)) ->
16942 // flipbit = (and (extract_element
16943 // (xor (bitcast cst), (bitcast x)), 0),
16944 // signbit)
16945 // (xor (bitcast cst) (build_pair flipbit, flipbit))
16946 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16948 !VT.isVector()) {
16949 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16950 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16951 if (isTypeLegal(IntXVT)) {
16952 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16953 AddToWorklist(X.getNode());
16954
16955 // If X has a different width than the result/lhs, sext it or truncate it.
16956 unsigned VTWidth = VT.getSizeInBits();
16957 if (OrigXWidth < VTWidth) {
16958 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16959 AddToWorklist(X.getNode());
16960 } else if (OrigXWidth > VTWidth) {
16961 // To get the sign bit in the right place, we have to shift it right
16962 // before truncating.
16963 SDLoc DL(X);
16964 X = DAG.getNode(ISD::SRL, DL,
16965 X.getValueType(), X,
16966 DAG.getConstant(OrigXWidth-VTWidth, DL,
16967 X.getValueType()));
16968 AddToWorklist(X.getNode());
16969 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16970 AddToWorklist(X.getNode());
16971 }
16972
16973 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16974 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16975 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16976 AddToWorklist(Cst.getNode());
16977 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16978 AddToWorklist(X.getNode());
16979 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16980 AddToWorklist(XorResult.getNode());
16981 SDValue XorResult64 = DAG.getNode(
16982 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16984 SDLoc(XorResult)));
16985 AddToWorklist(XorResult64.getNode());
16986 SDValue FlipBit =
16987 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16988 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16989 AddToWorklist(FlipBit.getNode());
16990 SDValue FlipBits =
16991 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16992 AddToWorklist(FlipBits.getNode());
16993 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16994 }
16995 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16996 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16997 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16998 AddToWorklist(X.getNode());
16999
17000 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17001 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
17002 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
17003 AddToWorklist(Cst.getNode());
17004
17005 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
17006 }
17007 }
17008
17009 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
17010 if (N0.getOpcode() == ISD::BUILD_PAIR)
17011 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
17012 return CombineLD;
17013
17014 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
17015 // => int_vt (any_extend elt_vt:x)
17016 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
17017 SDValue SrcScalar = N0.getOperand(0);
17018 if (SrcScalar.getValueType().isScalarInteger())
17019 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
17020 }
17021
17022 // Remove double bitcasts from shuffles - this is often a legacy of
17023 // XformToShuffleWithZero being used to combine bitmaskings (of
17024 // float vectors bitcast to integer vectors) into shuffles.
17025 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
17026 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
17027 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
17030 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
17031
17032 // If operands are a bitcast, peek through if it casts the original VT.
17033 // If operands are a constant, just bitcast back to original VT.
17034 auto PeekThroughBitcast = [&](SDValue Op) {
17035 if (Op.getOpcode() == ISD::BITCAST &&
17036 Op.getOperand(0).getValueType() == VT)
17037 return SDValue(Op.getOperand(0));
17038 if (Op.isUndef() || isAnyConstantBuildVector(Op))
17039 return DAG.getBitcast(VT, Op);
17040 return SDValue();
17041 };
17042
17043 // FIXME: If either input vector is bitcast, try to convert the shuffle to
17044 // the result type of this bitcast. This would eliminate at least one
17045 // bitcast. See the transform in InstCombine.
17046 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
17047 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
17048 if (!(SV0 && SV1))
17049 return SDValue();
17050
17051 int MaskScale =
17053 SmallVector<int, 8> NewMask;
17054 for (int M : SVN->getMask())
17055 for (int i = 0; i != MaskScale; ++i)
17056 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
17057
17058 SDValue LegalShuffle =
17059 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
17060 if (LegalShuffle)
17061 return LegalShuffle;
17062 }
17063
17064 return SDValue();
17065}
17066
17067SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
17068 EVT VT = N->getValueType(0);
17069 return CombineConsecutiveLoads(N, VT);
17070}
17071
/// Combine a FREEZE node.
/// - Drops the freeze entirely when the operand is provably neither undef nor
///   poison (isGuaranteedNotToBeUndefOrPoison).
/// - If N0 has both frozen and unfrozen users, redirects all users through
///   this freeze so there is a single canonical frozen value.
/// - Otherwise attempts freeze(op(x, ...)) -> op(freeze(x), ...), pushing the
///   freeze down onto the maybe-poison operands of N0.
/// NOTE(review): this extraction elides source lines 17117, 17119-17120,
/// 17131, 17133-17134 and 17202 (e.g. part of the
/// AllowMultipleMaybePoisonOperands opcode list and, presumably, the
/// declaration of `Ops`); confirm against the full file.
17072SDValue DAGCombiner::visitFREEZE(SDNode *N) {
17073 SDValue N0 = N->getOperand(0);
17074
17075 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
17076 return N0;
17077
17078 // If we have frozen and unfrozen users of N0, update so everything uses N.
17079 if (!N0.isUndef() && !N0.hasOneUse()) {
17080 SDValue FrozenN0(N, 0);
17081 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
17082 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
17083 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
17084 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
17085 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17086 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
17087 DAG.UpdateNodeOperands(N, N0);
17088 return FrozenN0;
17089 }
17090
17091 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
17092 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
17093 // example https://reviews.llvm.org/D136529#4120959.
17094 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
17095 return SDValue();
17096
17097 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
17098 // Try to push freeze through instructions that propagate but don't produce
17099 // poison as far as possible. If an operand of freeze follows three
17100 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
17101 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
17102 // the freeze through to the operands that are not guaranteed non-poison.
17103 // NOTE: we will strip poison-generating flags, so ignore them here.
17104 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
17105 /*ConsiderFlags*/ false) ||
17106 N0->getNumValues() != 1 || !N0->hasOneUse())
17107 return SDValue();
17108
17109 // TODO: we should always allow multiple operands, however this increases the
17110 // likelihood of infinite loops due to the ReplaceAllUsesOfValueWith call
17111 // below causing later nodes that share frozen operands to fold again and no
17112 // longer being able to confirm other operands are not poison due to recursion
17113 // depth limits on isGuaranteedNotToBeUndefOrPoison.
17114 bool AllowMultipleMaybePoisonOperands =
17115 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
17116 N0.getOpcode() == ISD::BUILD_VECTOR ||
17118 N0.getOpcode() == ISD::BUILD_PAIR ||
17121
17122 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
17123 // ones" or "constant" into something that depends on FrozenUndef. We can
17124 // instead pick undef values to keep those properties, while at the same time
17125 // folding away the freeze.
17126 // If we implement a more general solution for folding away freeze(undef) in
17127 // the future, then this special handling can be removed.
17128 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
17129 SDLoc DL(N0);
17130 EVT VT = N0.getValueType();
17132 return DAG.getAllOnesConstant(DL, VT);
17135 for (const SDValue &Op : N0->op_values())
17136 NewVecC.push_back(
17137 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
17138 return DAG.getBuildVector(VT, DL, NewVecC);
17139 }
17140 }
17141
 // Collect the distinct operands of N0 that may be undef/poison, remembering
 // their operand numbers so they can be re-fetched after later RAUW
 // mutations; bail out on a second such operand when not allowed.
17142 SmallSet<SDValue, 8> MaybePoisonOperands;
17143 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
17144 for (auto [OpNo, Op] : enumerate(N0->ops())) {
17145 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
17146 continue;
17147 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
17148 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
17149 if (IsNewMaybePoisonOperand)
17150 MaybePoisonOperandNumbers.push_back(OpNo);
17151 if (!HadMaybePoisonOperands)
17152 continue;
17153 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
17154 // Multiple maybe-poison ops when not allowed - bail out.
17155 return SDValue();
17156 }
17157 }
17158 // NOTE: the whole op may be not guaranteed to not be undef or poison because
17159 // it could create undef or poison due to its poison-generating flags.
17160 // So not finding any maybe-poison operands is fine.
17161
17162 for (unsigned OpNo : MaybePoisonOperandNumbers) {
17163 // N0 can mutate during iteration, so make sure to refetch the maybe poison
17164 // operands via the operand numbers. The typical scenario is that we have
17165 // something like this
17166 // t262: i32 = freeze t181
17167 // t150: i32 = ctlz_zero_undef t262
17168 // t184: i32 = ctlz_zero_undef t181
17169 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
17170 // When freezing the t181 operand we get t262 back, and then the
17171 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
17172 // also recursively replace t184 by t150.
17173 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
17174 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
17175 if (MaybePoisonOperand.isUndef())
17176 continue;
17177 // First, freeze each offending operand.
17178 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
17179 // Then, change all other uses of unfrozen operand to use frozen operand.
17180 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
17181 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
17182 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
17183 // But, that also updated the use in the freeze we just created, thus
17184 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17185 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
17186 MaybePoisonOperand);
17187 }
17188
17189 // This node has been merged with another.
17190 if (N->getOpcode() == ISD::DELETED_NODE)
17191 return SDValue(N, 0);
17192 }
17193
17194 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17195
17196 // The whole node may have been updated, so the value we were holding
17197 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
17198 N0 = N->getOperand(0);
17199
17200 // Finally, recreate the node, its operands were updated to use
17201 // frozen operands, so we just need to use its "original" operands.
17203 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
17204 // leave for a future patch.
17205 for (SDValue &Op : Ops) {
17206 if (Op.isUndef())
17207 Op = DAG.getFreeze(Op);
17208 }
17209
17210 SDLoc DL(N0);
17211
17212 // Special case handling for ShuffleVectorSDNode nodes.
17213 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
17214 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
17215 SVN->getMask());
17216
17217 // NOTE: this strips poison generating flags.
17218 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
17219 // ninf, nsz, or fast.
17220 // However, contract, reassoc, afn, and arcp should be preserved,
17221 // as these fast-math flags do not introduce poison values.
17222 SDNodeFlags SrcFlags = N0->getFlags();
17223 SDNodeFlags SafeFlags;
17224 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17225 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17226 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17227 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17228 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
17229}
17230
17231// Returns true if floating point contraction is allowed on the FMUL-SDValue
17232// `N`
// NOTE(review): the defining line (17233) is elided by this extraction;
// given the uses of `Options` and `N` below it is presumably a static helper
// taking (const TargetOptions &Options, SDValue N) — confirm against the
// full file. Contraction is permitted either globally (-ffp-contract=fast)
// or per-node via the `contract` fast-math flag.
17234 assert(N.getOpcode() == ISD::FMUL);
17235
17236 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17237 N->getFlags().hasAllowContract();
17238}
17239
/// \param N the FADD node to combine (or a VP equivalent, depending on
///        MatchContextClass).
/// \return a fused FMA/FMAD replacement, or an empty SDValue if no combine
///         applies.
/// NOTE(review): this extraction elides source lines 17260, 17284, 17299 and
/// 17343 (e.g. the second half of the HasFMA condition and, presumably, the
/// RAUW of the inner FMul after building CDE); confirm against the full file.
17240/// Try to perform FMA combining on a given FADD node.
17241template <class MatchContextClass>
17242SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17243 SDValue N0 = N->getOperand(0);
17244 SDValue N1 = N->getOperand(1);
17245 EVT VT = N->getValueType(0);
17246 SDLoc SL(N);
17247 MatchContextClass matcher(DAG, TLI, N);
17248 const TargetOptions &Options = DAG.getTarget().Options;
17249
17250 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17251
17252 // Floating-point multiply-add with intermediate rounding.
17253 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17254 // FIXME: Add VP_FMAD opcode.
17255 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17256
17257 // Floating-point multiply-add without intermediate rounding.
17258 bool HasFMA =
17259 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17261
17262 // No valid opcode, do not combine.
17263 if (!HasFMAD && !HasFMA)
17264 return SDValue();
17265
17266 bool AllowFusionGlobally =
17267 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17268 // If the addition is not contractable, do not combine.
17269 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17270 return SDValue();
17271
17272 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17273 // beneficial. It does not reduce latency. It increases register pressure. It
17274 // replaces an fadd with an fma which is a more complex instruction, so is
17275 // likely to have a larger encoding, use more functional units, etc.
17276 if (N0 == N1)
17277 return SDValue();
17278
17279 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17280 return SDValue();
17281
17282 // Always prefer FMAD to FMA for precision.
17283 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17285
17286 auto isFusedOp = [&](SDValue N) {
17287 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17288 };
17289
17290 // Is the node an FMUL and contractable either due to global flags or
17291 // SDNodeFlags.
17292 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17293 if (!matcher.match(N, ISD::FMUL))
17294 return false;
17295 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17296 };
17297 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17298 // prefer to fold the multiply with fewer uses.
17300 if (N0->use_size() > N1->use_size())
17301 std::swap(N0, N1);
17302 }
17303
17304 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17305 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17306 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17307 N0.getOperand(1), N1);
17308 }
17309
17310 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17311 // Note: Commutes FADD operands.
17312 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17313 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17314 N1.getOperand(1), N0);
17315 }
17316
17317 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17318 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17319 // This also works with nested fma instructions:
17320 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
17321 // fma A, B, (fma C, D, fma (E, F, G))
17322 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
17323 // fma A, B, (fma C, D, fma (E, F, G)).
17324 // This requires reassociation because it changes the order of operations.
17325 bool CanReassociate = N->getFlags().hasAllowReassociation();
17326 if (CanReassociate) {
17327 SDValue FMA, E;
17328 if (isFusedOp(N0) && N0.hasOneUse()) {
17329 FMA = N0;
17330 E = N1;
17331 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17332 FMA = N1;
17333 E = N0;
17334 }
17335
 // Walk down the chain of fused ops (via operand 2) looking for an inner
 // one-use FMUL whose product can absorb E as a new fused addend.
17336 SDValue TmpFMA = FMA;
17337 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17338 SDValue FMul = TmpFMA->getOperand(2);
17339 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17340 SDValue C = FMul.getOperand(0);
17341 SDValue D = FMul.getOperand(1);
17342 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17344 // Replacing the inner FMul could cause the outer FMA to be simplified
17345 // away.
17346 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17347 }
17348
17349 TmpFMA = TmpFMA->getOperand(2);
17350 }
17351 }
17352
17353 // Look through FP_EXTEND nodes to do more combining.
17354
17355 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17356 if (matcher.match(N0, ISD::FP_EXTEND)) {
17357 SDValue N00 = N0.getOperand(0);
17358 if (isContractableFMUL(N00) &&
17359 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17360 N00.getValueType())) {
17361 return matcher.getNode(
17362 PreferredFusedOpcode, SL, VT,
17363 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17364 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17365 }
17366 }
17367
17368 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17369 // Note: Commutes FADD operands.
17370 if (matcher.match(N1, ISD::FP_EXTEND)) {
17371 SDValue N10 = N1.getOperand(0);
17372 if (isContractableFMUL(N10) &&
17373 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17374 N10.getValueType())) {
17375 return matcher.getNode(
17376 PreferredFusedOpcode, SL, VT,
17377 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17378 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17379 }
17380 }
17381
17382 // More folding opportunities when target permits.
17383 if (Aggressive) {
17384 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17385 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17386 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17387 SDValue Z) {
17388 return matcher.getNode(
17389 PreferredFusedOpcode, SL, VT, X, Y,
17390 matcher.getNode(PreferredFusedOpcode, SL, VT,
17391 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17392 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17393 };
17394 if (isFusedOp(N0)) {
17395 SDValue N02 = N0.getOperand(2);
17396 if (matcher.match(N02, ISD::FP_EXTEND)) {
17397 SDValue N020 = N02.getOperand(0);
17398 if (isContractableFMUL(N020) &&
17399 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17400 N020.getValueType())) {
17401 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17402 N020.getOperand(0), N020.getOperand(1),
17403 N1);
17404 }
17405 }
17406 }
17407
17408 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17409 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17410 // FIXME: This turns two single-precision and one double-precision
17411 // operation into two double-precision operations, which might not be
17412 // interesting for all targets, especially GPUs.
17413 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17414 SDValue Z) {
17415 return matcher.getNode(
17416 PreferredFusedOpcode, SL, VT,
17417 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17418 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17419 matcher.getNode(PreferredFusedOpcode, SL, VT,
17420 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17421 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17422 };
17423 if (N0.getOpcode() == ISD::FP_EXTEND) {
17424 SDValue N00 = N0.getOperand(0);
17425 if (isFusedOp(N00)) {
17426 SDValue N002 = N00.getOperand(2);
17427 if (isContractableFMUL(N002) &&
17428 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17429 N00.getValueType())) {
17430 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17431 N002.getOperand(0), N002.getOperand(1),
17432 N1);
17433 }
17434 }
17435 }
17436
17437 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17438 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17439 if (isFusedOp(N1)) {
17440 SDValue N12 = N1.getOperand(2);
17441 if (N12.getOpcode() == ISD::FP_EXTEND) {
17442 SDValue N120 = N12.getOperand(0);
17443 if (isContractableFMUL(N120) &&
17444 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17445 N120.getValueType())) {
17446 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17447 N120.getOperand(0), N120.getOperand(1),
17448 N0);
17449 }
17450 }
17451 }
17452
17453 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17454 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17455 // FIXME: This turns two single-precision and one double-precision
17456 // operation into two double-precision operations, which might not be
17457 // interesting for all targets, especially GPUs.
17458 if (N1.getOpcode() == ISD::FP_EXTEND) {
17459 SDValue N10 = N1.getOperand(0);
17460 if (isFusedOp(N10)) {
17461 SDValue N102 = N10.getOperand(2);
17462 if (isContractableFMUL(N102) &&
17463 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17464 N10.getValueType())) {
17465 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17466 N102.getOperand(0), N102.getOperand(1),
17467 N0);
17468 }
17469 }
17470 }
17471 }
17472
17473 return SDValue();
17474}
17475
17476/// Try to perform FMA combining on a given FSUB node.
17477template <class MatchContextClass>
17478SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17479 SDValue N0 = N->getOperand(0);
17480 SDValue N1 = N->getOperand(1);
17481 EVT VT = N->getValueType(0);
17482 SDLoc SL(N);
17483 MatchContextClass matcher(DAG, TLI, N);
17484 const TargetOptions &Options = DAG.getTarget().Options;
17485
17486 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17487
17488 // Floating-point multiply-add with intermediate rounding.
17489 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17490 // FIXME: Add VP_FMAD opcode.
17491 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17492
17493 // Floating-point multiply-add without intermediate rounding.
17494 bool HasFMA =
17495 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17497
17498 // No valid opcode, do not combine.
17499 if (!HasFMAD && !HasFMA)
17500 return SDValue();
17501
17502 const SDNodeFlags Flags = N->getFlags();
17503 bool AllowFusionGlobally =
17504 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17505
17506 // If the subtraction is not contractable, do not combine.
17507 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17508 return SDValue();
17509
17510 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17511 return SDValue();
17512
17513 // Always prefer FMAD to FMA for precision.
17514 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17516 bool NoSignedZero = Flags.hasNoSignedZeros();
17517
17518 // Is the node an FMUL and contractable either due to global flags or
17519 // SDNodeFlags.
17520 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17521 if (!matcher.match(N, ISD::FMUL))
17522 return false;
17523 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17524 };
17525
17526 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17527 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17528 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17529 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17530 XY.getOperand(1),
17531 matcher.getNode(ISD::FNEG, SL, VT, Z));
17532 }
17533 return SDValue();
17534 };
17535
17536 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17537 // Note: Commutes FSUB operands.
17538 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17539 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17540 return matcher.getNode(
17541 PreferredFusedOpcode, SL, VT,
17542 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17543 YZ.getOperand(1), X);
17544 }
17545 return SDValue();
17546 };
17547
17548 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17549 // prefer to fold the multiply with fewer uses.
17550 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17551 (N0->use_size() > N1->use_size())) {
17552 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17553 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17554 return V;
17555 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17556 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17557 return V;
17558 } else {
17559 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17560 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17561 return V;
17562 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17563 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17564 return V;
17565 }
17566
17567 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
17568 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17569 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17570 SDValue N00 = N0.getOperand(0).getOperand(0);
17571 SDValue N01 = N0.getOperand(0).getOperand(1);
17572 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17573 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17574 matcher.getNode(ISD::FNEG, SL, VT, N1));
17575 }
17576
17577 // Look through FP_EXTEND nodes to do more combining.
17578
17579 // fold (fsub (fpext (fmul x, y)), z)
17580 // -> (fma (fpext x), (fpext y), (fneg z))
17581 if (matcher.match(N0, ISD::FP_EXTEND)) {
17582 SDValue N00 = N0.getOperand(0);
17583 if (isContractableFMUL(N00) &&
17584 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17585 N00.getValueType())) {
17586 return matcher.getNode(
17587 PreferredFusedOpcode, SL, VT,
17588 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17589 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17590 matcher.getNode(ISD::FNEG, SL, VT, N1));
17591 }
17592 }
17593
17594 // fold (fsub x, (fpext (fmul y, z)))
17595 // -> (fma (fneg (fpext y)), (fpext z), x)
17596 // Note: Commutes FSUB operands.
17597 if (matcher.match(N1, ISD::FP_EXTEND)) {
17598 SDValue N10 = N1.getOperand(0);
17599 if (isContractableFMUL(N10) &&
17600 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17601 N10.getValueType())) {
17602 return matcher.getNode(
17603 PreferredFusedOpcode, SL, VT,
17604 matcher.getNode(
17605 ISD::FNEG, SL, VT,
17606 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17607 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17608 }
17609 }
17610
17611 // fold (fsub (fpext (fneg (fmul, x, y))), z)
17612 // -> (fneg (fma (fpext x), (fpext y), z))
17613 // Note: This could be removed with appropriate canonicalization of the
17614 // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However, the
17615 // command line flag -fp-contract=fast and fast-math flag contract prevent
17616 // from implementing the canonicalization in visitFSUB.
17617 if (matcher.match(N0, ISD::FP_EXTEND)) {
17618 SDValue N00 = N0.getOperand(0);
17619 if (matcher.match(N00, ISD::FNEG)) {
17620 SDValue N000 = N00.getOperand(0);
17621 if (isContractableFMUL(N000) &&
17622 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17623 N00.getValueType())) {
17624 return matcher.getNode(
17625 ISD::FNEG, SL, VT,
17626 matcher.getNode(
17627 PreferredFusedOpcode, SL, VT,
17628 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17629 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17630 N1));
17631 }
17632 }
17633 }
17634
17635 // fold (fsub (fneg (fpext (fmul, x, y))), z)
17636 // -> (fneg (fma (fpext x)), (fpext y), z)
17637 // Note: This could be removed with appropriate canonicalization of the
17638 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
17639 // command line flag -fp-contract=fast and fast-math flag contract prevent
17640 // from implementing the canonicalization in visitFSUB.
17641 if (matcher.match(N0, ISD::FNEG)) {
17642 SDValue N00 = N0.getOperand(0);
17643 if (matcher.match(N00, ISD::FP_EXTEND)) {
17644 SDValue N000 = N00.getOperand(0);
17645 if (isContractableFMUL(N000) &&
17646 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17647 N000.getValueType())) {
17648 return matcher.getNode(
17649 ISD::FNEG, SL, VT,
17650 matcher.getNode(
17651 PreferredFusedOpcode, SL, VT,
17652 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17653 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17654 N1));
17655 }
17656 }
17657 }
17658
17659 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17660 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17661 };
17662
17663 auto isFusedOp = [&](SDValue N) {
17664 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17665 };
17666
17667 // More folding opportunities when target permits.
17668 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17669 bool CanFuse = N->getFlags().hasAllowContract();
17670 // fold (fsub (fma x, y, (fmul u, v)), z)
17671 // -> (fma x, y (fma u, v, (fneg z)))
17672 if (CanFuse && isFusedOp(N0) &&
17673 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17674 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17675 return matcher.getNode(
17676 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17677 matcher.getNode(PreferredFusedOpcode, SL, VT,
17678 N0.getOperand(2).getOperand(0),
17679 N0.getOperand(2).getOperand(1),
17680 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17681 }
17682
17683 // fold (fsub x, (fma y, z, (fmul u, v)))
17684 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17685 if (CanFuse && isFusedOp(N1) &&
17686 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17687 N1->hasOneUse() && NoSignedZero) {
17688 SDValue N20 = N1.getOperand(2).getOperand(0);
17689 SDValue N21 = N1.getOperand(2).getOperand(1);
17690 return matcher.getNode(
17691 PreferredFusedOpcode, SL, VT,
17692 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17693 N1.getOperand(1),
17694 matcher.getNode(PreferredFusedOpcode, SL, VT,
17695 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17696 }
17697
17698 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17699 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17700 if (isFusedOp(N0) && N0->hasOneUse()) {
17701 SDValue N02 = N0.getOperand(2);
17702 if (matcher.match(N02, ISD::FP_EXTEND)) {
17703 SDValue N020 = N02.getOperand(0);
17704 if (isContractableAndReassociableFMUL(N020) &&
17705 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17706 N020.getValueType())) {
17707 return matcher.getNode(
17708 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17709 matcher.getNode(
17710 PreferredFusedOpcode, SL, VT,
17711 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17712 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17713 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17714 }
17715 }
17716 }
17717
17718 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17719 // -> (fma (fpext x), (fpext y),
17720 // (fma (fpext u), (fpext v), (fneg z)))
17721 // FIXME: This turns two single-precision and one double-precision
17722 // operation into two double-precision operations, which might not be
17723 // interesting for all targets, especially GPUs.
17724 if (matcher.match(N0, ISD::FP_EXTEND)) {
17725 SDValue N00 = N0.getOperand(0);
17726 if (isFusedOp(N00)) {
17727 SDValue N002 = N00.getOperand(2);
17728 if (isContractableAndReassociableFMUL(N002) &&
17729 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17730 N00.getValueType())) {
17731 return matcher.getNode(
17732 PreferredFusedOpcode, SL, VT,
17733 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17734 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17735 matcher.getNode(
17736 PreferredFusedOpcode, SL, VT,
17737 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17738 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17739 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17740 }
17741 }
17742 }
17743
17744 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17745 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17746 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17747 N1->hasOneUse()) {
17748 SDValue N120 = N1.getOperand(2).getOperand(0);
17749 if (isContractableAndReassociableFMUL(N120) &&
17750 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17751 N120.getValueType())) {
17752 SDValue N1200 = N120.getOperand(0);
17753 SDValue N1201 = N120.getOperand(1);
17754 return matcher.getNode(
17755 PreferredFusedOpcode, SL, VT,
17756 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17757 N1.getOperand(1),
17758 matcher.getNode(
17759 PreferredFusedOpcode, SL, VT,
17760 matcher.getNode(ISD::FNEG, SL, VT,
17761 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17762 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17763 }
17764 }
17765
17766 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17767 // -> (fma (fneg (fpext y)), (fpext z),
17768 // (fma (fneg (fpext u)), (fpext v), x))
17769 // FIXME: This turns two single-precision and one double-precision
17770 // operation into two double-precision operations, which might not be
17771 // interesting for all targets, especially GPUs.
17772 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17773 SDValue CvtSrc = N1.getOperand(0);
17774 SDValue N100 = CvtSrc.getOperand(0);
17775 SDValue N101 = CvtSrc.getOperand(1);
17776 SDValue N102 = CvtSrc.getOperand(2);
17777 if (isContractableAndReassociableFMUL(N102) &&
17778 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17779 CvtSrc.getValueType())) {
17780 SDValue N1020 = N102.getOperand(0);
17781 SDValue N1021 = N102.getOperand(1);
17782 return matcher.getNode(
17783 PreferredFusedOpcode, SL, VT,
17784 matcher.getNode(ISD::FNEG, SL, VT,
17785 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17786 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17787 matcher.getNode(
17788 PreferredFusedOpcode, SL, VT,
17789 matcher.getNode(ISD::FNEG, SL, VT,
17790 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17791 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17792 }
17793 }
17794 }
17795
17796 return SDValue();
17797}
17798
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
  if (!FAdd->getFlags().hasNoInfs())
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  // NOTE(review): the tail of this initializer is not visible in this
  // extract; confirm against the full source.
  bool HasFMA =
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  // X is the FADD candidate; Y is the other multiplicand. Fires only when
  // X's second operand is a (splat) constant exactly +/-1.0.
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
      }
    }
    return SDValue();
  };

  // FMUL commutes, so try both operand orders.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  // Handles the +/-1.0 constant on either side of the FSUB.
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
      }
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
17892
17893SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17894 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17895
17896 // FADD -> FMA combines:
17897 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17898 if (Fused.getOpcode() != ISD::DELETED_NODE)
17899 AddToWorklist(Fused.getNode());
17900 return Fused;
17901 }
17902 return SDValue();
17903}
17904
/// Combiner entry for ISD::FADD: constant folding, canonicalization, and a
/// set of fast-math-flag-gated reassociation folds, ending with the
/// FADD -> FMA fusion attempt.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Whether each operand is a scalar FP constant or constant build_vector.
  bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fadd c1, c2) -> c1 + c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  // NOTE(review): the trailing term of this condition is not visible in this
  // extract; confirm against the full source.
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Flags.hasNoSignedZeros() ||
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
            N1, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
            N0, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);

  // Matches a single-use (fmul B, -2.0) with a (splat) constant RHS.
  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if (Flags.hasNoNaNs() && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  // considered and which do not require both nsz and reassoc
  if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    // NOTE(review): part of this condition is not visible in this extract;
    // confirm against the full source.
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT, N1,
                             DAG.getConstantFP(3.0, DL, VT));
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT, N0,
                             DAG.getConstantFP(3.0, DL, VT));
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT));
      }
    }
  } // reassoc && nsz && AllowNewConst

  if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
    // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
    if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
                                          VT, N0, N1, Flags))
      return SD;
  }

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
    if (Fused.getOpcode() != ISD::DELETED_NODE)
      AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
18092
18093SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
18094 SDValue Chain = N->getOperand(0);
18095 SDValue N0 = N->getOperand(1);
18096 SDValue N1 = N->getOperand(2);
18097 EVT VT = N->getValueType(0);
18098 EVT ChainVT = N->getValueType(1);
18099 SDLoc DL(N);
18100 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18101
18102 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
18103 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18104 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18105 N1, DAG, LegalOperations, ForCodeSize)) {
18106 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18107 {Chain, N0, NegN1});
18108 }
18109
18110 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
18111 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18112 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18113 N0, DAG, LegalOperations, ForCodeSize)) {
18114 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18115 {Chain, N1, NegN0});
18116 }
18117 return SDValue();
18118}
18119
/// Combiner entry for ISD::FSUB: constant folding, zero/identity folds gated
/// by fast-math flags and denormal mode, then the FSUB -> FMA fusion attempt.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fsub c1, c2) -> c1-c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Subtracting -0.0 is always an identity; subtracting +0.0 additionally
  // needs nsz or proof that no user inspects the sign of a zero result
  // (because +0.0 - +0.0 is +0.0 but -0.0 + -0.0 ... sign differs for A==-0.0).
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
        DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Requires nnan: x - x is NaN when x is NaN or infinite.
    if (Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() || Flags.hasNoSignedZeros() ||
        DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
      // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
      // flushed to zero, unless all users treat denorms as zero (DAZ).
      // FIXME: This transform will change the sign of a NaN and the behavior
      // of a signaling NaN. It is only valid when a NoNaN flag is present.
      DenormalMode DenormMode = DAG.getDenormalMode(VT);
      if (DenormMode == DenormalMode::getIEEE()) {
        // Prefer a full negated expression; fall back to a plain FNEG node.
        if (SDValue NegN1 =
                TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
          return NegN1;
        if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT, N1);
      }
    }
  }

  if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
      N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (SDValue NegN1 =
          TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
    return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
18201
// Transform IEEE Floats:
//     (fmul C, (uitofp Pow2))
//         -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
//     (fdiv C, (uitofp Pow2))
//         -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
//
// The rationale is fmul/fdiv by a power of 2 is just change the exponent, so
// there is no need for more than an add/sub.
//
// This is valid under the following circumstances:
// 1) We are dealing with IEEE floats
// 2) C is normal
// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
// TODO: Much of this could also be used for generating `ldexp` on targets the
// prefer it.
SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
  EVT VT = N->getValueType(0);
  // NOTE(review): the guard condition for this early exit is not visible in
  // this extract; confirm against the full source.
    return SDValue();

  SDValue ConstOp, Pow2Op;

  // Mantissa width shared by all constant elements; set on the first valid
  // constant and required to match for the rest.
  std::optional<int> Mantissa;
  auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
    // For FDIV only the numerator (operand 0) may be the constant.
    if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
      return false;

    ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
    Pow2Op = N->getOperand(1 - ConstOpIdx);
    // Accept uitofp, or sitofp whose input is provably non-negative.
    if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
        (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
         !DAG.computeKnownBits(Pow2Op).isNonNegative()))
      return false;

    Pow2Op = Pow2Op.getOperand(0);

    // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
    // TODO: We could use knownbits to make this bound more precise.
    int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();

    auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
      if (CFP == nullptr)
        return false;

      const APFloat &APF = CFP->getValueAPF();

      // Make sure we have normal constant.
      if (!APF.isNormal())
        return false;

      // Make sure the floats exponent is within the bounds that this transform
      // produces bitwise equals value.
      int CurExp = ilogb(APF);
      // FMul by pow2 will only increase exponent.
      int MinExp =
          N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
      // FDiv by pow2 will only decrease exponent.
      int MaxExp =
          N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
      // NOTE(review): the second half of this bounds check is not visible in
      // this extract; confirm against the full source.
      if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
        return false;

      // Finally make sure we actually know the mantissa for the float type.
      int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
      if (!Mantissa)
        Mantissa = ThisMantissa;

      return *Mantissa == ThisMantissa && ThisMantissa > 0;
    };

    // TODO: We may be able to include undefs.
    return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
  };

  if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
    return SDValue();

  if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
    return SDValue();

  // Get log2 after all other checks have taken place. This is because
  // BuildLogBase2 may create a new node.
  SDLoc DL(N);
  // Get Log2 type with same bitwidth as the float type (VT).
  EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
  if (VT.isVector())
    NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,

  SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
                               /*InexpensiveOnly*/ true, NewIntVT);
  if (!Log2)
    return SDValue();

  // Perform actual transform.
  SDValue MantissaShiftCnt =
      DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
  // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
  // `(X << C1) + (C << C1)`, but that isn't always the case because of the
  // cast. We could implement that by handle here to handle the casts.
  SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
  SDValue ResAsInt =
      DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
                  NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
  SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
  return ResAsFP;
}
18310
/// Combiner entry for ISD::FMUL: constant folding, canonicalization,
/// reassociation folds, select-based fabs recognition, FMA distributive
/// combining, and the int-pow2 shift/add rewrite.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fmul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  // NOTE(review): the condition guarding this return is not visible in this
  // extract; confirm against the full source.
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    // NOTE(review): part of this condition is not visible in this extract.
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
    }

    // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
    if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
                                          VT, N0, N1, Flags))
      return SD;
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

  // fold (fmul X, -1.0) -> (fsub -0.0, X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
    if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
      return DAG.getNode(ISD::FSUB, DL, VT,
                         DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
    }
  }

  // -N0 * -N1 --> N0 * N1
  // NOTE(review): the cost-variable declarations for this fold are not
  // visible in this extract; confirm against the full source.
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  if (NegN0) {
    // Keep NegN0 alive while computing NegN1 so it is not deleted as unused.
    HandleSDNode NegN0Handle(NegN0);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
      return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // For "less than" compares, swap the select arms so the logic below
      // can treat everything as a "greater than" compare.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        [[fallthrough]];
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                             DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
  // able to run.
  if (SDValue R = combineFMulOrFDivWithIntPow2(N))
    return R;

  return SDValue();
}
18457
/// Combiner entry for ISD::FMA (and, via the match context, VP_FMA):
/// constant folding, double-negation elimination, identity folds, and
/// flag-gated reassociation with FMUL operands.
template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  // FMA nodes have flags that propagate to the created nodes.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  MatchContextClass matcher(DAG, TLI, N);

  // Constant fold FMA.
  if (SDValue C =
          DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
    return C;

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  // NOTE(review): the cost-variable declarations for this fold are not
  // visible in this extract; confirm against the full source.
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  if (NegN0) {
    // Keep NegN0 alive while computing NegN1 so it is not deleted as unused.
    HandleSDNode NegN0Handle(NegN0);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
      return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
  }

  if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
    // With a zero multiplicand the product term vanishes and the FMA
    // reduces to its addend (nsz, or an addend other than -0.0, required).
    if (N->getFlags().hasNoSignedZeros() ||
        (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
      if (N0CFP && N0CFP->isZero())
        return N2;
      if (N1CFP && N1CFP->isZero())
        return N2;
    }
  }

  // FIXME: Support splat of constant.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return matcher.getNode(ISD::FADD, DL, VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  // NOTE(review): the condition guarding this return is not visible in this
  // extract; confirm against the full source.
    return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);

  bool CanReassociate = N->getFlags().hasAllowReassociation();
  if (CanReassociate) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    // NOTE(review): part of this condition is not visible in this extract.
    if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
      return matcher.getNode(
          ISD::FMUL, DL, VT, N0,
          matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    // NOTE(review): part of this condition is not visible in this extract.
    if (matcher.match(N0, ISD::FMUL) &&
      return matcher.getNode(
          ISD::FMA, DL, VT, N0.getOperand(0),
          matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
    }
  }

  // (fma x, -1, y) -> (fadd (fneg x), y)
  // FIXME: Support splat of constant.
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return matcher.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // NOTE(review): part of this condition is not visible in this extract.
    if (matcher.match(N0, ISD::FNEG) &&
        (N1.hasOneUse() &&
         !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
      return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                             matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
    }
  }

  // FIXME: Support splat of constant.
  if (CanReassociate) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return matcher.getNode(ISD::FMUL, DL, VT, N0,
                             matcher.getNode(ISD::FADD, DL, VT, N1,
                                             DAG.getConstantFP(1.0, DL, VT)));
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
      return matcher.getNode(ISD::FMUL, DL, VT, N0,
                             matcher.getNode(ISD::FADD, DL, VT, N1,
                                             DAG.getConstantFP(-1.0, DL, VT)));
    }
  }

  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  // NOTE(review): the negated-expression call is partially elided here.
  if (!TLI.isFNegFree(VT))
          SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
      return matcher.getNode(ISD::FNEG, DL, VT, Neg);
  return SDValue();
}
18582
18583SDValue DAGCombiner::visitFMAD(SDNode *N) {
18584 SDValue N0 = N->getOperand(0);
18585 SDValue N1 = N->getOperand(1);
18586 SDValue N2 = N->getOperand(2);
18587 EVT VT = N->getValueType(0);
18588 SDLoc DL(N);
18589
18590 // Constant fold FMAD.
18591 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18592 return C;
18593
18594 return SDValue();
18595}
18596
18597SDValue DAGCombiner::visitFMULADD(SDNode *N) {
18598 SDValue N0 = N->getOperand(0);
18599 SDValue N1 = N->getOperand(1);
18600 SDValue N2 = N->getOperand(2);
18601 EVT VT = N->getValueType(0);
18602 SDLoc DL(N);
18603
18604 // Constant fold FMULADD.
18605 if (SDValue C =
18606 DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
18607 return C;
18608
18609 return SDValue();
18610}
18611
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  // least 1 extra instruction. But the perf win may be substantial enough
  // that only minsize should restrict this.
  // The transform rewrites already-legal FDIVs, so it is only run before
  // legalization, and only when this division carries the 'arcp' flag.
  const SDNodeFlags Flags = N->getFlags();
  if (LegalDAG || !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorMinNumElements();

  // Cheap pre-check against the raw use count before scanning the use list.
  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->users()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
      if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
          U->getOperand(0) == U->getOperand(1).getOperand(0) &&
          U->getFlags().hasAllowReassociation() &&
          U->getFlags().hasNoSignedZeros())
        continue;

      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  // Build the shared reciprocal: 1.0 / divisor, carrying N's flags.
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0); // N was replaced.
}
18690
/// Combine an ISD::FDIV node: constant folding, reciprocal formation, and
/// rsqrt/reciprocal estimate expansions gated by fast-math flags.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fdiv c1, c2) -> c1/c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

    return V;

  // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
  // the loss is acceptable with AllowReciprocal.
  if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
    // Compute the reciprocal 1.0 / c2.
    const APFloat &N1APF = N1CFP->getValueAPF();
    APFloat Recip = APFloat::getOne(N1APF.getSemantics());
    // Only do the transform if the reciprocal is a legal fp immediate that
    // isn't too nasty (eg NaN, denormal, ...).
    if (((st == APFloat::opOK && !Recip.isDenormal()) ||
         (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
        (!LegalOperations ||
         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
         // backend)... we should handle this gracefully after Legalize.
         // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
         TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getConstantFP(Recip, DL, VT));
  }

  if (Flags.hasAllowReciprocal()) {
    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal();
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Z) --> X * rsqrt(Z)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT &&
               N1AllowReciprocal) {
      // X / fpext(sqrt(Z)) --> X * fpext(rsqrt(Z))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Z)) --> X * fpround(rsqrt(Z))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue Sqrt, Y;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(0);
        Y = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(1);
        Y = N1.getOperand(0);
      }
      if (Sqrt.getNode()) {
        // If the other multiply operand is known positive, pull it into the
        // sqrt. That will eliminate the division if we convert to an estimate.
        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
          SDValue A;
          if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
            A = Y.getOperand(0);
          else if (Y == Sqrt.getOperand(0))
            A = Y;
          if (A) {
            // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
            // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
            SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
            SDValue AAZ =
                DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
              return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);

            // Estimate creation failed. Clean up speculatively created nodes.
            recursivelyDeleteUnusedNodes(AAZ.getNode());
          }
        }

        // We found a FSQRT, so try to make this fold:
        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
          AddToWorklist(Div.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (Flags.hasNoInfs())
      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
        return RV;
  }

  // Fold X/Sqrt(X) -> Sqrt(X)
  if ((Flags.hasNoSignedZeros() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) &&
      Flags.hasAllowReassociation())
    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
      return N1;

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  if (NegN0) {
    // Keep NegN0 alive while the second negation query may CSE/delete nodes.
    HandleSDNode NegN0Handle(NegN0);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
      return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
  }

  if (SDValue R = combineFMulOrFDivWithIntPow2(N))
    return R;

  return SDValue();
}
18838
/// Combine an ISD::FREM node: constant folding, select hoisting, and lowering
/// to x - trunc(x/y)*y when the divisor is a known power-of-two FP value.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDNodeFlags Flags = N->getFlags();
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  SDLoc DL(N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
  // power of 2.
  if (!TLI.isOperationLegal(ISD::FREM, VT) &&
      DAG.isKnownToBeAPowerOfTwoFP(N1)) {
    // A copysign is needed to preserve the sign of a -0.0 result unless the
    // flags (nsz) or the users of this node say the sign bit is ignorable.
    bool NeedsCopySign = !Flags.hasNoSignedZeros() &&
                         !DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
    SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
    SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
    SDValue MLA;
      // Fused form: N0 - trunc(N0/N1)*N1 as fma(-trunc, N1, N0).
      MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
                        N1, N0);
    } else {
      // Unfused form: explicit multiply followed by subtract.
      SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
      MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
    }
    return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
  }

  return SDValue();
}
18882
18883SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18884 SDNodeFlags Flags = N->getFlags();
18885
18886 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18887 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18888 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
18889 return SDValue();
18890
18891 SDValue N0 = N->getOperand(0);
18892 if (TLI.isFsqrtCheap(N0, DAG))
18893 return SDValue();
18894
18895 // FSQRT nodes have flags that propagate to the created nodes.
18896 SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
18897 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18898 // transform the fdiv, we may produce a sub-optimal estimate sequence
18899 // because the reciprocal calculation may not have to filter out a
18900 // 0.0 input.
18901 return buildSqrtEstimate(N0);
18902}
18903
18904/// copysign(x, fp_extend(y)) -> copysign(x, y)
18905/// copysign(x, fp_round(y)) -> copysign(x, y)
18906/// Operands to the functions are the type of X and Y respectively.
18907static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18908 // Always fold no-op FP casts.
18909 if (XTy == YTy)
18910 return true;
18911
18912 // Do not optimize out type conversion of f128 type yet.
18913 // For some targets like x86_64, configuration is changed to keep one f128
18914 // value in one SSE register, but instruction selection cannot handle
18915 // FCOPYSIGN on SSE registers yet.
18916 if (YTy == MVT::f128)
18917 return false;
18918
18919 // Avoid mismatched vector operand types, for better instruction selection.
18920 return !YTy.isVector();
18921}
18922
  // SDNode overload: check whether the sign operand of an FCOPYSIGN node is
  // an FP cast that can be looked through. (Enclosing signature is elided in
  // this view.)
  SDValue N1 = N->getOperand(1);
  // Only FP_EXTEND / FP_ROUND sign operands are candidates.
  if (N1.getOpcode() != ISD::FP_EXTEND &&
      N1.getOpcode() != ISD::FP_ROUND)
    return false;
  // Delegate to the EVT overload with the cast's result and source types.
  EVT N1VT = N1->getValueType(0);
  EVT N1Op0VT = N1->getOperand(0).getValueType();
  return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}
18932
/// Combine an ISD::FCOPYSIGN node: constant folding, looking through FP casts
/// on the sign operand, and rewriting as a disjoint OR when the magnitude
/// operand's sign bit is known zero.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
    return C;

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));

    return SDValue(N, 0);

  // The bitwise rewrite below requires the sign operand to have the same
  // type as the result.
  if (VT != N1.getValueType())
    return SDValue();

  // If this is equivalent to a disjoint or, replace it with one. This can
  // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
  if (DAG.SignBitIsZeroFP(N0) &&
    // TODO: Just directly match the shift pattern. computeKnownBits is heavy
    // for a such a narrowly targeted case.
    EVT IntVT = VT.changeTypeToInteger();
    // TODO: It appears to be profitable in some situations to unconditionally
    // emit a fabs(n0) to perform this combine.
    SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
    SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);

    // OR the integer bit patterns; N0's sign bit is known zero, so the OR
    // simply installs N1's sign bit.
    SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
    return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
  }

  return SDValue();
}
18973
/// Combine an ISD::FPOW node with a constant exponent: pow(x, 1/3) can become
/// a cube root, and pow(x, 0.25)/pow(x, 0.75) can become sqrt sequences,
/// subject to fast-math flags.
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  // Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) = nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (ForCodeSize)
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
  }

  return SDValue();
}
19050
                                const TargetLowering &TLI) {
  // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
  // If NoSignedZerosFPMath is enabled, this is a direct replacement.
  // Otherwise, for strict math, we must handle edge cases:
  // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
  // as example, it first becomes integer 0, and is converted back to +0.0.
  // FTRUNC on its own could produce -0.0.

  // FIXME: We should be able to use node-level FMF here.
  // The rewrite only makes sense when the target has a native FTRUNC.
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
    return SDValue();

  // fptosi/fptoui round towards zero, so converting from FP to integer and
  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
  SDValue N0 = N->getOperand(0);
  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
      N0.getOperand(0).getValueType() == VT) {
    return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
  }

  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
      N0.getOperand(0).getValueType() == VT) {
      return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));

    // Strict math: use FABS to handle negative inputs correctly.
    if (TLI.isFAbsFree(VT)) {
      SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
      return DAG.getNode(ISD::FTRUNC, DL, VT, Abs);
    }
  }

  return SDValue();
}
19088
/// Combine an ISD::SINT_TO_FP node: undef/constant folding, conversion to
/// UINT_TO_FP, select formation from setcc operands, and cast elimination.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();
  SDLoc DL(N);

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, DL, VT);

  // fold (sint_to_fp c1) -> c1fp
  // ...but only if the target supports immediate floating-point values
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
      return C;

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
      !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
  // (select (setcc x, y, cc), 1.0, 0.0)
  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0.getOperand(0),
                         DAG.getConstantFP(1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // Try to fold [us]itofp(fpto[us]i X) into a single ftrunc.
  if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
    return FTrunc;

  // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
          N0.getOperand(0).getValueType()))
    return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));

  return SDValue();
}
19142
/// Combine an ISD::UINT_TO_FP node: undef/constant folding, conversion to
/// SINT_TO_FP, select formation from setcc operands, and cast elimination.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();
  SDLoc DL(N);

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, DL, VT);

  // fold (uint_to_fp c1) -> c1fp
  // ...but only if the target supports immediate floating-point values
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
      return C;

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
  }

  // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // Try to fold [us]itofp(fpto[us]i X) into a single ftrunc.
  if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
    return FTrunc;

  // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
          N0.getOperand(0).getValueType()))
    return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));

  return SDValue();
}
19185
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// (Enclosing signature is elided in this view.)
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only int -> fp -> int round trips are candidates.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits();
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &Sem = N0.getValueType().getFltSemantics();

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: sign-extend only when both sides are signed.
      unsigned ExtOp =
          IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, DL, VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
    // Same width: the round trip is a no-op (bitcast covers vector cases).
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
19227
19228SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19229 SDValue N0 = N->getOperand(0);
19230 EVT VT = N->getValueType(0);
19231 SDLoc DL(N);
19232
19233 // fold (fp_to_sint undef) -> undef
19234 if (N0.isUndef())
19235 return DAG.getUNDEF(VT);
19236
19237 // fold (fp_to_sint c1fp) -> c1
19238 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
19239 return C;
19240
19241 return FoldIntToFPToInt(N, DL, DAG);
19242}
19243
19244SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19245 SDValue N0 = N->getOperand(0);
19246 EVT VT = N->getValueType(0);
19247 SDLoc DL(N);
19248
19249 // fold (fp_to_uint undef) -> undef
19250 if (N0.isUndef())
19251 return DAG.getUNDEF(VT);
19252
19253 // fold (fp_to_uint c1fp) -> c1
19254 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19255 return C;
19256
19257 return FoldIntToFPToInt(N, DL, DAG);
19258}
19259
19260SDValue DAGCombiner::visitXROUND(SDNode *N) {
19261 SDValue N0 = N->getOperand(0);
19262 EVT VT = N->getValueType(0);
19263
19264 // fold (lrint|llrint undef) -> undef
19265 // fold (lround|llround undef) -> undef
19266 if (N0.isUndef())
19267 return DAG.getUNDEF(VT);
19268
19269 // fold (lrint|llrint c1fp) -> c1
19270 // fold (lround|llround c1fp) -> c1
19271 if (SDValue C =
19272 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19273 return C;
19274
19275 return SDValue();
19276}
19277
/// Combine an ISD::FP_ROUND node: constant folding, cancellation with
/// FP_EXTEND, merging of double roundings, and hoisting through FCOPYSIGN.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fp_round c1fp) -> c1fp
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
    return C;

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // Operand 1 == 1 marks a value-preserving truncation (no bits lost).
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Avoid folding legal fp_rounds into non-legal ones.
    if (!hasOperation(ISD::FP_ROUND, VT))
      return SDValue();

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if ((N->getFlags().hasAllowContract() &&
         N0->getFlags().hasAllowContract()) ||
        N0IsTrunc)
      return DAG.getNode(
          ISD::FP_ROUND, DL, VT, N0.getOperand(0),
          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Note: From a legality perspective, this is a two step transform. First,
  // we duplicate the fp_round to the arguments of the copysign, then we
  // eliminate the fp_round on Y. The second step requires an additional
  // predicate to match the implementation above.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
                                      N0.getValueType())) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
19343
// Eliminate a floating-point widening of a narrowed value if the fast math
// flags allow it.
// (Enclosing signature is elided in this view.)
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Pick the narrowing opcode that pairs with this widening node.
  unsigned NarrowingOp;
  switch (N->getOpcode()) {
  case ISD::FP16_TO_FP:
    NarrowingOp = ISD::FP_TO_FP16;
    break;
  case ISD::BF16_TO_FP:
    NarrowingOp = ISD::FP_TO_BF16;
    break;
  case ISD::FP_EXTEND:
    NarrowingOp = ISD::FP_ROUND;
    break;
  default:
    llvm_unreachable("Expected widening FP cast");
  }

  // Match widen(narrow(x)) where x already has the widened type.
  if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
    const SDNodeFlags NarrowFlags = N0->getFlags();
    const SDNodeFlags WidenFlags = N->getFlags();
    // Narrowing can introduce inf and change the encoding of a nan, so the
    // widen must have the nnan and ninf flags to indicate that we don't need to
    // care about that. We are also removing a rounding step, and that requires
    // both the narrow and widen to allow contraction.
    if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
        NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
      return N0.getOperand(0);
    }
  }

  return SDValue();
}
19380
/// Combine an ISD::FP_EXTEND node: constant folding, cancellation with
/// FP16_TO_FP and FP_ROUND, extending loads, and FP cast-pair elimination.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
      return FoldedVOp;

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
    return C;

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
    return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Replace the old load's value with a round of the extending load so
    // other users of the load still see the narrow type.
    CombineTo(
        N0.getNode(),
        DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
                    DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
        ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  if (SDValue CastEliminated = eliminateFPCastPair(N))
    return CastEliminated;

  return SDValue();
}
19439
19440SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19441 SDValue N0 = N->getOperand(0);
19442 EVT VT = N->getValueType(0);
19443
19444 // fold (fceil c1) -> fceil(c1)
19445 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19446 return C;
19447
19448 return SDValue();
19449}
19450
19451SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19452 SDValue N0 = N->getOperand(0);
19453 EVT VT = N->getValueType(0);
19454
19455 // fold (ftrunc c1) -> ftrunc(c1)
19456 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19457 return C;
19458
19459 // fold ftrunc (known rounded int x) -> x
19460 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19461 // likely to be generated to extract integer from a rounded floating value.
19462 switch (N0.getOpcode()) {
19463 default: break;
19464 case ISD::FRINT:
19465 case ISD::FTRUNC:
19466 case ISD::FNEARBYINT:
19467 case ISD::FROUNDEVEN:
19468 case ISD::FFLOOR:
19469 case ISD::FCEIL:
19470 return N0;
19471 }
19472
19473 return SDValue();
19474}
19475
19476SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19477 SDValue N0 = N->getOperand(0);
19478
19479 // fold (ffrexp c1) -> ffrexp(c1)
19481 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19482 return SDValue();
19483}
19484
19485SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19486 SDValue N0 = N->getOperand(0);
19487 EVT VT = N->getValueType(0);
19488
19489 // fold (ffloor c1) -> ffloor(c1)
19490 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19491 return C;
19492
19493 return SDValue();
19494}
19495
19496SDValue DAGCombiner::visitFNEG(SDNode *N) {
19497 SDValue N0 = N->getOperand(0);
19498 EVT VT = N->getValueType(0);
19499 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19500
19501 // Constant fold FNEG.
19502 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19503 return C;
19504
19505 if (SDValue NegN0 =
19506 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19507 return NegN0;
19508
19509 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19510 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19511 // know it was called from a context with a nsz flag if the input fsub does
19512 // not.
19513 if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
19514 N0.hasOneUse()) {
19515 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19516 N0.getOperand(0));
19517 }
19518
19520 return SDValue(N, 0);
19521
19522 if (SDValue Cast = foldSignChangeInBitcast(N))
19523 return Cast;
19524
19525 return SDValue();
19526}
19527
19528SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19529 SDValue N0 = N->getOperand(0);
19530 SDValue N1 = N->getOperand(1);
19531 EVT VT = N->getValueType(0);
19532 const SDNodeFlags Flags = N->getFlags();
19533 unsigned Opc = N->getOpcode();
19534 bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19535 bool ReturnsOtherForAllNaNs =
19537 bool IsMin =
19539 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19540
19541 // Constant fold.
19542 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19543 return C;
19544
19545 // Canonicalize to constant on RHS.
19548 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19549
19550 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19551 const APFloat &AF = N1CFP->getValueAPF();
19552
19553 // minnum(X, qnan) -> X
19554 // maxnum(X, qnan) -> X
19555 // minimum(X, nan) -> qnan
19556 // maximum(X, nan) -> qnan
19557 // minimumnum(X, nan) -> X
19558 // maximumnum(X, nan) -> X
19559 if (AF.isNaN()) {
19560 if (PropAllNaNsToQNaNs) {
19561 if (AF.isSignaling())
19562 return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
19563 return N->getOperand(1);
19564 } else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) {
19565 return N->getOperand(0);
19566 }
19567 return SDValue();
19568 }
19569
19570 // In the following folds, inf can be replaced with the largest finite
19571 // float, if the ninf flag is set.
19572 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19573 // minimum(X, -inf) -> -inf if nnan
19574 // maximum(X, +inf) -> +inf if nnan
19575 // minimumnum(X, -inf) -> -inf
19576 // maximumnum(X, +inf) -> +inf
19577 if (IsMin == AF.isNegative() &&
19578 (ReturnsOtherForAllNaNs || Flags.hasNoNaNs()))
19579 return N->getOperand(1);
19580
19581 // minnum(X, +inf) -> X if nnan
19582 // maxnum(X, -inf) -> X if nnan
19583 // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
19584 // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
19585 // minimumnum(X, +inf) -> X if nnan
19586 // maximumnum(X, -inf) -> X if nnan
19587 if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19588 return N->getOperand(0);
19589 }
19590 }
19591
19592 // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
19594 return SDValue();
19595
19596 if (SDValue SD = reassociateReduction(
19597 PropAllNaNsToQNaNs
19600 Opc, SDLoc(N), VT, N0, N1, Flags))
19601 return SD;
19602
19603 return SDValue();
19604}
19605
19606SDValue DAGCombiner::visitFABS(SDNode *N) {
19607 SDValue N0 = N->getOperand(0);
19608 EVT VT = N->getValueType(0);
19609 SDLoc DL(N);
19610
19611 // fold (fabs c1) -> fabs(c1)
19612 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19613 return C;
19614
19616 return SDValue(N, 0);
19617
19618 if (SDValue Cast = foldSignChangeInBitcast(N))
19619 return Cast;
19620
19621 return SDValue();
19622}
19623
19624SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19625 SDValue Chain = N->getOperand(0);
19626 SDValue N1 = N->getOperand(1);
19627 SDValue N2 = N->getOperand(2);
19628
19629 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19630 // nondeterministic jumps).
19631 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19632 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19633 N1->getOperand(0), N2, N->getFlags());
19634 }
19635
19636 // Variant of the previous fold where there is a SETCC in between:
19637 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19638 // =>
19639 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19640 // =>
19641 // BRCOND(SETCC(X, CONST, Cond))
19642 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19643 // isn't equivalent to true or false.
19644 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19645 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19646 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19647 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19649 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19650 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19651 bool Updated = false;
19652
19653 // Is 'X Cond C' always true or false?
19654 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19655 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19656 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19657 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19658 (Cond == ISD::SETGT && C->isMaxSignedValue());
19659 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19660 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19661 (Cond == ISD::SETUGE && C->isZero()) ||
19662 (Cond == ISD::SETGE && C->isMinSignedValue());
19663 return True || False;
19664 };
19665
19666 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19667 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19668 S0 = S0->getOperand(0);
19669 Updated = true;
19670 }
19671 }
19672 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19673 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19674 S1 = S1->getOperand(0);
19675 Updated = true;
19676 }
19677 }
19678
19679 if (Updated)
19680 return DAG.getNode(
19681 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19682 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19683 N->getFlags());
19684 }
19685
19686 // If N is a constant we could fold this into a fallthrough or unconditional
19687 // branch. However that doesn't happen very often in normal code, because
19688 // Instcombine/SimplifyCFG should have handled the available opportunities.
19689 // If we did this folding here, it would be necessary to update the
19690 // MachineBasicBlock CFG, which is awkward.
19691
19692 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19693 // on the target, also copy fast math flags.
19694 if (N1.getOpcode() == ISD::SETCC &&
19696 N1.getOperand(0).getValueType())) {
19697 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19698 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19699 N1->getFlags());
19700 }
19701
19702 if (N1.hasOneUse()) {
19703 // rebuildSetCC calls visitXor which may change the Chain when there is a
19704 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19705 HandleSDNode ChainHandle(Chain);
19706 if (SDValue NewN1 = rebuildSetCC(N1))
19707 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19708 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19709 }
19710
19711 return SDValue();
19712}
19713
19714SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19715 if (N.getOpcode() == ISD::SRL ||
19716 (N.getOpcode() == ISD::TRUNCATE &&
19717 (N.getOperand(0).hasOneUse() &&
19718 N.getOperand(0).getOpcode() == ISD::SRL))) {
19719 // Look pass the truncate.
19720 if (N.getOpcode() == ISD::TRUNCATE)
19721 N = N.getOperand(0);
19722
19723 // Match this pattern so that we can generate simpler code:
19724 //
19725 // %a = ...
19726 // %b = and i32 %a, 2
19727 // %c = srl i32 %b, 1
19728 // brcond i32 %c ...
19729 //
19730 // into
19731 //
19732 // %a = ...
19733 // %b = and i32 %a, 2
19734 // %c = setcc eq %b, 0
19735 // brcond %c ...
19736 //
19737 // This applies only when the AND constant value has one bit set and the
19738 // SRL constant is equal to the log2 of the AND constant. The back-end is
19739 // smart enough to convert the result into a TEST/JMP sequence.
19740 SDValue Op0 = N.getOperand(0);
19741 SDValue Op1 = N.getOperand(1);
19742
19743 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19744 SDValue AndOp1 = Op0.getOperand(1);
19745
19746 if (AndOp1.getOpcode() == ISD::Constant) {
19747 const APInt &AndConst = AndOp1->getAsAPIntVal();
19748
19749 if (AndConst.isPowerOf2() &&
19750 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19751 SDLoc DL(N);
19752 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19753 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19754 ISD::SETNE);
19755 }
19756 }
19757 }
19758 }
19759
19760 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19761 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19762 if (N.getOpcode() == ISD::XOR) {
19763 // Because we may call this on a speculatively constructed
19764 // SimplifiedSetCC Node, we need to simplify this node first.
19765 // Ideally this should be folded into SimplifySetCC and not
19766 // here. For now, grab a handle to N so we don't lose it from
19767 // replacements interal to the visit.
19768 HandleSDNode XORHandle(N);
19769 while (N.getOpcode() == ISD::XOR) {
19770 SDValue Tmp = visitXOR(N.getNode());
19771 // No simplification done.
19772 if (!Tmp.getNode())
19773 break;
19774 // Returning N is form in-visit replacement that may invalidated
19775 // N. Grab value from Handle.
19776 if (Tmp.getNode() == N.getNode())
19777 N = XORHandle.getValue();
19778 else // Node simplified. Try simplifying again.
19779 N = Tmp;
19780 }
19781
19782 if (N.getOpcode() != ISD::XOR)
19783 return N;
19784
19785 SDValue Op0 = N->getOperand(0);
19786 SDValue Op1 = N->getOperand(1);
19787
19788 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19789 bool Equal = false;
19790 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19791 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19792 Op0.getValueType() == MVT::i1) {
19793 N = Op0;
19794 Op0 = N->getOperand(0);
19795 Op1 = N->getOperand(1);
19796 Equal = true;
19797 }
19798
19799 EVT SetCCVT = N.getValueType();
19800 if (LegalTypes)
19801 SetCCVT = getSetCCResultType(SetCCVT);
19802 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19803 // it would introduce illegal operations post-legalization as this can
19804 // result in infinite looping between converting xor->setcc here, and
19805 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19807 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19808 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19809 }
19810 }
19811
19812 return SDValue();
19813}
19814
19815// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19816//
19817SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19818 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19819 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19820
19821 // If N is a constant we could fold this into a fallthrough or unconditional
19822 // branch. However that doesn't happen very often in normal code, because
19823 // Instcombine/SimplifyCFG should have handled the available opportunities.
19824 // If we did this folding here, it would be necessary to update the
19825 // MachineBasicBlock CFG, which is awkward.
19826
19827 // Use SimplifySetCC to simplify SETCC's.
19829 CondLHS, CondRHS, CC->get(), SDLoc(N),
19830 false);
19831 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19832
19833 // fold to a simpler setcc
19834 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19835 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19836 N->getOperand(0), Simp.getOperand(2),
19837 Simp.getOperand(0), Simp.getOperand(1),
19838 N->getOperand(4));
19839
19840 return SDValue();
19841}
19842
19843static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19844 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19845 const TargetLowering &TLI) {
19846 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19847 if (LD->isIndexed())
19848 return false;
19849 EVT VT = LD->getMemoryVT();
19850 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19851 return false;
19852 Ptr = LD->getBasePtr();
19853 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19854 if (ST->isIndexed())
19855 return false;
19856 EVT VT = ST->getMemoryVT();
19857 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19858 return false;
19859 Ptr = ST->getBasePtr();
19860 IsLoad = false;
19861 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19862 if (LD->isIndexed())
19863 return false;
19864 EVT VT = LD->getMemoryVT();
19865 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19866 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19867 return false;
19868 Ptr = LD->getBasePtr();
19869 IsMasked = true;
19871 if (ST->isIndexed())
19872 return false;
19873 EVT VT = ST->getMemoryVT();
19874 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19875 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19876 return false;
19877 Ptr = ST->getBasePtr();
19878 IsLoad = false;
19879 IsMasked = true;
19880 } else {
19881 return false;
19882 }
19883 return true;
19884}
19885
19886/// Try turning a load/store into a pre-indexed load/store when the base
19887/// pointer is an add or subtract and it has other uses besides the load/store.
19888/// After the transformation, the new indexed load/store has effectively folded
19889/// the add/subtract in and all of its other uses are redirected to the
19890/// new load/store.
19891bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19892 if (Level < AfterLegalizeDAG)
19893 return false;
19894
19895 bool IsLoad = true;
19896 bool IsMasked = false;
19897 SDValue Ptr;
19898 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19899 Ptr, TLI))
19900 return false;
19901
19902 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19903 // out. There is no reason to make this a preinc/predec.
19904 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19905 Ptr->hasOneUse())
19906 return false;
19907
19908 // Ask the target to do addressing mode selection.
19912 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19913 return false;
19914
19915 // Backends without true r+i pre-indexed forms may need to pass a
19916 // constant base with a variable offset so that constant coercion
19917 // will work with the patterns in canonical form.
19918 bool Swapped = false;
19919 if (isa<ConstantSDNode>(BasePtr)) {
19920 std::swap(BasePtr, Offset);
19921 Swapped = true;
19922 }
19923
19924 // Don't create a indexed load / store with zero offset.
19926 return false;
19927
19928 // Try turning it into a pre-indexed load / store except when:
19929 // 1) The new base ptr is a frame index.
19930 // 2) If N is a store and the new base ptr is either the same as or is a
19931 // predecessor of the value being stored.
19932 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19933 // that would create a cycle.
19934 // 4) All uses are load / store ops that use it as old base ptr.
19935
19936 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19937 // (plus the implicit offset) to a register to preinc anyway.
19938 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19939 return false;
19940
19941 // Check #2.
19942 if (!IsLoad) {
19943 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19944 : cast<StoreSDNode>(N)->getValue();
19945
19946 // Would require a copy.
19947 if (Val == BasePtr)
19948 return false;
19949
19950 // Would create a cycle.
19951 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19952 return false;
19953 }
19954
19955 // Caches for hasPredecessorHelper.
19956 SmallPtrSet<const SDNode *, 32> Visited;
19958 Worklist.push_back(N);
19959
19960 // If the offset is a constant, there may be other adds of constants that
19961 // can be folded with this one. We should do this to avoid having to keep
19962 // a copy of the original base pointer.
19963 SmallVector<SDNode *, 16> OtherUses;
19966 for (SDUse &Use : BasePtr->uses()) {
19967 // Skip the use that is Ptr and uses of other results from BasePtr's
19968 // node (important for nodes that return multiple results).
19969 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19970 continue;
19971
19972 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19973 MaxSteps))
19974 continue;
19975
19976 if (Use.getUser()->getOpcode() != ISD::ADD &&
19977 Use.getUser()->getOpcode() != ISD::SUB) {
19978 OtherUses.clear();
19979 break;
19980 }
19981
19982 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19983 if (!isa<ConstantSDNode>(Op1)) {
19984 OtherUses.clear();
19985 break;
19986 }
19987
19988 // FIXME: In some cases, we can be smarter about this.
19989 if (Op1.getValueType() != Offset.getValueType()) {
19990 OtherUses.clear();
19991 break;
19992 }
19993
19994 OtherUses.push_back(Use.getUser());
19995 }
19996
19997 if (Swapped)
19998 std::swap(BasePtr, Offset);
19999
20000 // Now check for #3 and #4.
20001 bool RealUse = false;
20002
20003 for (SDNode *User : Ptr->users()) {
20004 if (User == N)
20005 continue;
20006 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
20007 return false;
20008
20009 // If Ptr may be folded in addressing mode of other use, then it's
20010 // not profitable to do this transformation.
20011 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
20012 RealUse = true;
20013 }
20014
20015 if (!RealUse)
20016 return false;
20017
20019 if (!IsMasked) {
20020 if (IsLoad)
20021 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
20022 else
20023 Result =
20024 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
20025 } else {
20026 if (IsLoad)
20027 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
20028 Offset, AM);
20029 else
20030 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
20031 Offset, AM);
20032 }
20033 ++PreIndexedNodes;
20034 ++NodesCombined;
20035 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
20036 Result.dump(&DAG); dbgs() << '\n');
20037 WorklistRemover DeadNodes(*this);
20038 if (IsLoad) {
20039 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
20040 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
20041 } else {
20042 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
20043 }
20044
20045 // Finally, since the node is now dead, remove it from the graph.
20046 deleteAndRecombine(N);
20047
20048 if (Swapped)
20049 std::swap(BasePtr, Offset);
20050
20051 // Replace other uses of BasePtr that can be updated to use Ptr
20052 for (SDNode *OtherUse : OtherUses) {
20053 unsigned OffsetIdx = 1;
20054 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
20055 OffsetIdx = 0;
20056 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
20057 "Expected BasePtr operand");
20058
20059 // We need to replace ptr0 in the following expression:
20060 // x0 * offset0 + y0 * ptr0 = t0
20061 // knowing that
20062 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
20063 //
20064 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
20065 // indexed load/store and the expression that needs to be re-written.
20066 //
20067 // Therefore, we have:
20068 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
20069
20070 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
20071 const APInt &Offset0 = CN->getAPIntValue();
20072 const APInt &Offset1 = Offset->getAsAPIntVal();
20073 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
20074 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
20075 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
20076 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
20077
20078 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
20079
20080 APInt CNV = Offset0;
20081 if (X0 < 0) CNV = -CNV;
20082 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
20083 else CNV = CNV - Offset1;
20084
20085 SDLoc DL(OtherUse);
20086
20087 // We can now generate the new expression.
20088 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
20089 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
20090
20091 SDValue NewUse =
20092 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
20093 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
20094 deleteAndRecombine(OtherUse);
20095 }
20096
20097 // Replace the uses of Ptr with uses of the updated base value.
20098 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
20099 deleteAndRecombine(Ptr.getNode());
20100 AddToWorklist(Result.getNode());
20101
20102 return true;
20103}
20104
20105static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
20106 SDValue &BasePtr, SDValue &Offset,
20108 SelectionDAG &DAG,
20109 const TargetLowering &TLI) {
20110 if (PtrUse == N ||
20111 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
20112 return false;
20113
20114 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
20115 return false;
20116
20117 // Don't create a indexed load / store with zero offset.
20119 return false;
20120
20121 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
20122 return false;
20123
20126 for (SDNode *User : BasePtr->users()) {
20127 if (User == Ptr.getNode())
20128 continue;
20129
20130 // No if there's a later user which could perform the index instead.
20131 if (isa<MemSDNode>(User)) {
20132 bool IsLoad = true;
20133 bool IsMasked = false;
20134 SDValue OtherPtr;
20136 IsMasked, OtherPtr, TLI)) {
20138 Worklist.push_back(User);
20139 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
20140 return false;
20141 }
20142 }
20143
20144 // If all the uses are load / store addresses, then don't do the
20145 // transformation.
20146 if (User->getOpcode() == ISD::ADD || User->getOpcode() ==