DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/Metadata.h"
60#include "llvm/Support/Debug.h"
68#include <algorithm>
69#include <cassert>
70#include <cstdint>
71#include <functional>
72#include <iterator>
73#include <optional>
74#include <string>
75#include <tuple>
76#include <utility>
77#include <variant>
78
79#include "MatchContext.h"
80
81using namespace llvm;
82using namespace llvm::SDPatternMatch;
83
84#define DEBUG_TYPE "dagcombine"
85
86STATISTIC(NodesCombined , "Number of dag nodes combined");
87STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
88STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
89STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
90STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
91STATISTIC(SlicedLoads, "Number of load sliced");
92STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
93
94DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
95 "Controls whether a DAG combine is performed for a node");
96
97static cl::opt<bool>
98CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
99 cl::desc("Enable DAG combiner's use of IR alias analysis"));
100
101static cl::opt<bool>
102UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
103 cl::desc("Enable DAG combiner's use of TBAA"));
104
105#ifndef NDEBUG
106static cl::opt<std::string>
107CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
108 cl::desc("Only use DAG-combiner alias analysis in this"
109 " function"));
110#endif
111
112/// Hidden option to stress test load slicing, i.e., when this option
113/// is enabled, load slicing bypasses most of its profitability guards.
114static cl::opt<bool>
115StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
116 cl::desc("Bypass the profitability model of load slicing"),
117 cl::init(false));
118
119static cl::opt<bool>
120 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
121 cl::desc("DAG combiner may split indexing from loads"));
122
123static cl::opt<bool>
124 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
125 cl::desc("DAG combiner enable merging multiple stores "
126 "into a wider store"));
127
129 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
130 cl::desc("Limit the number of operands to inline for Token Factors"));
131
133 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
134 cl::desc("Limit the number of times for the same StoreNode and RootNode "
135 "to bail out in store merging dependence check"));
136
138 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
139 cl::desc("DAG combiner enable reducing the width of load/op/store "
140 "sequence"));
142 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
143 cl::Hidden, cl::init(false),
144 cl::desc("DAG combiner force override the narrowing profitable check when "
145 "reducing the width of load/op/store sequences"));
146
148 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
149 cl::desc("DAG combiner enable load/<replace bytes>/store with "
150 "a narrower store"));
151
153 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
154 cl::desc(
155 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
156namespace {
157
158 class DAGCombiner {
159 SelectionDAG &DAG;
160 const TargetLowering &TLI;
161 const SelectionDAGTargetInfo *STI;
163 CodeGenOptLevel OptLevel;
164 bool LegalDAG = false;
165 bool LegalOperations = false;
166 bool LegalTypes = false;
167 bool ForCodeSize;
168 bool DisableGenericCombines;
169
170 /// Worklist of all of the nodes that need to be simplified.
171 ///
172 /// This must behave as a stack -- new nodes to process are pushed onto the
173 /// back and when processing we pop off of the back.
174 ///
175 /// The worklist will not contain duplicates but may contain null entries
176 /// due to nodes being deleted from the underlying DAG. For fast lookup and
177 /// deduplication, the index of the node in this vector is stored in the
178 /// node in SDNode::CombinerWorklistIndex.
179 SmallVector<SDNode *, 64> Worklist;
180
181 /// This records all nodes attempted to be added to the worklist since we
182 /// considered a new worklist entry. Since we do not add duplicate nodes
183 /// to the worklist, this is different from the tail of the worklist.
184 SmallSetVector<SDNode *, 32> PruningList;
185
186 /// Map from candidate StoreNode to the pair of RootNode and count.
187 /// The count is used to track how many times we have seen the StoreNode
188 /// with the same RootNode bail out in dependence check. If we have seen
189 /// the bail out for the same pair many times over a limit, we won't
190 /// consider the StoreNode with the same RootNode as store merging
191 /// candidate again.
192 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
193
194 // AA - Used for DAG load/store alias analysis.
195 AliasAnalysis *AA;
196
197 /// This caches all chains that have already been processed in
198 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
199 /// stores candidates.
200 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
201
202 /// When an instruction is simplified, add all users of the instruction to
203 /// the work lists because they might get more simplified now.
204 void AddUsersToWorklist(SDNode *N) {
205 for (SDNode *Node : N->users())
206 AddToWorklist(Node);
207 }
208
209 /// Convenient shorthand to add a node and all of its users to the worklist.
210 void AddToWorklistWithUsers(SDNode *N) {
211 AddUsersToWorklist(N);
212 AddToWorklist(N);
213 }
214
215 // Prune potentially dangling nodes. This is called after
216 // any visit to a node, but should also be called during a visit after any
217 // failed combine which may have created a DAG node.
218 void clearAddedDanglingWorklistEntries() {
219 // Check any nodes added to the worklist to see if they are prunable.
220 while (!PruningList.empty()) {
221 auto *N = PruningList.pop_back_val();
222 if (N->use_empty())
223 recursivelyDeleteUnusedNodes(N);
224 }
225 }
226
227 SDNode *getNextWorklistEntry() {
228 // Before we do any work, remove nodes that are not in use.
229 clearAddedDanglingWorklistEntries();
230 SDNode *N = nullptr;
231 // The Worklist holds the SDNodes in order, but it may contain null
232 // entries.
233 while (!N && !Worklist.empty()) {
234 N = Worklist.pop_back_val();
235 }
236
237 if (N) {
238 assert(N->getCombinerWorklistIndex() >= 0 &&
239 "Found a worklist entry without a corresponding map entry!");
240 // Set to -2 to indicate that we combined the node.
241 N->setCombinerWorklistIndex(-2);
242 }
243 return N;
244 }
245
246 /// Call the node-specific routine that folds each particular type of node.
247 SDValue visit(SDNode *N);
248
249 public:
250 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
251 : DAG(D), TLI(D.getTargetLoweringInfo()),
252 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
253 ForCodeSize = DAG.shouldOptForSize();
254 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
255
256 MaximumLegalStoreInBits = 0;
257 // We use the minimum store size here, since that's all we can guarantee
258 // for the scalable vector types.
259 for (MVT VT : MVT::all_valuetypes())
260 if (EVT(VT).isSimple() && VT != MVT::Other &&
261 TLI.isTypeLegal(EVT(VT)) &&
262 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
263 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
264 }
265
266 void ConsiderForPruning(SDNode *N) {
267 // Mark this for potential pruning.
268 PruningList.insert(N);
269 }
270
271 /// Add to the worklist, making sure its instance is at the back (next to
272 /// be processed).
273 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
274 bool SkipIfCombinedBefore = false) {
275 assert(N->getOpcode() != ISD::DELETED_NODE &&
276 "Deleted Node added to Worklist");
277
278 // Skip handle nodes as they can't usefully be combined and confuse the
279 // zero-use deletion strategy.
280 if (N->getOpcode() == ISD::HANDLENODE)
281 return;
282
283 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
284 return;
285
286 if (IsCandidateForPruning)
287 ConsiderForPruning(N);
288
289 if (N->getCombinerWorklistIndex() < 0) {
290 N->setCombinerWorklistIndex(Worklist.size());
291 Worklist.push_back(N);
292 }
293 }
294
295 /// Remove all instances of N from the worklist.
296 void removeFromWorklist(SDNode *N) {
297 PruningList.remove(N);
298 StoreRootCountMap.erase(N);
299
300 int WorklistIndex = N->getCombinerWorklistIndex();
301 // If not in the worklist, the index might be -1 or -2 (was combined
302 // before). As the node gets deleted anyway, there's no need to update
303 // the index.
304 if (WorklistIndex < 0)
305 return; // Not in the worklist.
306
307 // Null out the entry rather than erasing it to avoid a linear operation.
308 Worklist[WorklistIndex] = nullptr;
309 N->setCombinerWorklistIndex(-1);
310 }
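 // Illustration (editorial sketch of the index convention used above): the
 // CombinerWorklistIndex stored in each SDNode acts as a three-state tag:
 //   index >= 0 : node is queued and Worklist[index] == N
 //   index == -1 : node is not currently in the worklist
 //   index == -2 : node was already popped and combined; honored by
 //                 AddToWorklist(N, /*IsCandidateForPruning*/true,
 //                               /*SkipIfCombinedBefore*/true)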
311
312 void deleteAndRecombine(SDNode *N);
313 bool recursivelyDeleteUnusedNodes(SDNode *N);
314
315 /// Replaces all uses of the results of one DAG node with new values.
316 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
317 bool AddTo = true);
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
321 return CombineTo(N, &Res, 1, AddTo);
322 }
323
324 /// Replaces all uses of the results of one DAG node with new values.
325 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
326 bool AddTo = true) {
327 SDValue To[] = { Res0, Res1 };
328 return CombineTo(N, To, 2, AddTo);
329 }
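 // Example (illustrative sketch only; visitFooLikeNode is a hypothetical
 // name, the real folds live in the visit* routines later in this file):
 //   SDValue DAGCombiner::visitFooLikeNode(SDNode *N) {
 //     // fold (foo x, x) -> x
 //     if (N->getOperand(0) == N->getOperand(1))
 //       return CombineTo(N, N->getOperand(0));
 //     return SDValue();
 //   }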
330
331 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
332
333 private:
334 unsigned MaximumLegalStoreInBits;
335
336 /// Check the specified integer node value to see if it can be simplified or
337 /// if things it uses can be simplified by bit propagation.
338 /// If so, return true.
339 bool SimplifyDemandedBits(SDValue Op) {
340 unsigned BitWidth = Op.getScalarValueSizeInBits();
341 APInt DemandedBits = APInt::getAllOnes(BitWidth);
342 return SimplifyDemandedBits(Op, DemandedBits);
343 }
344
345 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
346 EVT VT = Op.getValueType();
347 APInt DemandedElts = VT.isFixedLengthVector()
348 ? APInt::getAllOnes(VT.getVectorNumElements())
349 : APInt(1, 1);
350 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
351 }
352
353 /// Check the specified vector node value to see if it can be simplified or
354 /// if things it uses can be simplified as it only uses some of the
355 /// elements. If so, return true.
356 bool SimplifyDemandedVectorElts(SDValue Op) {
357 // TODO: For now just pretend it cannot be simplified.
358 if (Op.getValueType().isScalableVector())
359 return false;
360
361 unsigned NumElts = Op.getValueType().getVectorNumElements();
362 APInt DemandedElts = APInt::getAllOnes(NumElts);
363 return SimplifyDemandedVectorElts(Op, DemandedElts);
364 }
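 // Example (illustrative): for a node such as (and X, 0xFF), simplifying with
 // DemandedBits = 0xFF lets the operations feeding X stop computing anything
 // above bit 7; similarly, SimplifyDemandedVectorElts can turn off lanes that
 // are never extracted or stored.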
365
366 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
367 const APInt &DemandedElts,
368 bool AssumeSingleUse = false);
369 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
370 bool AssumeSingleUse = false);
371
372 bool CombineToPreIndexedLoadStore(SDNode *N);
373 bool CombineToPostIndexedLoadStore(SDNode *N);
374 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
375 bool SliceUpLoad(SDNode *N);
376
377 // Looks up the chain to find a unique (unaliased) store feeding the passed
378 // load. If no such store is found, returns a nullptr.
379 // Note: This will look past a CALLSEQ_START if the load is chained to it,
380 // so that it can find stack stores for byval params.
381 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
382 // Scalars have size 0 to distinguish from singleton vectors.
383 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
384 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
385 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
386
387 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
388 /// load.
389 ///
390 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
391 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
392 /// \param EltNo index of the vector element to load.
393 /// \param OriginalLoad load that EVE came from to be replaced.
394 /// \returns EVE on success, SDValue() on failure.
395 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
396 SDValue EltNo,
397 LoadSDNode *OriginalLoad);
398 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
399 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
400 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
401 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
402 SDValue PromoteIntBinOp(SDValue Op);
403 SDValue PromoteIntShiftOp(SDValue Op);
404 SDValue PromoteExtend(SDValue Op);
405 bool PromoteLoad(SDValue Op);
406
407 SDValue foldShiftToAvg(SDNode *N);
408
409 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
410 SDValue RHS, SDValue True, SDValue False,
411 ISD::CondCode CC);
412
413 /// Call the node-specific routine that knows how to fold each
414 /// particular type of node. If that doesn't do anything, try the
415 /// target-specific DAG combines.
416 SDValue combine(SDNode *N);
417
418 // Visitation implementation - Implement dag node combining for different
419 // node types. The semantics are as follows:
420 // Return Value:
421 // SDValue.getNode() == 0 - No change was made
422 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
423 // otherwise - N should be replaced by the returned Operand.
424 //
425 SDValue visitTokenFactor(SDNode *N);
426 SDValue visitMERGE_VALUES(SDNode *N);
427 SDValue visitADD(SDNode *N);
428 SDValue visitADDLike(SDNode *N);
429 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
430 SDValue visitSUB(SDNode *N);
431 SDValue visitADDSAT(SDNode *N);
432 SDValue visitSUBSAT(SDNode *N);
433 SDValue visitADDC(SDNode *N);
434 SDValue visitADDO(SDNode *N);
435 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitSUBC(SDNode *N);
437 SDValue visitSUBO(SDNode *N);
438 SDValue visitADDE(SDNode *N);
439 SDValue visitUADDO_CARRY(SDNode *N);
440 SDValue visitSADDO_CARRY(SDNode *N);
441 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
442 SDNode *N);
443 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
444 SDNode *N);
445 SDValue visitSUBE(SDNode *N);
446 SDValue visitUSUBO_CARRY(SDNode *N);
447 SDValue visitSSUBO_CARRY(SDNode *N);
448 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
449 SDValue visitMULFIX(SDNode *N);
450 SDValue useDivRem(SDNode *N);
451 SDValue visitSDIV(SDNode *N);
452 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
453 SDValue visitUDIV(SDNode *N);
454 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
455 SDValue visitREM(SDNode *N);
456 SDValue visitMULHU(SDNode *N);
457 SDValue visitMULHS(SDNode *N);
458 SDValue visitAVG(SDNode *N);
459 SDValue visitABD(SDNode *N);
460 SDValue visitSMUL_LOHI(SDNode *N);
461 SDValue visitUMUL_LOHI(SDNode *N);
462 SDValue visitMULO(SDNode *N);
463 SDValue visitIMINMAX(SDNode *N);
464 SDValue visitAND(SDNode *N);
465 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
466 SDValue visitOR(SDNode *N);
467 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
468 SDValue visitXOR(SDNode *N);
469 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
470 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
471 SDValue visitSHL(SDNode *N);
472 SDValue visitSRA(SDNode *N);
473 SDValue visitSRL(SDNode *N);
474 SDValue visitFunnelShift(SDNode *N);
475 SDValue visitSHLSAT(SDNode *N);
476 SDValue visitRotate(SDNode *N);
477 SDValue visitABS(SDNode *N);
478 SDValue visitBSWAP(SDNode *N);
479 SDValue visitBITREVERSE(SDNode *N);
480 SDValue visitCTLZ(SDNode *N);
481 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
482 SDValue visitCTTZ(SDNode *N);
483 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
484 SDValue visitCTPOP(SDNode *N);
485 SDValue visitSELECT(SDNode *N);
486 SDValue visitVSELECT(SDNode *N);
487 SDValue visitVP_SELECT(SDNode *N);
488 SDValue visitSELECT_CC(SDNode *N);
489 SDValue visitSETCC(SDNode *N);
490 SDValue visitSETCCCARRY(SDNode *N);
491 SDValue visitSIGN_EXTEND(SDNode *N);
492 SDValue visitZERO_EXTEND(SDNode *N);
493 SDValue visitANY_EXTEND(SDNode *N);
494 SDValue visitAssertExt(SDNode *N);
495 SDValue visitAssertAlign(SDNode *N);
496 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
497 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
498 SDValue visitTRUNCATE(SDNode *N);
499 SDValue visitTRUNCATE_USAT_U(SDNode *N);
500 SDValue visitBITCAST(SDNode *N);
501 SDValue visitFREEZE(SDNode *N);
502 SDValue visitBUILD_PAIR(SDNode *N);
503 SDValue visitFADD(SDNode *N);
504 SDValue visitVP_FADD(SDNode *N);
505 SDValue visitVP_FSUB(SDNode *N);
506 SDValue visitSTRICT_FADD(SDNode *N);
507 SDValue visitFSUB(SDNode *N);
508 SDValue visitFMUL(SDNode *N);
509 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
510 SDValue visitFMAD(SDNode *N);
511 SDValue visitFDIV(SDNode *N);
512 SDValue visitFREM(SDNode *N);
513 SDValue visitFSQRT(SDNode *N);
514 SDValue visitFCOPYSIGN(SDNode *N);
515 SDValue visitFPOW(SDNode *N);
516 SDValue visitFCANONICALIZE(SDNode *N);
517 SDValue visitSINT_TO_FP(SDNode *N);
518 SDValue visitUINT_TO_FP(SDNode *N);
519 SDValue visitFP_TO_SINT(SDNode *N);
520 SDValue visitFP_TO_UINT(SDNode *N);
521 SDValue visitXROUND(SDNode *N);
522 SDValue visitFP_ROUND(SDNode *N);
523 SDValue visitFP_EXTEND(SDNode *N);
524 SDValue visitFNEG(SDNode *N);
525 SDValue visitFABS(SDNode *N);
526 SDValue visitFCEIL(SDNode *N);
527 SDValue visitFTRUNC(SDNode *N);
528 SDValue visitFFREXP(SDNode *N);
529 SDValue visitFFLOOR(SDNode *N);
530 SDValue visitFMinMax(SDNode *N);
531 SDValue visitBRCOND(SDNode *N);
532 SDValue visitBR_CC(SDNode *N);
533 SDValue visitLOAD(SDNode *N);
534
535 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
536 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
537 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
538
539 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
540
541 SDValue visitSTORE(SDNode *N);
542 SDValue visitATOMIC_STORE(SDNode *N);
543 SDValue visitLIFETIME_END(SDNode *N);
544 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
545 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
546 SDValue visitBUILD_VECTOR(SDNode *N);
547 SDValue visitCONCAT_VECTORS(SDNode *N);
548 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
549 SDValue visitVECTOR_SHUFFLE(SDNode *N);
550 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
551 SDValue visitINSERT_SUBVECTOR(SDNode *N);
552 SDValue visitVECTOR_COMPRESS(SDNode *N);
553 SDValue visitMLOAD(SDNode *N);
554 SDValue visitMSTORE(SDNode *N);
555 SDValue visitMGATHER(SDNode *N);
556 SDValue visitMSCATTER(SDNode *N);
557 SDValue visitMHISTOGRAM(SDNode *N);
558 SDValue visitVPGATHER(SDNode *N);
559 SDValue visitVPSCATTER(SDNode *N);
560 SDValue visitVP_STRIDED_LOAD(SDNode *N);
561 SDValue visitVP_STRIDED_STORE(SDNode *N);
562 SDValue visitFP_TO_FP16(SDNode *N);
563 SDValue visitFP16_TO_FP(SDNode *N);
564 SDValue visitFP_TO_BF16(SDNode *N);
565 SDValue visitBF16_TO_FP(SDNode *N);
566 SDValue visitVECREDUCE(SDNode *N);
567 SDValue visitVPOp(SDNode *N);
568 SDValue visitGET_FPENV_MEM(SDNode *N);
569 SDValue visitSET_FPENV_MEM(SDNode *N);
570
571 template <class MatchContextClass>
572 SDValue visitFADDForFMACombine(SDNode *N);
573 template <class MatchContextClass>
574 SDValue visitFSUBForFMACombine(SDNode *N);
575 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
576
577 SDValue XformToShuffleWithZero(SDNode *N);
578 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
579 const SDLoc &DL,
580 SDNode *N,
581 SDValue N0,
582 SDValue N1);
583 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
584 SDValue N1, SDNodeFlags Flags);
585 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
586 SDValue N1, SDNodeFlags Flags);
587 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
588 EVT VT, SDValue N0, SDValue N1,
589 SDNodeFlags Flags = SDNodeFlags());
590
591 SDValue visitShiftByConstant(SDNode *N);
592
593 SDValue foldSelectOfConstants(SDNode *N);
594 SDValue foldVSelectOfConstants(SDNode *N);
595 SDValue foldBinOpIntoSelect(SDNode *BO);
596 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
597 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
598 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
599 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
600 SDValue N2, SDValue N3, ISD::CondCode CC,
601 bool NotExtCompare = false);
602 SDValue convertSelectOfFPConstantsToLoadOffset(
603 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
604 ISD::CondCode CC);
605 SDValue foldSignChangeInBitcast(SDNode *N);
606 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
607 SDValue N2, SDValue N3, ISD::CondCode CC);
608 SDValue foldSelectOfBinops(SDNode *N);
609 SDValue foldSextSetcc(SDNode *N);
610 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
611 const SDLoc &DL);
612 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
613 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
614 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
615 SDValue False, ISD::CondCode CC, const SDLoc &DL);
616 SDValue unfoldMaskedMerge(SDNode *N);
617 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
618 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
619 const SDLoc &DL, bool foldBooleans);
620 SDValue rebuildSetCC(SDValue N);
621
622 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
623 SDValue &CC, bool MatchStrict = false) const;
624 bool isOneUseSetCC(SDValue N) const;
625
626 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
627 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
628
629 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
630 unsigned HiOp);
631 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
632 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
633 const TargetLowering &TLI);
634
635 SDValue CombineExtLoad(SDNode *N);
636 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
637 SDValue combineRepeatedFPDivisors(SDNode *N);
638 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
639 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
640 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
641 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
642 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
643 SDValue BuildSDIV(SDNode *N);
644 SDValue BuildSDIVPow2(SDNode *N);
645 SDValue BuildUDIV(SDNode *N);
646 SDValue BuildSREMPow2(SDNode *N);
647 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
648 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
649 bool KnownNeverZero = false,
650 bool InexpensiveOnly = false,
651 std::optional<EVT> OutVT = std::nullopt);
652 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
653 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
654 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
655 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
656 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
657 SDNodeFlags Flags, bool Reciprocal);
658 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
659 SDNodeFlags Flags, bool Reciprocal);
660 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
661 bool DemandHighBits = true);
662 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
663 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
664 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
665 unsigned PosOpcode, unsigned NegOpcode,
666 const SDLoc &DL);
667 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
668 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
669 unsigned PosOpcode, unsigned NegOpcode,
670 const SDLoc &DL);
671 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
672 SDValue MatchLoadCombine(SDNode *N);
673 SDValue mergeTruncStores(StoreSDNode *N);
674 SDValue reduceLoadWidth(SDNode *N);
675 SDValue ReduceLoadOpStoreWidth(SDNode *N);
677 SDValue TransformFPLoadStorePair(SDNode *N);
678 SDValue convertBuildVecZextToZext(SDNode *N);
679 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
680 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
681 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
682 SDValue reduceBuildVecToShuffle(SDNode *N);
683 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
684 ArrayRef<int> VectorMask, SDValue VecIn1,
685 SDValue VecIn2, unsigned LeftIdx,
686 bool DidSplitVec);
687 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
688
689 /// Walk up chain skipping non-aliasing memory nodes,
690 /// looking for aliasing nodes and adding them to the Aliases vector.
691 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
692 SmallVectorImpl<SDValue> &Aliases);
693
694 /// Return true if there is any possibility that the two addresses overlap.
695 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
696
697 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
698 /// chain (aliasing node.)
699 SDValue FindBetterChain(SDNode *N, SDValue Chain);
700
701 /// Try to replace a store and any possibly adjacent stores on
702 /// consecutive chains with better chains. Return true only if St is
703 /// replaced.
704 ///
705 /// Notice that other chains may still be replaced even if the function
706 /// returns false.
707 bool findBetterNeighborChains(StoreSDNode *St);
708
709 // Helper for findBetterNeighborChains. Walk up the store chain and add
710 // additional chained stores that do not overlap and can be parallelized.
711 bool parallelizeChainedStores(StoreSDNode *St);
712
713 /// Holds a pointer to an LSBaseSDNode as well as information on where it
714 /// is located in a sequence of memory operations connected by a chain.
715 struct MemOpLink {
716 // Ptr to the mem node.
717 LSBaseSDNode *MemNode;
718
719 // Offset from the base ptr.
720 int64_t OffsetFromBase;
721
722 MemOpLink(LSBaseSDNode *N, int64_t Offset)
723 : MemNode(N), OffsetFromBase(Offset) {}
724 };
725
726 // Classify the origin of a stored value.
727 enum class StoreSource { Unknown, Constant, Extract, Load };
728 StoreSource getStoreSource(SDValue StoreVal) {
729 switch (StoreVal.getOpcode()) {
730 case ISD::Constant:
731 case ISD::ConstantFP:
732 return StoreSource::Constant;
733 case ISD::BUILD_VECTOR:
734 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
735 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
736 return StoreSource::Constant;
737 return StoreSource::Unknown;
738 case ISD::EXTRACT_VECTOR_ELT:
739 case ISD::EXTRACT_SUBVECTOR:
740 return StoreSource::Extract;
741 case ISD::LOAD:
742 return StoreSource::Load;
743 default:
744 return StoreSource::Unknown;
745 }
746 }
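 // Example (illustrative): for a run of adjacent stores such as
 //   store i32 7, p ; store i32 (load q), p+4 ; store i32 (extractelt v, 0), p+8
 // the stored values classify as Constant, Load and Extract respectively,
 // which selects the matching tryStoreMergeOf* strategy declared below.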
747
748 /// This is a helper function for visitMUL to check the profitability
749 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
750 /// MulNode is the original multiply, AddNode is (add x, c1),
751 /// and ConstNode is c2.
752 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
753 SDValue ConstNode);
754
755 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
756 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
757 /// the type of the loaded value to be extended.
758 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
759 EVT LoadResultTy, EVT &ExtVT);
760
761 /// Helper function to calculate whether the given Load/Store can have its
762 /// width reduced to ExtVT.
763 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
764 EVT &MemVT, unsigned ShAmt = 0);
765
766 /// Used by BackwardsPropagateMask to find suitable loads.
767 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
768 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
769 ConstantSDNode *Mask, SDNode *&NodeToMask);
770 /// Attempt to propagate a given AND node back to load leaves so that they
771 /// can be combined into narrow loads.
772 bool BackwardsPropagateMask(SDNode *N);
773
774 /// Helper function for mergeConsecutiveStores which merges the component
775 /// store chains.
776 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
777 unsigned NumStores);
778
779 /// Helper function for mergeConsecutiveStores which checks if all the store
780 /// nodes have the same underlying object. We can still reuse the first
781 /// store's pointer info if all the stores are from the same object.
782 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
783
784 /// This is a helper function for mergeConsecutiveStores. When the source
785 /// elements of the consecutive stores are all constants or all extracted
786 /// vector elements, try to merge them into one larger store introducing
787 /// bitcasts if necessary. \return True if a merged store was created.
788 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
789 EVT MemVT, unsigned NumStores,
790 bool IsConstantSrc, bool UseVector,
791 bool UseTrunc);
792
793 /// This is a helper function for mergeConsecutiveStores. Stores that
794 /// potentially may be merged with St are placed in StoreNodes. On success,
795 /// returns a chain predecessor to all store candidates.
796 SDNode *getStoreMergeCandidates(StoreSDNode *St,
797 SmallVectorImpl<MemOpLink> &StoreNodes);
798
799 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
800 /// have indirect dependency through their operands. RootNode is the
801 /// predecessor to all stores calculated by getStoreMergeCandidates and is
802 /// used to prune the dependency check. \return True if safe to merge.
803 bool checkMergeStoreCandidatesForDependencies(
804 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
805 SDNode *RootNode);
806
807 /// This is a helper function for mergeConsecutiveStores. Given a list of
808 /// store candidates, find the first N that are consecutive in memory.
809 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
810 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
811 int64_t ElementSizeBytes) const;
812
813 /// This is a helper function for mergeConsecutiveStores. It is used for
814 /// store chains that are composed entirely of constant values.
815 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
816 unsigned NumConsecutiveStores,
817 EVT MemVT, SDNode *Root, bool AllowVectors);
818
819 /// This is a helper function for mergeConsecutiveStores. It is used for
820 /// store chains that are composed entirely of extracted vector elements.
821 /// When extracting multiple vector elements, try to store them in one
822 /// vector store rather than a sequence of scalar stores.
823 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
824 unsigned NumConsecutiveStores, EVT MemVT,
825 SDNode *Root);
826
827 /// This is a helper function for mergeConsecutiveStores. It is used for
828 /// store chains that are composed entirely of loaded values.
829 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
830 unsigned NumConsecutiveStores, EVT MemVT,
831 SDNode *Root, bool AllowVectors,
832 bool IsNonTemporalStore, bool IsNonTemporalLoad);
833
834 /// Merge consecutive store operations into a wide store.
835 /// This optimization uses wide integers or vectors when possible.
836 /// \return true if stores were merged.
837 bool mergeConsecutiveStores(StoreSDNode *St);
838
839 /// Try to transform a truncation where C is a constant:
840 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
841 ///
842 /// \p N needs to be a truncation and its first operand an AND. Other
843 /// requirements are checked by the function (e.g. that trunc is
844 /// single-use) and, if they are not met, an empty SDValue is returned.
845 SDValue distributeTruncateThroughAnd(SDNode *N);
846
847 /// Helper function to determine whether the target supports operation
848 /// given by \p Opcode for type \p VT, that is, whether the operation
849 /// is legal or custom before legalizing operations, and whether it is
850 /// legal (but not custom) after legalization.
851 bool hasOperation(unsigned Opcode, EVT VT) {
852 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
853 }
854
855 public:
856 /// Runs the dag combiner on all nodes in the work list
857 void Run(CombineLevel AtLevel);
858
859 SelectionDAG &getDAG() const { return DAG; }
860
861 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
862 EVT getShiftAmountTy(EVT LHSTy) {
863 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
864 }
865
866 /// This method returns true if we are running before type legalization or
867 /// if the specified VT is legal.
868 bool isTypeLegal(const EVT &VT) {
869 if (!LegalTypes) return true;
870 return TLI.isTypeLegal(VT);
871 }
872
873 /// Convenience wrapper around TargetLowering::getSetCCResultType
874 EVT getSetCCResultType(EVT VT) const {
875 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
876 }
877
878 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
879 SDValue OrigLoad, SDValue ExtLoad,
880 ISD::NodeType ExtType);
881 };
882
883/// This class is a DAGUpdateListener that removes any deleted
884/// nodes from the worklist.
885class WorklistRemover : public SelectionDAG::DAGUpdateListener {
886 DAGCombiner &DC;
887
888public:
889 explicit WorklistRemover(DAGCombiner &dc)
890 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
891
892 void NodeDeleted(SDNode *N, SDNode *E) override {
893 DC.removeFromWorklist(N);
894 }
895};
896
897class WorklistInserter : public SelectionDAG::DAGUpdateListener {
898 DAGCombiner &DC;
899
900public:
901 explicit WorklistInserter(DAGCombiner &dc)
902 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
903
904 // FIXME: Ideally we could add N to the worklist, but this causes exponential
905 // compile time costs in large DAGs, e.g. Halide.
906 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
907};
908
909} // end anonymous namespace
910
911//===----------------------------------------------------------------------===//
912// TargetLowering::DAGCombinerInfo implementation
913//===----------------------------------------------------------------------===//
914
915void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
916 ((DAGCombiner*)DC)->AddToWorklist(N);
917}
918
919SDValue TargetLowering::DAGCombinerInfo::
920CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
921 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
922}
923
924SDValue TargetLowering::DAGCombinerInfo::
925CombineTo(SDNode *N, SDValue Res, bool AddTo) {
926 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
927}
928
929SDValue TargetLowering::DAGCombinerInfo::
930CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
931 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
932}
933
934bool TargetLowering::DAGCombinerInfo::
935recursivelyDeleteUnusedNodes(SDNode *N) {
936 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
937}
938
939void TargetLowering::DAGCombinerInfo::
940CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
941 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
942}
943
944//===----------------------------------------------------------------------===//
945// Helper Functions
946//===----------------------------------------------------------------------===//
947
948void DAGCombiner::deleteAndRecombine(SDNode *N) {
949 removeFromWorklist(N);
950
951 // If the operands of this node are only used by the node, they will now be
952 // dead. Make sure to re-visit them and recursively delete dead nodes.
953 for (const SDValue &Op : N->ops())
954 // For an operand generating multiple values, one of the values may
955 // become dead allowing further simplification (e.g. split index
956 // arithmetic from an indexed load).
957 if (Op->hasOneUse() || Op->getNumValues() > 1)
958 AddToWorklist(Op.getNode());
959
960 DAG.DeleteNode(N);
961}
962
963// APInts must be the same size for most operations; this helper
964// function zero extends the shorter of the pair so that they match.
965// We provide an Offset so that we can create bitwidths that won't overflow.
966static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
967 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
968 LHS = LHS.zext(Bits);
969 RHS = RHS.zext(Bits);
970}
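// Example (illustrative): with LHS = APInt(8, 0x80) and RHS = APInt(16, 0x100),
// zeroExtendToMatch widens both to 16 bits (or 16 + Offset bits), so callers
// can safely combine them in a single arithmetic expression.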
971
972// Return true if this node is a setcc, or is a select_cc
973// that selects between the target values used for true and false, making it
974// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
975// the appropriate nodes based on the type of node we are checking. This
976// simplifies life a bit for the callers.
977bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
978 SDValue &CC, bool MatchStrict) const {
979 if (N.getOpcode() == ISD::SETCC) {
980 LHS = N.getOperand(0);
981 RHS = N.getOperand(1);
982 CC = N.getOperand(2);
983 return true;
984 }
985
986 if (MatchStrict &&
987 (N.getOpcode() == ISD::STRICT_FSETCC ||
988 N.getOpcode() == ISD::STRICT_FSETCCS)) {
989 LHS = N.getOperand(1);
990 RHS = N.getOperand(2);
991 CC = N.getOperand(3);
992 return true;
993 }
994
995 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
996 !TLI.isConstFalseVal(N.getOperand(3)))
997 return false;
998
999 if (TLI.getBooleanContents(N.getValueType()) ==
1000 TargetLowering::UndefinedBooleanContent)
1001 return false;
1002
1003 LHS = N.getOperand(0);
1004 RHS = N.getOperand(1);
1005 CC = N.getOperand(4);
1006 return true;
1007}
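// Example (illustrative): (select_cc lhs, rhs, TrueVal, FalseVal, cc) is
// treated as (setcc lhs, rhs, cc) when TrueVal/FalseVal are the target's
// canonical boolean true/false values and the boolean contents are defined,
// so callers can reuse or invert the comparison directly.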
1008
1009/// Return true if this is a SetCC-equivalent operation with only one use.
1010/// If this is true, it allows the users to invert the operation for free when
1011/// it is profitable to do so.
1012bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1013 SDValue N0, N1, N2;
1014 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1015 return true;
1016 return false;
1017}
1018
1019static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1020 if (!ScalarTy.isSimple())
1021 return false;
1022
1023 uint64_t MaskForTy = 0ULL;
1024 switch (ScalarTy.getSimpleVT().SimpleTy) {
1025 case MVT::i8:
1026 MaskForTy = 0xFFULL;
1027 break;
1028 case MVT::i16:
1029 MaskForTy = 0xFFFFULL;
1030 break;
1031 case MVT::i32:
1032 MaskForTy = 0xFFFFFFFFULL;
1033 break;
1034 default:
1035 return false;
1036 break;
1037 }
1038
1039 APInt Val;
1040 if (ISD::isConstantSplatVector(N, Val))
1041 return Val.getLimitedValue() == MaskForTy;
1042
1043 return false;
1044}
1045
1046// Determines if it is a constant integer or a splat/build vector of constant
1047// integers (and undefs).
1048// Do not permit build vector implicit truncation.
1049static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1050 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1051 return !(Const->isOpaque() && NoOpaques);
1052 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1053 return false;
1054 unsigned BitWidth = N.getScalarValueSizeInBits();
1055 for (const SDValue &Op : N->op_values()) {
1056 if (Op.isUndef())
1057 continue;
1058 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1059 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1060 (Const->isOpaque() && NoOpaques))
1061 return false;
1062 }
1063 return true;
1064}
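// Example (illustrative): returns true for (i32 7), for a splat_vector of 7,
// and for (build_vector 7, undef, 7, 7); returns false if any element is
// non-constant or was implicitly truncated to a narrower bit width.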
1065
1066// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1067// with undefs.
1068static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1069 if (V.getOpcode() != ISD::BUILD_VECTOR)
1070 return false;
1071 return isConstantOrConstantVector(V, NoOpaques) ||
1072 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1073}
1074
1075// Determine if this is an indexed load with an opaque target constant index.
1076static bool canSplitIdx(LoadSDNode *LD) {
1077 return MaySplitLoadIndex &&
1078 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1079 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1080}
1081
1082bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1083 const SDLoc &DL,
1084 SDNode *N,
1085 SDValue N0,
1086 SDValue N1) {
1087 // Currently this only tries to ensure we don't undo the GEP splits done by
1088 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1089 // we check if the following transformation would be problematic:
1090 // (load/store (add, (add, x, offset1), offset2)) ->
1091 // (load/store (add, x, offset1+offset2)).
1092
1093 // (load/store (add, (add, x, y), offset2)) ->
1094 // (load/store (add, (add, x, offset2), y)).
1095
1096 if (N0.getOpcode() != ISD::ADD)
1097 return false;
1098
1099 // Check for vscale addressing modes.
1100 // (load/store (add/sub (add x, y), vscale))
1101 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1102 // (load/store (add/sub (add x, y), (mul vscale, C)))
1103 if ((N1.getOpcode() == ISD::VSCALE ||
1104 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1105 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1106 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1107 N1.getValueType().getFixedSizeInBits() <= 64) {
1108 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1109 ? N1.getConstantOperandVal(0)
1110 : (N1.getOperand(0).getConstantOperandVal(0) *
1111 (N1.getOpcode() == ISD::SHL
1112 ? (1LL << N1.getConstantOperandVal(1))
1113 : N1.getConstantOperandVal(1)));
1114 if (Opc == ISD::SUB)
1115 ScalableOffset = -ScalableOffset;
1116 if (all_of(N->users(), [&](SDNode *Node) {
1117 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1118 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1119 TargetLoweringBase::AddrMode AM;
1120 AM.HasBaseReg = true;
1121 AM.ScalableOffset = ScalableOffset;
1122 EVT VT = LoadStore->getMemoryVT();
1123 unsigned AS = LoadStore->getAddressSpace();
1124 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1125 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1126 AS);
1127 }
1128 return false;
1129 }))
1130 return true;
1131 }
1132
1133 if (Opc != ISD::ADD)
1134 return false;
1135
1136 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1137 if (!C2)
1138 return false;
1139
1140 const APInt &C2APIntVal = C2->getAPIntValue();
1141 if (C2APIntVal.getSignificantBits() > 64)
1142 return false;
1143
1144 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1145 if (N0.hasOneUse())
1146 return false;
1147
1148 const APInt &C1APIntVal = C1->getAPIntValue();
1149 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1150 if (CombinedValueIntVal.getSignificantBits() > 64)
1151 return false;
1152 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1153
1154 for (SDNode *Node : N->users()) {
1155 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1156 // Is x[offset2] already not a legal addressing mode? If so then
1157 // reassociating the constants breaks nothing (we test offset2 because
1158 // that's the one we hope to fold into the load or store).
1159 TargetLoweringBase::AddrMode AM;
1160 AM.HasBaseReg = true;
1161 AM.BaseOffs = C2APIntVal.getSExtValue();
1162 EVT VT = LoadStore->getMemoryVT();
1163 unsigned AS = LoadStore->getAddressSpace();
1164 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1165 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1166 continue;
1167
1168 // Would x[offset1+offset2] still be a legal addressing mode?
1169 AM.BaseOffs = CombinedValue;
1170 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1171 return true;
1172 }
1173 }
1174 } else {
1175 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1176 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1177 return false;
1178
1179 for (SDNode *Node : N->users()) {
1180 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1181 if (!LoadStore)
1182 return false;
1183
1184 // Is x[offset2] a legal addressing mode? If so then
1185 // reassociating the constants breaks the address pattern.
1186 TargetLoweringBase::AddrMode AM;
1187 AM.HasBaseReg = true;
1188 AM.BaseOffs = C2APIntVal.getSExtValue();
1189 EVT VT = LoadStore->getMemoryVT();
1190 unsigned AS = LoadStore->getAddressSpace();
1191 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1192 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1193 return false;
1194 }
1195 return true;
1196 }
1197
1198 return false;
1199}
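// Example (illustrative): given (load (add (add x, 8), 16)), the code above
// asks the target whether [reg + 16] is already a legal addressing mode and
// whether [reg + 24] would still be one; if the combined offset is no longer
// legal, the function reports that reassociation would break the pattern
// created by CodeGenPrepare's GEP splitting.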
1200
1201/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1202/// \p N0 is the same kind of operation as \p Opc.
1203SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1204 SDValue N0, SDValue N1,
1205 SDNodeFlags Flags) {
1206 EVT VT = N0.getValueType();
1207
1208 if (N0.getOpcode() != Opc)
1209 return SDValue();
1210
1211 SDValue N00 = N0.getOperand(0);
1212 SDValue N01 = N0.getOperand(1);
1213
1214 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1215 SDNodeFlags NewFlags;
1216 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1217 Flags.hasNoUnsignedWrap())
1218 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1219
1220 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1219
1221 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1222 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1223 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1224 N0->getFlags().hasDisjoint());
1225 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1226 }
1227 return SDValue();
1228 }
1229 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1230 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1231 // iff (op x, c1) has one use
1232 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1233 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1234 }
1235 }
1236
1237 // Check for repeated operand logic simplifications.
1238 if (Opc == ISD::AND || Opc == ISD::OR) {
1239 // (N00 & N01) & N00 --> N00 & N01
1240 // (N00 & N01) & N01 --> N00 & N01
1241 // (N00 | N01) | N00 --> N00 | N01
1242 // (N00 | N01) | N01 --> N00 | N01
1243 if (N1 == N00 || N1 == N01)
1244 return N0;
1245 }
1246 if (Opc == ISD::XOR) {
1247 // (N00 ^ N01) ^ N00 --> N01
1248 if (N1 == N00)
1249 return N01;
1250 // (N00 ^ N01) ^ N01 --> N00
1251 if (N1 == N01)
1252 return N00;
1253 }
1254
1255 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1256 if (N1 != N01) {
1257 // Reassociate if (op N00, N1) already exists
1258 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1259 // If (Op (Op N00, N1), N01) already exists, we need to stop
1260 // reassociating to avoid an infinite loop.
1261 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1262 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1263 }
1264 }
1265
1266 if (N1 != N00) {
1267 // Reassociate if (op N01, N1) already exists
1268 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1269 // If (Op (Op N01, N1), N00) already exists, we need to stop
1270 // reassociating to avoid an infinite loop.
1271 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1272 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1273 }
1274 }
1275
1276 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1277 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1278 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1279 // comparisons with the same predicate. This enables optimizations as the
1280 // following one:
1281 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1282 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1283 if (Opc == ISD::AND || Opc == ISD::OR) {
1284 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1285 N01->getOpcode() == ISD::SETCC) {
1286 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1287 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1288 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1289 if (CC1 == CC00 && CC1 != CC01) {
1290 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1291 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1292 }
1293 if (CC1 == CC01 && CC1 != CC00) {
1294 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1295 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1296 }
1297 }
1298 }
1299 }
1300
1301 return SDValue();
1302}
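// Example (illustrative): with Opc == ISD::ADD, N0 == (add x, 5) and N1 == 7,
// the constant path above folds to (add x, 12); with a non-constant N1 it can
// instead produce (add (add x, y), 5) when the target reports that
// reassociation as profitable.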
1303
1304/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1305/// same kind of operation as \p Opc.
1306SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1307 SDValue N1, SDNodeFlags Flags) {
1308 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1309
1310 // Floating-point reassociation is not allowed without loose FP math.
1311 if (N0.getValueType().isFloatingPoint() ||
1312 N1.getValueType().isFloatingPoint())
1313 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1314 return SDValue();
1315
1316 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1317 return Combined;
1318 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1319 return Combined;
1320 return SDValue();
1321}
1322
1323// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1324// Note that we only expect Flags to be passed from FP operations. For integer
1325// operations they need to be dropped.
1326SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1327 const SDLoc &DL, EVT VT, SDValue N0,
1328 SDValue N1, SDNodeFlags Flags) {
1329 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1330 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1331 N0->hasOneUse() && N1->hasOneUse() &&
1332 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1333 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1334 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1335 return DAG.getNode(RedOpc, DL, VT,
1336 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1337 N0.getOperand(0), N1.getOperand(0)));
1338 }
1339 return SDValue();
1340}
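// Example (illustrative): (fadd (vecreduce_fadd a), (vecreduce_fadd b)) can
// become (vecreduce_fadd (fadd a, b)) when both reductions have a single use,
// the element types match, and the target considers the reassociation
// profitable; the FP flags are propagated via the FlagInserter above.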
1341
1342SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1343 bool AddTo) {
1344 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1345 ++NodesCombined;
1346 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1347 To[0].dump(&DAG);
1348 dbgs() << " and " << NumTo - 1 << " other values\n");
1349 for (unsigned i = 0, e = NumTo; i != e; ++i)
1350 assert((!To[i].getNode() ||
1351 N->getValueType(i) == To[i].getValueType()) &&
1352 "Cannot combine value to value of different type!");
1353
1354 WorklistRemover DeadNodes(*this);
1355 DAG.ReplaceAllUsesWith(N, To);
1356 if (AddTo) {
1357 // Push the new nodes and any users onto the worklist
1358 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1359 if (To[i].getNode())
1360 AddToWorklistWithUsers(To[i].getNode());
1361 }
1362 }
1363
1364 // Finally, if the node is now dead, remove it from the graph. The node
1365 // may not be dead if the replacement process recursively simplified to
1366 // something else needing this node.
1367 if (N->use_empty())
1368 deleteAndRecombine(N);
1369 return SDValue(N, 0);
1370}
1371
1372void DAGCombiner::
1373CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1374 // Replace the old value with the new one.
1375 ++NodesCombined;
1376 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1377 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1378
1379 // Replace all uses.
1380 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1381
1382 // Push the new node and any (possibly new) users onto the worklist.
1383 AddToWorklistWithUsers(TLO.New.getNode());
1384
1385 // Finally, if the node is now dead, remove it from the graph.
1386 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1387}
1388
1389/// Check the specified integer node value to see if it can be simplified or if
1390/// things it uses can be simplified by bit propagation. If so, return true.
1391bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1392 const APInt &DemandedElts,
1393 bool AssumeSingleUse) {
1394 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1395 KnownBits Known;
1396 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1397 AssumeSingleUse))
1398 return false;
1399
1400 // Revisit the node.
1401 AddToWorklist(Op.getNode());
1402
1403 CommitTargetLoweringOpt(TLO);
1404 return true;
1405}
1406
1407/// Check the specified vector node value to see if it can be simplified or
1408/// if things it uses can be simplified as it only uses some of the elements.
1409/// If so, return true.
1410bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1411 const APInt &DemandedElts,
1412 bool AssumeSingleUse) {
1413 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1414 APInt KnownUndef, KnownZero;
1415 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1416 TLO, 0, AssumeSingleUse))
1417 return false;
1418
1419 // Revisit the node.
1420 AddToWorklist(Op.getNode());
1421
1422 CommitTargetLoweringOpt(TLO);
1423 return true;
1424}
1425
1426void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1427 SDLoc DL(Load);
1428 EVT VT = Load->getValueType(0);
1429 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1430
1431 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1432 Trunc.dump(&DAG); dbgs() << '\n');
1433
1434 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1435 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1436
1437 AddToWorklist(Trunc.getNode());
1438 recursivelyDeleteUnusedNodes(Load);
1439}
1440
1441SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1442 Replace = false;
1443 SDLoc DL(Op);
1444 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1445 LoadSDNode *LD = cast<LoadSDNode>(Op);
1446 EVT MemVT = LD->getMemoryVT();
1447 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1448 : LD->getExtensionType();
1449 Replace = true;
1450 return DAG.getExtLoad(ExtType, DL, PVT,
1451 LD->getChain(), LD->getBasePtr(),
1452 MemVT, LD->getMemOperand());
1453 }
1454
1455 unsigned Opc = Op.getOpcode();
1456 switch (Opc) {
1457 default: break;
1458 case ISD::AssertSext:
1459 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1460 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1461 break;
1462 case ISD::AssertZext:
1463 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1464 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1465 break;
1466 case ISD::Constant: {
1467 unsigned ExtOpc =
1468 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1469 return DAG.getNode(ExtOpc, DL, PVT, Op);
1470 }
1471 }
1472
1473 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1474 return SDValue();
1475 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1476}
1477
1478SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1479 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1480 return SDValue();
1481 EVT OldVT = Op.getValueType();
1482 SDLoc DL(Op);
1483 bool Replace = false;
1484 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1485 if (!NewOp.getNode())
1486 return SDValue();
1487 AddToWorklist(NewOp.getNode());
1488
1489 if (Replace)
1490 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1491 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1492 DAG.getValueType(OldVT));
1493}
1494
1495SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1496 EVT OldVT = Op.getValueType();
1497 SDLoc DL(Op);
1498 bool Replace = false;
1499 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1500 if (!NewOp.getNode())
1501 return SDValue();
1502 AddToWorklist(NewOp.getNode());
1503
1504 if (Replace)
1505 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1506 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1507}
1508
1509/// Promote the specified integer binary operation if the target indicates it is
1510/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1511/// i32 since i16 instructions are longer.
1512SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1513 if (!LegalOperations)
1514 return SDValue();
1515
1516 EVT VT = Op.getValueType();
1517 if (VT.isVector() || !VT.isInteger())
1518 return SDValue();
1519
1520 // If operation type is 'undesirable', e.g. i16 on x86, consider
1521 // promoting it.
1522 unsigned Opc = Op.getOpcode();
1523 if (TLI.isTypeDesirableForOp(Opc, VT))
1524 return SDValue();
1525
1526 EVT PVT = VT;
1527 // Consult target whether it is a good idea to promote this operation and
1528 // what's the right type to promote it to.
1529 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1530 assert(PVT != VT && "Don't know what type to promote to!");
1531
1532 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1533
1534 bool Replace0 = false;
1535 SDValue N0 = Op.getOperand(0);
1536 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1537
1538 bool Replace1 = false;
1539 SDValue N1 = Op.getOperand(1);
1540 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1541 SDLoc DL(Op);
1542
1543 SDValue RV =
1544 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1545
1546 // We are always replacing N0/N1's use in N and only need additional
1547 // replacements if there are additional uses.
1548 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1549 // (SDValue) here because the node may reference multiple values
1550 // (for example, the chain value of a load node).
1551 Replace0 &= !N0->hasOneUse();
1552 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1553
1554 // Combine Op here so it is preserved past replacements.
1555 CombineTo(Op.getNode(), RV);
1556
1557 // If operands have a use ordering, make sure we deal with
1558 // predecessor first.
1559 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1560 std::swap(N0, N1);
1561 std::swap(NN0, NN1);
1562 }
1563
1564 if (Replace0) {
1565 AddToWorklist(NN0.getNode());
1566 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1567 }
1568 if (Replace1) {
1569 AddToWorklist(NN1.getNode());
1570 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1571 }
1572 return Op;
1573 }
1574 return SDValue();
1575}
1576
1577/// Promote the specified integer shift operation if the target indicates it is
1578/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1579/// i32 since i16 instructions are longer.
1580SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1581 if (!LegalOperations)
1582 return SDValue();
1583
1584 EVT VT = Op.getValueType();
1585 if (VT.isVector() || !VT.isInteger())
1586 return SDValue();
1587
1588 // If operation type is 'undesirable', e.g. i16 on x86, consider
1589 // promoting it.
1590 unsigned Opc = Op.getOpcode();
1591 if (TLI.isTypeDesirableForOp(Opc, VT))
1592 return SDValue();
1593
1594 EVT PVT = VT;
1595 // Consult target whether it is a good idea to promote this operation and
1596 // what's the right type to promote it to.
1597 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1598 assert(PVT != VT && "Don't know what type to promote to!");
1599
1600 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1601
1602 bool Replace = false;
1603 SDValue N0 = Op.getOperand(0);
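// The bits above the original width must match the shift semantics: SRA needs
// the sign bit replicated (sext), SRL needs zeros (zext); a plain any-extend
// is enough for the remaining shift opcodes.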
1604 if (Opc == ISD::SRA)
1605 N0 = SExtPromoteOperand(N0, PVT);
1606 else if (Opc == ISD::SRL)
1607 N0 = ZExtPromoteOperand(N0, PVT);
1608 else
1609 N0 = PromoteOperand(N0, PVT, Replace);
1610
1611 if (!N0.getNode())
1612 return SDValue();
1613
1614 SDLoc DL(Op);
1615 SDValue N1 = Op.getOperand(1);
1616 SDValue RV =
1617 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1618
1619 if (Replace)
1620 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1621
1622 // Deal with Op being deleted.
1623 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1624 return RV;
1625 }
1626 return SDValue();
1627}
1628
1629SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1630 if (!LegalOperations)
1631 return SDValue();
1632
1633 EVT VT = Op.getValueType();
1634 if (VT.isVector() || !VT.isInteger())
1635 return SDValue();
1636
1637 // If operation type is 'undesirable', e.g. i16 on x86, consider
1638 // promoting it.
1639 unsigned Opc = Op.getOpcode();
1640 if (TLI.isTypeDesirableForOp(Opc, VT))
1641 return SDValue();
1642
1643 EVT PVT = VT;
1644 // Consult target whether it is a good idea to promote this operation and
1645 // what's the right type to promote it to.
1646 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1647 assert(PVT != VT && "Don't know what type to promote to!");
1648 // fold (aext (aext x)) -> (aext x)
1649 // fold (aext (zext x)) -> (zext x)
1650 // fold (aext (sext x)) -> (sext x)
1651 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1652 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1653 }
1654 return SDValue();
1655}
1656
1657bool DAGCombiner::PromoteLoad(SDValue Op) {
1658 if (!LegalOperations)
1659 return false;
1660
1661 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1662 return false;
1663
1664 EVT VT = Op.getValueType();
1665 if (VT.isVector() || !VT.isInteger())
1666 return false;
1667
1668 // If operation type is 'undesirable', e.g. i16 on x86, consider
1669 // promoting it.
1670 unsigned Opc = Op.getOpcode();
1671 if (TLI.isTypeDesirableForOp(Opc, VT))
1672 return false;
1673
1674 EVT PVT = VT;
1675 // Consult target whether it is a good idea to promote this operation and
1676 // what's the right type to promote it to.
1677 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1678 assert(PVT != VT && "Don't know what type to promote to!");
1679
1680 SDLoc DL(Op);
1681 SDNode *N = Op.getNode();
1682 LoadSDNode *LD = cast<LoadSDNode>(N);
1683 EVT MemVT = LD->getMemoryVT();
1684 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1685 : LD->getExtensionType();
1686 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1687 LD->getChain(), LD->getBasePtr(),
1688 MemVT, LD->getMemOperand());
1689 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1690
1691 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1692 Result.dump(&DAG); dbgs() << '\n');
1693
1694 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1695 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1696
1697 AddToWorklist(Result.getNode());
1698 recursivelyDeleteUnusedNodes(N);
1699 return true;
1700 }
1701
1702 return false;
1703}
1704
1705/// Recursively delete a node which has no uses and any operands for
1706/// which it is the only use.
1707///
1708/// Note that this both deletes the nodes and removes them from the worklist.
1709 /// It also adds any nodes that have had a user deleted to the worklist as they
1710 /// may now have only one use and be subject to other combines.
1711bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1712 if (!N->use_empty())
1713 return false;
1714
1715 SmallSetVector<SDNode *, 16> Nodes;
1716 Nodes.insert(N);
1717 do {
1718 N = Nodes.pop_back_val();
1719 if (!N)
1720 continue;
1721
1722 if (N->use_empty()) {
1723 for (const SDValue &ChildN : N->op_values())
1724 Nodes.insert(ChildN.getNode());
1725
1726 removeFromWorklist(N);
1727 DAG.DeleteNode(N);
1728 } else {
1729 AddToWorklist(N);
1730 }
1731 } while (!Nodes.empty());
1732 return true;
1733}
1734
1735//===----------------------------------------------------------------------===//
1736// Main DAG Combiner implementation
1737//===----------------------------------------------------------------------===//
1738
1739void DAGCombiner::Run(CombineLevel AtLevel) {
1740 // Set the instance variables, so that the various visit routines may use them.
1741 Level = AtLevel;
1742 LegalDAG = Level >= AfterLegalizeDAG;
1743 LegalOperations = Level >= AfterLegalizeVectorOps;
1744 LegalTypes = Level >= AfterLegalizeTypes;
1745
1746 WorklistInserter AddNodes(*this);
1747
1748 // Add all the dag nodes to the worklist.
1749 //
1750 // Note: Not all nodes are added to the PruningList here, because the only
1751 // nodes which can be deleted are those which have no uses, and all other nodes
1752 // which would otherwise be added to the worklist by the first call to
1753 // getNextWorklistEntry are already present in it.
1754 for (SDNode &Node : DAG.allnodes())
1755 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1756
1757 // Create a dummy node (which is not added to allnodes), that adds a reference
1758 // to the root node, preventing it from being deleted, and tracking any
1759 // changes of the root.
1760 HandleSDNode Dummy(DAG.getRoot());
1761
1762 // While we have a valid worklist entry node, try to combine it.
1763 while (SDNode *N = getNextWorklistEntry()) {
1764 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1765 // N is deleted from the DAG, since they too may now be dead or may have a
1766 // reduced number of uses, allowing other xforms.
1767 if (recursivelyDeleteUnusedNodes(N))
1768 continue;
1769
1770 WorklistRemover DeadNodes(*this);
1771
1772 // If this combine is running after legalizing the DAG, re-legalize any
1773 // nodes pulled off the worklist.
1774 if (LegalDAG) {
1775 SmallSetVector<SDNode *, 16> UpdatedNodes;
1776 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1777
1778 for (SDNode *LN : UpdatedNodes)
1779 AddToWorklistWithUsers(LN);
1780
1781 if (!NIsValid)
1782 continue;
1783 }
1784
1785 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1786
1787 // Add any operands of the new node which have not yet been combined to the
1788 // worklist as well. getNextWorklistEntry flags nodes that have been
1789 // combined before. Because the worklist uniques things already, this won't
1790 // repeatedly process the same operand.
1791 for (const SDValue &ChildN : N->op_values())
1792 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1793 /*SkipIfCombinedBefore=*/true);
1794
1795 SDValue RV = combine(N);
1796
1797 if (!RV.getNode())
1798 continue;
1799
1800 ++NodesCombined;
1801
1802 // Invalidate cached info.
1803 ChainsWithoutMergeableStores.clear();
1804
1805 // If we get back the same node we passed in, rather than a new node or
1806 // zero, we know that the node must have defined multiple values and
1807 // CombineTo was used. Since CombineTo takes care of the worklist
1808 // mechanics for us, we have no work to do in this case.
1809 if (RV.getNode() == N)
1810 continue;
1811
1812 assert(N->getOpcode() != ISD::DELETED_NODE &&
1813 RV.getOpcode() != ISD::DELETED_NODE &&
1814 "Node was deleted but visit returned new node!");
1815
1816 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1817
1818 if (N->getNumValues() == RV->getNumValues())
1819 DAG.ReplaceAllUsesWith(N, RV.getNode());
1820 else {
1821 assert(N->getValueType(0) == RV.getValueType() &&
1822 N->getNumValues() == 1 && "Type mismatch");
1823 DAG.ReplaceAllUsesWith(N, &RV);
1824 }
1825
1826 // Push the new node and any users onto the worklist. Omit this if the
1827 // new node is the EntryToken (e.g. if a store managed to get optimized
1828 // out), because re-visiting the EntryToken and its users will not uncover
1829 // any additional opportunities, but there may be a large number of such
1830 // users, potentially causing compile time explosion.
1831 if (RV.getOpcode() != ISD::EntryToken)
1832 AddToWorklistWithUsers(RV.getNode());
1833
1834 // Finally, if the node is now dead, remove it from the graph. The node
1835 // may not be dead if the replacement process recursively simplified to
1836 // something else needing this node. This will also take care of adding any
1837 // operands which have lost a user to the worklist.
1838 recursivelyDeleteUnusedNodes(N);
1839 }
1840
1841 // If the root changed (e.g. it was a dead load), update the root.
1842 DAG.setRoot(Dummy.getValue());
1843 DAG.RemoveDeadNodes();
1844}
1845
1846SDValue DAGCombiner::visit(SDNode *N) {
1847 // clang-format off
1848 switch (N->getOpcode()) {
1849 default: break;
1850 case ISD::TokenFactor: return visitTokenFactor(N);
1851 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1852 case ISD::ADD: return visitADD(N);
1853 case ISD::SUB: return visitSUB(N);
1854 case ISD::SADDSAT:
1855 case ISD::UADDSAT: return visitADDSAT(N);
1856 case ISD::SSUBSAT:
1857 case ISD::USUBSAT: return visitSUBSAT(N);
1858 case ISD::ADDC: return visitADDC(N);
1859 case ISD::SADDO:
1860 case ISD::UADDO: return visitADDO(N);
1861 case ISD::SUBC: return visitSUBC(N);
1862 case ISD::SSUBO:
1863 case ISD::USUBO: return visitSUBO(N);
1864 case ISD::ADDE: return visitADDE(N);
1865 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1866 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1867 case ISD::SUBE: return visitSUBE(N);
1868 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1869 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1870 case ISD::SMULFIX:
1871 case ISD::SMULFIXSAT:
1872 case ISD::UMULFIX:
1873 case ISD::UMULFIXSAT: return visitMULFIX(N);
1874 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1875 case ISD::SDIV: return visitSDIV(N);
1876 case ISD::UDIV: return visitUDIV(N);
1877 case ISD::SREM:
1878 case ISD::UREM: return visitREM(N);
1879 case ISD::MULHU: return visitMULHU(N);
1880 case ISD::MULHS: return visitMULHS(N);
1881 case ISD::AVGFLOORS:
1882 case ISD::AVGFLOORU:
1883 case ISD::AVGCEILS:
1884 case ISD::AVGCEILU: return visitAVG(N);
1885 case ISD::ABDS:
1886 case ISD::ABDU: return visitABD(N);
1887 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1888 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1889 case ISD::SMULO:
1890 case ISD::UMULO: return visitMULO(N);
1891 case ISD::SMIN:
1892 case ISD::SMAX:
1893 case ISD::UMIN:
1894 case ISD::UMAX: return visitIMINMAX(N);
1895 case ISD::AND: return visitAND(N);
1896 case ISD::OR: return visitOR(N);
1897 case ISD::XOR: return visitXOR(N);
1898 case ISD::SHL: return visitSHL(N);
1899 case ISD::SRA: return visitSRA(N);
1900 case ISD::SRL: return visitSRL(N);
1901 case ISD::ROTR:
1902 case ISD::ROTL: return visitRotate(N);
1903 case ISD::FSHL:
1904 case ISD::FSHR: return visitFunnelShift(N);
1905 case ISD::SSHLSAT:
1906 case ISD::USHLSAT: return visitSHLSAT(N);
1907 case ISD::ABS: return visitABS(N);
1908 case ISD::BSWAP: return visitBSWAP(N);
1909 case ISD::BITREVERSE: return visitBITREVERSE(N);
1910 case ISD::CTLZ: return visitCTLZ(N);
1911 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1912 case ISD::CTTZ: return visitCTTZ(N);
1913 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1914 case ISD::CTPOP: return visitCTPOP(N);
1915 case ISD::SELECT: return visitSELECT(N);
1916 case ISD::VSELECT: return visitVSELECT(N);
1917 case ISD::SELECT_CC: return visitSELECT_CC(N);
1918 case ISD::SETCC: return visitSETCC(N);
1919 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1920 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1921 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1922 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1923 case ISD::AssertSext:
1924 case ISD::AssertZext: return visitAssertExt(N);
1925 case ISD::AssertAlign: return visitAssertAlign(N);
1926 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1927 case ISD::SIGN_EXTEND_VECTOR_INREG:
1928 case ISD::ZERO_EXTEND_VECTOR_INREG:
1929 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1930 case ISD::TRUNCATE: return visitTRUNCATE(N);
1931 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1932 case ISD::BITCAST: return visitBITCAST(N);
1933 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1934 case ISD::FADD: return visitFADD(N);
1935 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1936 case ISD::FSUB: return visitFSUB(N);
1937 case ISD::FMUL: return visitFMUL(N);
1938 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1939 case ISD::FMAD: return visitFMAD(N);
1940 case ISD::FDIV: return visitFDIV(N);
1941 case ISD::FREM: return visitFREM(N);
1942 case ISD::FSQRT: return visitFSQRT(N);
1943 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1944 case ISD::FPOW: return visitFPOW(N);
1945 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1946 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1947 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1948 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1949 case ISD::LROUND:
1950 case ISD::LLROUND:
1951 case ISD::LRINT:
1952 case ISD::LLRINT: return visitXROUND(N);
1953 case ISD::FP_ROUND: return visitFP_ROUND(N);
1954 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1955 case ISD::FNEG: return visitFNEG(N);
1956 case ISD::FABS: return visitFABS(N);
1957 case ISD::FFLOOR: return visitFFLOOR(N);
1958 case ISD::FMINNUM:
1959 case ISD::FMAXNUM:
1960 case ISD::FMINIMUM:
1961 case ISD::FMAXIMUM:
1962 case ISD::FMINIMUMNUM:
1963 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
1964 case ISD::FCEIL: return visitFCEIL(N);
1965 case ISD::FTRUNC: return visitFTRUNC(N);
1966 case ISD::FFREXP: return visitFFREXP(N);
1967 case ISD::BRCOND: return visitBRCOND(N);
1968 case ISD::BR_CC: return visitBR_CC(N);
1969 case ISD::LOAD: return visitLOAD(N);
1970 case ISD::STORE: return visitSTORE(N);
1971 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1972 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1973 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1974 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1975 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1976 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1977 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1978 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1979 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1980 case ISD::MGATHER: return visitMGATHER(N);
1981 case ISD::MLOAD: return visitMLOAD(N);
1982 case ISD::MSCATTER: return visitMSCATTER(N);
1983 case ISD::MSTORE: return visitMSTORE(N);
1984 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
1985 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
1986 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1987 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1988 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1989 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1990 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1991 case ISD::FREEZE: return visitFREEZE(N);
1992 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1993 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1994 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
1995 case ISD::VECREDUCE_FADD:
1996 case ISD::VECREDUCE_FMUL:
1997 case ISD::VECREDUCE_ADD:
1998 case ISD::VECREDUCE_MUL:
1999 case ISD::VECREDUCE_AND:
2000 case ISD::VECREDUCE_OR:
2001 case ISD::VECREDUCE_XOR:
2002 case ISD::VECREDUCE_SMAX:
2003 case ISD::VECREDUCE_SMIN:
2004 case ISD::VECREDUCE_UMAX:
2005 case ISD::VECREDUCE_UMIN:
2006 case ISD::VECREDUCE_FMAX:
2007 case ISD::VECREDUCE_FMIN:
2008 case ISD::VECREDUCE_FMAXIMUM:
2009 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2010#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2011#include "llvm/IR/VPIntrinsics.def"
2012 return visitVPOp(N);
2013 }
2014 // clang-format on
2015 return SDValue();
2016}
2017
2018SDValue DAGCombiner::combine(SDNode *N) {
2019 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2020 return SDValue();
2021
2022 SDValue RV;
2023 if (!DisableGenericCombines)
2024 RV = visit(N);
2025
2026 // If nothing happened, try a target-specific DAG combine.
2027 if (!RV.getNode()) {
2028 assert(N->getOpcode() != ISD::DELETED_NODE &&
2029 "Node was deleted but visit returned NULL!");
2030
2031 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2032 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2033
2034 // Expose the DAG combiner to the target combiner impls.
2035 TargetLowering::DAGCombinerInfo
2036 DagCombineInfo(DAG, Level, false, this);
2037
2038 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2039 }
2040 }
2041
2042 // If nothing happened still, try promoting the operation.
2043 if (!RV.getNode()) {
2044 switch (N->getOpcode()) {
2045 default: break;
2046 case ISD::ADD:
2047 case ISD::SUB:
2048 case ISD::MUL:
2049 case ISD::AND:
2050 case ISD::OR:
2051 case ISD::XOR:
2052 RV = PromoteIntBinOp(SDValue(N, 0));
2053 break;
2054 case ISD::SHL:
2055 case ISD::SRA:
2056 case ISD::SRL:
2057 RV = PromoteIntShiftOp(SDValue(N, 0));
2058 break;
2059 case ISD::SIGN_EXTEND:
2060 case ISD::ZERO_EXTEND:
2061 case ISD::ANY_EXTEND:
2062 RV = PromoteExtend(SDValue(N, 0));
2063 break;
2064 case ISD::LOAD:
2065 if (PromoteLoad(SDValue(N, 0)))
2066 RV = SDValue(N, 0);
2067 break;
2068 }
2069 }
2070
2071 // If N is a commutative binary node, try to eliminate it if the commuted
2072 // version is already present in the DAG.
2073 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2074 SDValue N0 = N->getOperand(0);
2075 SDValue N1 = N->getOperand(1);
2076
2077 // Constant operands are canonicalized to RHS.
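// Only look for the commuted node when the swap would not move a constant
// from the RHS to the LHS; such a non-canonical node should not exist.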
2078 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2079 SDValue Ops[] = {N1, N0};
2080 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2081 N->getFlags());
2082 if (CSENode)
2083 return SDValue(CSENode, 0);
2084 }
2085 }
2086
2087 return RV;
2088}
2089
2090/// Given a node, return its input chain if it has one, otherwise return a null
2091/// sd operand.
2092 static SDValue getInputChainForNode(SDNode *N) {
2093 if (unsigned NumOps = N->getNumOperands()) {
2094 if (N->getOperand(0).getValueType() == MVT::Other)
2095 return N->getOperand(0);
2096 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2097 return N->getOperand(NumOps-1);
2098 for (unsigned i = 1; i < NumOps-1; ++i)
2099 if (N->getOperand(i).getValueType() == MVT::Other)
2100 return N->getOperand(i);
2101 }
2102 return SDValue();
2103}
2104
2105SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2106 SDValue Operand = N->getOperand(0);
2107 EVT VT = Operand.getValueType();
2108 SDLoc dl(N);
2109
2110 // Canonicalize undef to quiet NaN.
2111 if (Operand.isUndef()) {
2112 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2113 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2114 }
2115 return SDValue();
2116}
2117
2118SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2119 // If N has two operands, where one has an input chain equal to the other,
2120 // the 'other' chain is redundant.
2121 if (N->getNumOperands() == 2) {
2122 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2123 return N->getOperand(0);
2124 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2125 return N->getOperand(1);
2126 }
2127
2128 // Don't simplify token factors if optnone.
2129 if (OptLevel == CodeGenOptLevel::None)
2130 return SDValue();
2131
2132 // Don't simplify the token factor if the node itself has too many operands.
2133 if (N->getNumOperands() > TokenFactorInlineLimit)
2134 return SDValue();
2135
2136 // If the sole user is a token factor, we should make sure we have a
2137 // chance to merge them together. This prevents TF chains from inhibiting
2138 // optimizations.
2139 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2140 AddToWorklist(*(N->user_begin()));
2141
2142 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2143 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2144 SmallPtrSet<SDNode *, 16> SeenOps;
2145 bool Changed = false; // If we should replace this token factor.
2146
2147 // Start out with this token factor.
2148 TFs.push_back(N);
2149
2150 // Iterate through token factors. The TFs list grows when new token factors
2151 // are encountered.
2152 for (unsigned i = 0; i < TFs.size(); ++i) {
2153 // Limit number of nodes to inline, to avoid quadratic compile times.
2154 // We have to add the outstanding Token Factors to Ops, otherwise we might
2155 // drop Ops from the resulting Token Factors.
2156 if (Ops.size() > TokenFactorInlineLimit) {
2157 for (unsigned j = i; j < TFs.size(); j++)
2158 Ops.emplace_back(TFs[j], 0);
2159 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2160 // combiner worklist later.
2161 TFs.resize(i);
2162 break;
2163 }
2164
2165 SDNode *TF = TFs[i];
2166 // Check each of the operands.
2167 for (const SDValue &Op : TF->op_values()) {
2168 switch (Op.getOpcode()) {
2169 case ISD::EntryToken:
2170 // Entry tokens don't need to be added to the list. They are
2171 // redundant.
2172 Changed = true;
2173 break;
2174
2175 case ISD::TokenFactor:
2176 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2177 // Queue up for processing.
2178 TFs.push_back(Op.getNode());
2179 Changed = true;
2180 break;
2181 }
2182 [[fallthrough]];
2183
2184 default:
2185 // Only add if it isn't already in the list.
2186 if (SeenOps.insert(Op.getNode()).second)
2187 Ops.push_back(Op);
2188 else
2189 Changed = true;
2190 break;
2191 }
2192 }
2193 }
2194
2195 // Re-visit inlined Token Factors, to clean them up in case they have been
2196 // removed. Skip the first Token Factor, as this is the current node.
2197 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2198 AddToWorklist(TFs[i]);
2199
2200 // Remove Nodes that are chained to another node in the list. Do so
2201 // by walking up chains breadth-first, stopping when we've seen
2202 // another operand. In general we must climb to the EntryNode, but we can exit
2203 // early if we find all remaining work is associated with just one operand as
2204 // no further pruning is possible.
2205
2206 // List of nodes to search through and original Ops from which they originate.
2207 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2208 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2209 SmallPtrSet<SDNode *, 16> SeenChains;
2210 bool DidPruneOps = false;
2211
2212 unsigned NumLeftToConsider = 0;
2213 for (const SDValue &Op : Ops) {
2214 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2215 OpWorkCount.push_back(1);
2216 }
2217
2218 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2219 // If this is an Op, we can remove the op from the list. Re-mark any
2220 // search associated with it as from the current OpNumber.
2221 if (SeenOps.contains(Op)) {
2222 Changed = true;
2223 DidPruneOps = true;
2224 unsigned OrigOpNumber = 0;
2225 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2226 OrigOpNumber++;
2227 assert((OrigOpNumber != Ops.size()) &&
2228 "expected to find TokenFactor Operand");
2229 // Re-mark worklist from OrigOpNumber to OpNumber
2230 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2231 if (Worklist[i].second == OrigOpNumber) {
2232 Worklist[i].second = OpNumber;
2233 }
2234 }
2235 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2236 OpWorkCount[OrigOpNumber] = 0;
2237 NumLeftToConsider--;
2238 }
2239 // Add if it's a new chain
2240 if (SeenChains.insert(Op).second) {
2241 OpWorkCount[OpNumber]++;
2242 Worklist.push_back(std::make_pair(Op, OpNumber));
2243 }
2244 };
2245
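// The walk below is capped at 1024 visited chain nodes to keep compile time
// bounded.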
2246 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2247 // We need to consider at least 2 Ops to prune.
2248 if (NumLeftToConsider <= 1)
2249 break;
2250 auto CurNode = Worklist[i].first;
2251 auto CurOpNumber = Worklist[i].second;
2252 assert((OpWorkCount[CurOpNumber] > 0) &&
2253 "Node should not appear in worklist");
2254 switch (CurNode->getOpcode()) {
2255 case ISD::EntryToken:
2256 // Hitting EntryToken is the only way for the search to terminate
2257 // without hitting another operand's search.
2258 // Prevent us from marking this operand as
2259 // considered.
2260 NumLeftToConsider++;
2261 break;
2262 case ISD::TokenFactor:
2263 for (const SDValue &Op : CurNode->op_values())
2264 AddToWorklist(i, Op.getNode(), CurOpNumber);
2265 break;
2266 case ISD::LIFETIME_START:
2267 case ISD::LIFETIME_END:
2268 case ISD::CopyFromReg:
2269 case ISD::CopyToReg:
2270 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2271 break;
2272 default:
2273 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2274 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2275 break;
2276 }
2277 OpWorkCount[CurOpNumber]--;
2278 if (OpWorkCount[CurOpNumber] == 0)
2279 NumLeftToConsider--;
2280 }
2281
2282 // If we've changed things around then replace token factor.
2283 if (Changed) {
2284 SDValue Result;
2285 if (Ops.empty()) {
2286 // The entry token is the only possible outcome.
2287 Result = DAG.getEntryNode();
2288 } else {
2289 if (DidPruneOps) {
2290 SmallVector<SDValue, 8> PrunedOps;
2291 // Keep only the Ops that were not reached while walking up the chains of
2292 // the other Ops; the reached ones are redundant.
2292 for (const SDValue &Op : Ops) {
2293 if (SeenChains.count(Op.getNode()) == 0)
2294 PrunedOps.push_back(Op);
2295 }
2296 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2297 } else {
2298 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2299 }
2300 }
2301 return Result;
2302 }
2303 return SDValue();
2304}
2305
2306/// MERGE_VALUES can always be eliminated.
2307SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2308 WorklistRemover DeadNodes(*this);
2309 // Replacing results may cause a different MERGE_VALUES to suddenly
2310 // be CSE'd with N, and carry its uses with it. Iterate until no
2311 // uses remain, to ensure that the node can be safely deleted.
2312 // First add the users of this node to the work list so that they
2313 // can be tried again once they have new operands.
2314 AddUsersToWorklist(N);
2315 do {
2316 // Do as a single replacement to avoid rewalking use lists.
2317 SmallVector<SDValue, 8> Ops(N->ops());
2318 DAG.ReplaceAllUsesWith(N, Ops.data());
2319 } while (!N->use_empty());
2320 deleteAndRecombine(N);
2321 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2322}
2323
2324 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2325 /// ConstantSDNode pointer, else nullptr.
2326 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2327 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2328 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2329}
2330
2331// isTruncateOf - If N is a truncate of some other value, return true, record
2332// the value being truncated in Op and which of Op's bits are zero/one in Known.
2333// This function computes KnownBits to avoid a duplicated call to
2334// computeKnownBits in the caller.
2335 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2336 KnownBits &Known) {
2337 if (N->getOpcode() == ISD::TRUNCATE) {
2338 Op = N->getOperand(0);
2339 Known = DAG.computeKnownBits(Op);
2340 if (N->getFlags().hasNoUnsignedWrap())
2341 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2342 return true;
2343 }
2344
2345 if (N.getValueType().getScalarType() != MVT::i1 ||
2346 !sd_match(
2347 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2348 return false;
2349
2350 Known = DAG.computeKnownBits(Op);
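// An i1 setcc of Op against zero behaves like a truncate of Op when every
// bit of Op other than bit 0 is known to be zero.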
2351 return (Known.Zero | 1).isAllOnes();
2352}
2353
2354/// Return true if 'Use' is a load or a store that uses N as its base pointer
2355/// and that N may be folded in the load / store addressing mode.
2356 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2357 const TargetLowering &TLI) {
2358 EVT VT;
2359 unsigned AS;
2360
2361 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2362 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2363 return false;
2364 VT = LD->getMemoryVT();
2365 AS = LD->getAddressSpace();
2366 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2367 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2368 return false;
2369 VT = ST->getMemoryVT();
2370 AS = ST->getAddressSpace();
2371 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2372 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2373 return false;
2374 VT = LD->getMemoryVT();
2375 AS = LD->getAddressSpace();
2376 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2377 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2378 return false;
2379 VT = ST->getMemoryVT();
2380 AS = ST->getAddressSpace();
2381 } else {
2382 return false;
2383 }
2384
2385 TargetLowering::AddrMode AM;
2386 if (N->getOpcode() == ISD::ADD) {
2387 AM.HasBaseReg = true;
2388 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2389 if (Offset)
2390 // [reg +/- imm]
2391 AM.BaseOffs = Offset->getSExtValue();
2392 else
2393 // [reg +/- reg]
2394 AM.Scale = 1;
2395 } else if (N->getOpcode() == ISD::SUB) {
2396 AM.HasBaseReg = true;
2397 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2398 if (Offset)
2399 // [reg +/- imm]
2400 AM.BaseOffs = -Offset->getSExtValue();
2401 else
2402 // [reg +/- reg]
2403 AM.Scale = 1;
2404 } else {
2405 return false;
2406 }
2407
2408 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2409 VT.getTypeForEVT(*DAG.getContext()), AS);
2410}
2411
2412/// This inverts a canonicalization in IR that replaces a variable select arm
2413/// with an identity constant. Codegen improves if we re-use the variable
2414/// operand rather than load a constant. This can also be converted into a
2415/// masked vector operation if the target supports it.
2416 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2417 bool ShouldCommuteOperands) {
2418 // Match a select as operand 1. The identity constant that we are looking for
2419 // is only valid as operand 1 of a non-commutative binop.
2420 SDValue N0 = N->getOperand(0);
2421 SDValue N1 = N->getOperand(1);
2422 if (ShouldCommuteOperands)
2423 std::swap(N0, N1);
2424
2425 // TODO: Should this apply to scalar select too?
2426 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2427 return SDValue();
2428
2429 // We can't hoist all instructions because of immediate UB (not speculatable).
2430 // For example, div/rem by zero.
2431 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2432 return SDValue();
2433
2434 unsigned Opcode = N->getOpcode();
2435 EVT VT = N->getValueType(0);
2436 SDValue Cond = N1.getOperand(0);
2437 SDValue TVal = N1.getOperand(1);
2438 SDValue FVal = N1.getOperand(2);
2439
2440 // This transform increases uses of N0, so freeze it to be safe.
2441 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2442 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2443 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2444 SDValue F0 = DAG.getFreeze(N0);
2445 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2446 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2447 }
2448 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2449 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2450 SDValue F0 = DAG.getFreeze(N0);
2451 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2452 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2453 }
2454
2455 return SDValue();
2456}
2457
2458SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2459 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2460 "Unexpected binary operator");
2461
2462 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2463 auto BinOpcode = BO->getOpcode();
2464 EVT VT = BO->getValueType(0);
2465 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2466 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2467 return Sel;
2468
2469 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2470 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2471 return Sel;
2472 }
2473
2474 // Don't do this unless the old select is going away. We want to eliminate the
2475 // binary operator, not replace a binop with a select.
2476 // TODO: Handle ISD::SELECT_CC.
2477 unsigned SelOpNo = 0;
2478 SDValue Sel = BO->getOperand(0);
2479 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2480 SelOpNo = 1;
2481 Sel = BO->getOperand(1);
2482
2483 // Peek through trunc to shift amount type.
2484 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2485 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2486 // This is valid when the truncated bits of x are already zero.
2487 SDValue Op;
2488 KnownBits Known;
2489 if (isTruncateOf(DAG, Sel, Op, Known) &&
2491 Sel = Op;
2492 }
2493 }
2494
2495 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2496 return SDValue();
2497
2498 SDValue CT = Sel.getOperand(1);
2499 if (!isConstantOrConstantVector(CT, true) &&
2500 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2501 return SDValue();
2502
2503 SDValue CF = Sel.getOperand(2);
2504 if (!isConstantOrConstantVector(CF, true) &&
2505 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2506 return SDValue();
2507
2508 // Bail out if any constants are opaque because we can't constant fold those.
2509 // The exception is "and" and "or" with either 0 or -1 in which case we can
2510 // propagate non constant operands into select. I.e.:
2511 // and (select Cond, 0, -1), X --> select Cond, 0, X
2512 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2513 bool CanFoldNonConst =
2514 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2515 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2516 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2517
2518 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2519 if (!CanFoldNonConst &&
2520 !isConstantOrConstantVector(CBO, true) &&
2521 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2522 return SDValue();
2523
2524 SDLoc DL(Sel);
2525 SDValue NewCT, NewCF;
2526
2527 if (CanFoldNonConst) {
2528 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2529 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2530 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2531 NewCT = CT;
2532 else
2533 NewCT = CBO;
2534
2535 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2536 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2537 NewCF = CF;
2538 else
2539 NewCF = CBO;
2540 } else {
2541 // We have a select-of-constants followed by a binary operator with a
2542 // constant. Eliminate the binop by pulling the constant math into the
2543 // select. Example:
2544 // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2545 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2546 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2547 if (!NewCT)
2548 return SDValue();
2549
2550 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2551 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2552 if (!NewCF)
2553 return SDValue();
2554 }
2555
2556 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2557 SelectOp->setFlags(BO->getFlags());
2558 return SelectOp;
2559}
2560
2561 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2562 SelectionDAG &DAG) {
2563 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2564 "Expecting add or sub");
2565
2566 // Match a constant operand and a zext operand for the math instruction:
2567 // add Z, C
2568 // sub C, Z
2569 bool IsAdd = N->getOpcode() == ISD::ADD;
2570 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2571 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2572 auto *CN = dyn_cast<ConstantSDNode>(C);
2573 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2574 return SDValue();
2575
2576 // Match the zext operand as a setcc of a boolean.
2577 if (Z.getOperand(0).getValueType() != MVT::i1)
2578 return SDValue();
2579
2580 // Match the compare as: setcc (X & 1), 0, eq.
2581 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2582 m_SpecificCondCode(ISD::SETEQ))))
2583 return SDValue();
2584
2585 // We are adding/subtracting a constant and an inverted low bit. Turn that
2586 // into a subtract/add of the low bit with incremented/decremented constant:
2587 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2588 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
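// This relies on (zext i1 (seteq (X & 1), 0)) == 1 - (X & 1).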
2589 EVT VT = C.getValueType();
2590 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2591 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2592 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2593 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2594}
2595
2596// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
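// Uses the identity A + B == 2 * (A | B) - (A ^ B), so the rounded-up average
// can be formed without computing the potentially overflowing sum A + B.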
2597SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2598 SDValue N0 = N->getOperand(0);
2599 EVT VT = N0.getValueType();
2600 SDValue A, B;
2601
2602 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2603 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2604 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2605 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2606 }
2607 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2608 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2609 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2610 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2611 }
2612 return SDValue();
2613}
2614
2615/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2616/// a shift and add with a different constant.
2617 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2618 SelectionDAG &DAG) {
2619 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2620 "Expecting add or sub");
2621
2622 // We need a constant operand for the add/sub, and the other operand is a
2623 // logical shift right: add (srl), C or sub C, (srl).
2624 bool IsAdd = N->getOpcode() == ISD::ADD;
2625 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2626 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2627 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2628 ShiftOp.getOpcode() != ISD::SRL)
2629 return SDValue();
2630
2631 // The shift must be of a 'not' value.
2632 SDValue Not = ShiftOp.getOperand(0);
2633 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2634 return SDValue();
2635
2636 // The shift must be moving the sign bit to the least-significant-bit.
2637 EVT VT = ShiftOp.getValueType();
2638 SDValue ShAmt = ShiftOp.getOperand(1);
2639 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2640 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2641 return SDValue();
2642
2643 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2644 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2645 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
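// (srl (not X), 31) is the inverted sign bit of X, i.e. 1 - (srl X, 31),
// which equals (sra X, 31) + 1, so the 'not' can be removed by adjusting the
// shift and the constant.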
2646 if (SDValue NewC = DAG.FoldConstantArithmetic(
2647 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2648 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2649 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2650 Not.getOperand(0), ShAmt);
2651 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2652 }
2653
2654 return SDValue();
2655}
2656
2657static bool
2658 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2659 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2660 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2661}
2662
2663/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2664/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2665/// are no common bits set in the operands).
2666SDValue DAGCombiner::visitADDLike(SDNode *N) {
2667 SDValue N0 = N->getOperand(0);
2668 SDValue N1 = N->getOperand(1);
2669 EVT VT = N0.getValueType();
2670 SDLoc DL(N);
2671
2672 // fold (add x, undef) -> undef
2673 if (N0.isUndef())
2674 return N0;
2675 if (N1.isUndef())
2676 return N1;
2677
2678 // fold (add c1, c2) -> c1+c2
2679 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2680 return C;
2681
2682 // canonicalize constant to RHS
2683 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2684 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2685 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2686
2687 if (areBitwiseNotOfEachother(N0, N1))
2688 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2689
2690 // fold vector ops
2691 if (VT.isVector()) {
2692 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2693 return FoldedVOp;
2694
2695 // fold (add x, 0) -> x, vector edition
2696 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2697 return N0;
2698 }
2699
2700 // fold (add x, 0) -> x
2701 if (isNullConstant(N1))
2702 return N0;
2703
2704 if (N0.getOpcode() == ISD::SUB) {
2705 SDValue N00 = N0.getOperand(0);
2706 SDValue N01 = N0.getOperand(1);
2707
2708 // fold ((A-c1)+c2) -> (A+(c2-c1))
2709 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2710 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2711
2712 // fold ((c1-A)+c2) -> (c1+c2)-A
2713 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2714 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2715 }
2716
2717 // add (sext i1 X), 1 -> zext (not i1 X)
2718 // We don't transform this pattern:
2719 // add (zext i1 X), -1 -> sext (not i1 X)
2720 // because most (?) targets generate better code for the zext form.
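// For an i1 X, (sext X) is 0 or -1, so (sext X) + 1 is 1 or 0, which is
// exactly (zext (not X)).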
2721 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2722 isOneOrOneSplat(N1)) {
2723 SDValue X = N0.getOperand(0);
2724 if ((!LegalOperations ||
2725 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2726 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2727 X.getScalarValueSizeInBits() == 1) {
2728 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2729 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2730 }
2731 }
2732
2733 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2734 // iff (or x, c0) is equivalent to (add x, c0).
2735 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2736 // iff (xor x, c0) is equivalent to (add x, c0).
2737 if (DAG.isADDLike(N0)) {
2738 SDValue N01 = N0.getOperand(1);
2739 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2740 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2741 }
2742
2743 if (SDValue NewSel = foldBinOpIntoSelect(N))
2744 return NewSel;
2745
2746 // reassociate add
2747 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2748 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2749 return RADD;
2750
2751 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2752 // equivalent to (add x, c).
2753 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2754 // equivalent to (add x, c).
2755 // Do this optimization only when adding c does not introduce instructions
2756 // for adding carries.
2757 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2758 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2759 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2760 // If N0's type does not split or is a sign mask, it does not introduce
2761 // add carry.
2762 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2763 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2764 TyActn == TargetLoweringBase::TypePromoteInteger ||
2765 isMinSignedConstant(N0.getOperand(1));
2766 if (NoAddCarry)
2767 return DAG.getNode(
2768 ISD::ADD, DL, VT,
2769 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2770 N0.getOperand(1));
2771 }
2772 return SDValue();
2773 };
2774 if (SDValue Add = ReassociateAddOr(N0, N1))
2775 return Add;
2776 if (SDValue Add = ReassociateAddOr(N1, N0))
2777 return Add;
2778
2779 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2780 if (SDValue SD =
2781 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2782 return SD;
2783 }
2784
2785 SDValue A, B, C, D;
2786
2787 // fold ((0-A) + B) -> B-A
2788 if (sd_match(N0, m_Neg(m_Value(A))))
2789 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2790
2791 // fold (A + (0-B)) -> A-B
2792 if (sd_match(N1, m_Neg(m_Value(B))))
2793 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2794
2795 // fold (A+(B-A)) -> B
2796 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2797 return B;
2798
2799 // fold ((B-A)+A) -> B
2800 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2801 return B;
2802
2803 // fold ((A-B)+(C-A)) -> (C-B)
2804 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2805 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2806 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2807
2808 // fold ((A-B)+(B-C)) -> (A-C)
2809 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2810 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2811 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2812
2813 // fold (A+(B-(A+C))) to (B-C)
2814 // fold (A+(B-(C+A))) to (B-C)
2815 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2816 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2817
2818 // fold (A+((B-A)+or-C)) to (B+or-C)
2819 if (sd_match(N1,
2820 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2821 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2822 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2823
2824 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2825 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2826 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2827 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2828 return DAG.getNode(ISD::SUB, DL, VT,
2829 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2830 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2831
2832 // fold (add (umax X, C), -C) --> (usubsat X, C)
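// (umax X, C) - C is X - C when X >= C and 0 otherwise, i.e. usubsat(X, C),
// and the subtraction can never wrap.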
2833 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2834 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2835 return (!Max && !Op) ||
2836 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2837 };
2838 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2839 /*AllowUndefs*/ true))
2840 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2841 N0.getOperand(1));
2842 }
2843
2844 if (SimplifyDemandedBits(SDValue(N, 0)))
2845 return SDValue(N, 0);
2846
2847 if (isOneOrOneSplat(N1)) {
2848 // fold (add (xor a, -1), 1) -> (sub 0, a)
2849 if (isBitwiseNot(N0))
2850 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2851 N0.getOperand(0));
2852
2853 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2854 if (N0.getOpcode() == ISD::ADD) {
2855 SDValue A, Xor;
2856
2857 if (isBitwiseNot(N0.getOperand(0))) {
2858 A = N0.getOperand(1);
2859 Xor = N0.getOperand(0);
2860 } else if (isBitwiseNot(N0.getOperand(1))) {
2861 A = N0.getOperand(0);
2862 Xor = N0.getOperand(1);
2863 }
2864
2865 if (Xor)
2866 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2867 }
2868
2869 // Look for:
2870 // add (add x, y), 1
2871 // And if the target does not like this form then turn into:
2872 // sub y, (xor x, -1)
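// (xor x, -1) == -x - 1, so (sub y, (xor x, -1)) == x + y + 1.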
2873 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2874 N0.hasOneUse() &&
2875 // Limit this to after legalization if the add has wrap flags
2876 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2877 !N->getFlags().hasNoSignedWrap()))) {
2878 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2879 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2880 }
2881 }
2882
2883 // (x - y) + -1 -> add (xor y, -1), x
2884 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2885 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2886 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2887 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2888 }
2889
2890 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2891 // This can help if the inner add has multiple uses.
2892 APInt CM, CA;
2893 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2894 if (VT.getScalarSizeInBits() <= 64) {
2895 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2896 m_ConstInt(CM)))) &&
2897 TLI.isLegalAddImmediate(
2898 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2899 SDNodeFlags Flags;
2900 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2901 // are _also_ nsw, the outputs can be too.
2902 if (N->getFlags().hasNoUnsignedWrap() &&
2903 N0->getFlags().hasNoUnsignedWrap() &&
2904 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2905 Flags.setNoUnsignedWrap(true);
2906 if (N->getFlags().hasNoSignedWrap() &&
2907 N0->getFlags().hasNoSignedWrap() &&
2908 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2909 Flags.setNoSignedWrap(true);
2910 }
2911 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2912 DAG.getConstant(CM, DL, VT), Flags);
2913 return DAG.getNode(
2914 ISD::ADD, DL, VT, Mul,
2915 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2916 }
2917 // Also look in case there is an intermediate add.
2918 if (sd_match(N0, m_OneUse(m_Add(
2919 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2920 m_ConstInt(CM))),
2921 m_Value(B)))) &&
2922 TLI.isLegalAddImmediate(
2923 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2924 SDNodeFlags Flags;
2925 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2926 // are _also_ nsw, the outputs can be too.
2927 SDValue OMul =
2928 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2929 if (N->getFlags().hasNoUnsignedWrap() &&
2930 N0->getFlags().hasNoUnsignedWrap() &&
2931 OMul->getFlags().hasNoUnsignedWrap() &&
2932 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2933 Flags.setNoUnsignedWrap(true);
2934 if (N->getFlags().hasNoSignedWrap() &&
2935 N0->getFlags().hasNoSignedWrap() &&
2936 OMul->getFlags().hasNoSignedWrap() &&
2937 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2938 Flags.setNoSignedWrap(true);
2939 }
2940 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2941 DAG.getConstant(CM, DL, VT), Flags);
2942 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2943 return DAG.getNode(
2944 ISD::ADD, DL, VT, Add,
2945 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2946 }
2947 }
2948 }
2949
2950 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2951 return Combined;
2952
2953 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2954 return Combined;
2955
2956 return SDValue();
2957}
2958
2959// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
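// Uses the identity A + B == 2 * (A & B) + (A ^ B), so the truncating average
// can be formed without computing the potentially overflowing sum A + B.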
2960SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2961 SDValue N0 = N->getOperand(0);
2962 EVT VT = N0.getValueType();
2963 SDValue A, B;
2964
2965 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2966 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2967 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2968 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2969 }
2970 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2971 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2972 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2973 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2974 }
2975
2976 return SDValue();
2977}
2978
2979SDValue DAGCombiner::visitADD(SDNode *N) {
2980 SDValue N0 = N->getOperand(0);
2981 SDValue N1 = N->getOperand(1);
2982 EVT VT = N0.getValueType();
2983 SDLoc DL(N);
2984
2985 if (SDValue Combined = visitADDLike(N))
2986 return Combined;
2987
2988 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2989 return V;
2990
2991 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2992 return V;
2993
2994 // Try to match AVGFLOOR fixedwidth pattern
2995 if (SDValue V = foldAddToAvg(N, DL))
2996 return V;
2997
2998 // fold (a+b) -> (a|b) iff a and b share no bits.
2999 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3000 DAG.haveNoCommonBitsSet(N0, N1))
3001 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3002
3003 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3004 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3005 const APInt &C0 = N0->getConstantOperandAPInt(0);
3006 const APInt &C1 = N1->getConstantOperandAPInt(0);
3007 return DAG.getVScale(DL, VT, C0 + C1);
3008 }
3009
3010 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3011 if (N0.getOpcode() == ISD::ADD &&
3012 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3013 N1.getOpcode() == ISD::VSCALE) {
3014 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3015 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3016 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3017 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3018 }
3019
3020 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3021 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3022 N1.getOpcode() == ISD::STEP_VECTOR) {
3023 const APInt &C0 = N0->getConstantOperandAPInt(0);
3024 const APInt &C1 = N1->getConstantOperandAPInt(0);
3025 APInt NewStep = C0 + C1;
3026 return DAG.getStepVector(DL, VT, NewStep);
3027 }
3028
3029 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3030 if (N0.getOpcode() == ISD::ADD &&
3031 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3032 N1.getOpcode() == ISD::STEP_VECTOR) {
3033 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3034 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3035 APInt NewStep = SV0 + SV1;
3036 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3037 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3038 }
3039
3040 return SDValue();
3041}
3042
3043SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3044 unsigned Opcode = N->getOpcode();
3045 SDValue N0 = N->getOperand(0);
3046 SDValue N1 = N->getOperand(1);
3047 EVT VT = N0.getValueType();
3048 bool IsSigned = Opcode == ISD::SADDSAT;
3049 SDLoc DL(N);
3050
3051 // fold (add_sat x, undef) -> -1
3052 if (N0.isUndef() || N1.isUndef())
3053 return DAG.getAllOnesConstant(DL, VT);
3054
3055 // fold (add_sat c1, c2) -> c3
3056 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3057 return C;
3058
3059 // canonicalize constant to RHS
3060 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3061 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3062 return DAG.getNode(Opcode, DL, VT, N1, N0);
3063
3064 // fold vector ops
3065 if (VT.isVector()) {
3066 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3067 return FoldedVOp;
3068
3069 // fold (add_sat x, 0) -> x, vector edition
3070 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3071 return N0;
3072 }
3073
3074 // fold (add_sat x, 0) -> x
3075 if (isNullConstant(N1))
3076 return N0;
3077
3078 // If it cannot overflow, transform into an add.
3079 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3080 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3081
3082 return SDValue();
3083}
3084
3085 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3086 bool ForceCarryReconstruction = false) {
3087 bool Masked = false;
3088
3089 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3090 while (true) {
3091 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3092 V = V.getOperand(0);
3093 continue;
3094 }
3095
3096 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3097 if (ForceCarryReconstruction)
3098 return V;
3099
3100 Masked = true;
3101 V = V.getOperand(0);
3102 continue;
3103 }
3104
3105 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3106 return V;
3107
3108 break;
3109 }
3110
3111 // If this is not a carry, return.
3112 if (V.getResNo() != 1)
3113 return SDValue();
3114
3115 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3116 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3117 return SDValue();
3118
3119 EVT VT = V->getValueType(0);
3120 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3121 return SDValue();
3122
3123 // If the result is masked, then no matter what kind of bool it is we can
3124 // return. If it isn't, then we need to make sure the bool type is either 0 or
3125 // 1 and not other values.
3126 if (Masked ||
3127 TLI.getBooleanContents(V.getValueType()) ==
3128 TargetLowering::ZeroOrOneBooleanContent)
3129 return V;
3130
3131 return SDValue();
3132}
3133
3134/// Given the operands of an add/sub operation, see if the 2nd operand is a
3135/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3136/// the opcode and bypass the mask operation.
3137static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3138 SelectionDAG &DAG, const SDLoc &DL) {
3139 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3140 N1 = N1.getOperand(0);
3141
3142 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3143 return SDValue();
3144
3145 EVT VT = N0.getValueType();
3146 SDValue N10 = N1.getOperand(0);
3147 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3148 N10 = N10.getOperand(0);
3149
3150 if (N10.getValueType() != VT)
3151 return SDValue();
3152
3153 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3154 return SDValue();
3155
3156 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3157 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3158 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3159}
3160
3161/// Helper for doing combines based on N0 and N1 being added to each other.
3162SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3163 SDNode *LocReference) {
3164 EVT VT = N0.getValueType();
3165 SDLoc DL(LocReference);
3166
3167 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3168 SDValue Y, N;
3169 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3170 return DAG.getNode(ISD::SUB, DL, VT, N0,
3171 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3172
3173 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3174 return V;
3175
3176 // Look for:
3177 // add (add x, 1), y
3178 // And if the target does not like this form then turn into:
3179 // sub y, (xor x, -1)
3180 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3181 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3182 // Limit this to after legalization if the add has wrap flags
3183 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3184 !N0->getFlags().hasNoSignedWrap()))) {
3185 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3186 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3187 }
3188
3189 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3190 // Hoist one-use subtraction by non-opaque constant:
3191 // (x - C) + y -> (x + y) - C
3192 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3193 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3194 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3195 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3196 }
3197 // Hoist one-use subtraction from non-opaque constant:
3198 // (C - x) + y -> (y - x) + C
3199 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3200 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3201 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3202 }
3203 }
3204
3205 // add (mul x, C), x -> mul x, C+1
3206 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3207 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3208 N0.hasOneUse()) {
3209 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3210 DAG.getConstant(1, DL, VT));
3211 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3212 }
3213
3214 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3215 // rather than 'add 0/-1' (the zext should get folded).
3216 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3217 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3218 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3219 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3220 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3221 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3222 }
3223
3224 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3225 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3226 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3227 if (TN->getVT() == MVT::i1) {
3228 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3229 DAG.getConstant(1, DL, VT));
3230 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3231 }
3232 }
3233
3234 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3235 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3236 N1.getResNo() == 0)
3237 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3238 N0, N1.getOperand(0), N1.getOperand(2));
3239
3240 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3241 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3242 if (SDValue Carry = getAsCarry(TLI, N1))
3243 return DAG.getNode(ISD::UADDO_CARRY, DL,
3244 DAG.getVTList(VT, Carry.getValueType()), N0,
3245 DAG.getConstant(0, DL, VT), Carry);
3246
3247 return SDValue();
3248}
3249
3250SDValue DAGCombiner::visitADDC(SDNode *N) {
3251 SDValue N0 = N->getOperand(0);
3252 SDValue N1 = N->getOperand(1);
3253 EVT VT = N0.getValueType();
3254 SDLoc DL(N);
3255
3256 // If the flag result is dead, turn this into an ADD.
3257 if (!N->hasAnyUseOfValue(1))
3258 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3259 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3260
3261 // canonicalize constant to RHS.
3262 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3263 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3264 if (N0C && !N1C)
3265 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3266
3267 // fold (addc x, 0) -> x + no carry out
3268 if (isNullConstant(N1))
3269 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3270 DL, MVT::Glue));
3271
3272 // If it cannot overflow, transform into an add.
3273 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3274 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3275 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3276
3277 return SDValue();
3278}
3279
3280/**
3281 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3282 * then the flip also occurs if computing the inverse is the same cost.
3283 * This function returns an empty SDValue in case it cannot flip the boolean
3284 * without increasing the cost of the computation. If you want to flip a boolean
3285 * no matter what, use DAG.getLogicalNOT.
3286 */
3287 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3288 const TargetLowering &TLI,
3289 bool Force) {
3290 if (Force && isa<ConstantSDNode>(V))
3291 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3292
3293 if (V.getOpcode() != ISD::XOR)
3294 return SDValue();
3295
3296 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3297 return V.getOperand(0);
3298 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3299 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3300 return SDValue();
3301}
3302
3303SDValue DAGCombiner::visitADDO(SDNode *N) {
3304 SDValue N0 = N->getOperand(0);
3305 SDValue N1 = N->getOperand(1);
3306 EVT VT = N0.getValueType();
3307 bool IsSigned = (ISD::SADDO == N->getOpcode());
3308
3309 EVT CarryVT = N->getValueType(1);
3310 SDLoc DL(N);
3311
3312 // If the flag result is dead, turn this into an ADD.
3313 if (!N->hasAnyUseOfValue(1))
3314 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3315 DAG.getUNDEF(CarryVT));
3316
3317 // canonicalize constant to RHS.
3318 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3319 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3320 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3321
3322 // fold (addo x, 0) -> x + no carry out
3323 if (isNullOrNullSplat(N1))
3324 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3325
3326 // If it cannot overflow, transform into an add.
3327 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3328 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3329 DAG.getConstant(0, DL, CarryVT));
3330
3331 if (IsSigned) {
3332 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3333 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3334 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3335 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3336 } else {
3337 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3338 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3339 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3340 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3341 return CombineTo(
3342 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3343 }
3344
3345 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3346 return Combined;
3347
3348 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3349 return Combined;
3350 }
3351
3352 return SDValue();
3353}
3354
3355SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3356 EVT VT = N0.getValueType();
3357 if (VT.isVector())
3358 return SDValue();
3359
3360 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3361 // If Y + 1 cannot overflow.
3362 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3363 SDValue Y = N1.getOperand(0);
3364 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3365 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3366 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3367 N1.getOperand(2));
3368 }
3369
3370 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3371 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3372 if (SDValue Carry = getAsCarry(TLI, N1))
3373 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3374 DAG.getConstant(0, SDLoc(N), VT), Carry);
3375
3376 return SDValue();
3377}
3378
3379SDValue DAGCombiner::visitADDE(SDNode *N) {
3380 SDValue N0 = N->getOperand(0);
3381 SDValue N1 = N->getOperand(1);
3382 SDValue CarryIn = N->getOperand(2);
3383
3384 // canonicalize constant to RHS
3385 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3386 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3387 if (N0C && !N1C)
3388 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3389 N1, N0, CarryIn);
3390
3391 // fold (adde x, y, false) -> (addc x, y)
3392 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3393 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3394
3395 return SDValue();
3396}
3397
3398SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3399 SDValue N0 = N->getOperand(0);
3400 SDValue N1 = N->getOperand(1);
3401 SDValue CarryIn = N->getOperand(2);
3402 SDLoc DL(N);
3403
3404 // canonicalize constant to RHS
3405 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3406 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3407 if (N0C && !N1C)
3408 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3409
3410 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3411 if (isNullConstant(CarryIn)) {
3412 if (!LegalOperations ||
3413 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3414 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3415 }
3416
3417 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3418 if (isNullConstant(N0) && isNullConstant(N1)) {
3419 EVT VT = N0.getValueType();
3420 EVT CarryVT = CarryIn.getValueType();
3421 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3422 AddToWorklist(CarryExt.getNode());
3423 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3424 DAG.getConstant(1, DL, VT)),
3425 DAG.getConstant(0, DL, CarryVT));
3426 }
3427
3428 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3429 return Combined;
3430
3431 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3432 return Combined;
3433
3434 // We want to avoid useless duplication.
3435 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3436 // not a binary operation, it is not really possible to leverage this
3437 // existing mechanism for it. However, if more operations require the same
3438 // deduplication logic, then it may be worth generalizing.
3439 SDValue Ops[] = {N1, N0, CarryIn};
3440 SDNode *CSENode =
3441 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3442 if (CSENode)
3443 return SDValue(CSENode, 0);
3444
3445 return SDValue();
3446}
3447
3448/**
3449 * If we are facing some sort of diamond carry propagation pattern, try to
3450 * break it up to generate something like:
3451 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3452 *
3453 * The end result is usually an increase in the number of operations required, but because
3454 * carry is now linearized, other transforms can kick in and optimize the DAG.
3455 *
3456 * Patterns typically look something like
3457 * (uaddo A, B)
3458 * / \
3459 * Carry Sum
3460 * | \
3461 * | (uaddo_carry *, 0, Z)
3462 * | /
3463 * \ Carry
3464 * | /
3465 * (uaddo_carry X, *, *)
3466 *
3467 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3468 * produce a combine with a single path for carry propagation.
3469 */
3470 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3471 SelectionDAG &DAG, SDValue X,
3472 SDValue Carry0, SDValue Carry1,
3473 SDNode *N) {
3474 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3475 return SDValue();
3476 if (Carry1.getOpcode() != ISD::UADDO)
3477 return SDValue();
3478
3479 SDValue Z;
3480
3481 /**
3482 * First look for a suitable Z. It will present itself in the form of
3483 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3484 */
3485 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3486 isNullConstant(Carry0.getOperand(1))) {
3487 Z = Carry0.getOperand(2);
3488 } else if (Carry0.getOpcode() == ISD::UADDO &&
3489 isOneConstant(Carry0.getOperand(1))) {
3490 EVT VT = Carry0->getValueType(1);
3491 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3492 } else {
3493 // We couldn't find a suitable Z.
3494 return SDValue();
3495 }
3496
3497
3498 auto cancelDiamond = [&](SDValue A,SDValue B) {
3499 SDLoc DL(N);
3500 SDValue NewY =
3501 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3502 Combiner.AddToWorklist(NewY.getNode());
3503 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3504 DAG.getConstant(0, DL, X.getValueType()),
3505 NewY.getValue(1));
3506 };
3507
3508 /**
3509 * (uaddo A, B)
3510 * |
3511 * Sum
3512 * |
3513 * (uaddo_carry *, 0, Z)
3514 */
3515 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3516 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3517 }
3518
3519 /**
3520 * (uaddo_carry A, 0, Z)
3521 * |
3522 * Sum
3523 * |
3524 * (uaddo *, B)
3525 */
3526 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3527 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3528 }
3529
3530 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3531 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3532 }
3533
3534 return SDValue();
3535}
3536
3537 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3538// match patterns like:
3539//
3540// (uaddo A, B) CarryIn
3541// | \ |
3542// | \ |
3543// PartialSum PartialCarryOutX /
3544// | | /
3545// | ____|____________/
3546// | / |
3547// (uaddo *, *) \________
3548// | \ \
3549// | \ |
3550// | PartialCarryOutY |
3551// | \ |
3552// | \ /
3553// AddCarrySum | ______/
3554// | /
3555// CarryOut = (or *, *)
3556//
3557// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3558//
3559// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3560//
3561// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3562// with a single path for carry/borrow out propagation.
3563 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3564 SDValue N0, SDValue N1, SDNode *N) {
3565 SDValue Carry0 = getAsCarry(TLI, N0);
3566 if (!Carry0)
3567 return SDValue();
3568 SDValue Carry1 = getAsCarry(TLI, N1);
3569 if (!Carry1)
3570 return SDValue();
3571
3572 unsigned Opcode = Carry0.getOpcode();
3573 if (Opcode != Carry1.getOpcode())
3574 return SDValue();
3575 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3576 return SDValue();
3577 // Guarantee identical type of CarryOut
3578 EVT CarryOutType = N->getValueType(0);
3579 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3580 CarryOutType != Carry1.getValue(1).getValueType())
3581 return SDValue();
3582
3583 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3584 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3585 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3586 std::swap(Carry0, Carry1);
3587
3588 // Check if nodes are connected in expected way.
3589 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3590 Carry1.getOperand(1) != Carry0.getValue(0))
3591 return SDValue();
3592
3593 // The carry in value must be on the righthand side for subtraction.
3594 unsigned CarryInOperandNum =
3595 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3596 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3597 return SDValue();
3598 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3599
3600 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3601 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3602 return SDValue();
3603
3604 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3605 CarryIn = getAsCarry(TLI, CarryIn, true);
3606 if (!CarryIn)
3607 return SDValue();
3608
3609 SDLoc DL(N);
3610 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3611 Carry1->getValueType(0));
3612 SDValue Merged =
3613 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3614 Carry0.getOperand(1), CarryIn);
3615
3616 // Please note that because we have proven that the result of the UADDO/USUBO
3617 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we
3618 // can prove that if the first UADDO/USUBO overflows, the second
3619 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3620 // maximum value.
3621 //
3622 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3623 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3624 //
3625 // This is important because it means that OR and XOR can be used to merge
3626 // carry flags; and that AND can return a constant zero.
3627 //
3628 // TODO: match other operations that can merge flags (ADD, etc)
3629 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3630 if (N->getOpcode() == ISD::AND)
3631 return DAG.getConstant(0, DL, CarryOutType);
3632 return Merged.getValue(1);
3633}
3634
3635SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3636 SDValue CarryIn, SDNode *N) {
3637 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3638 // carry.
3639 if (isBitwiseNot(N0))
3640 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3641 SDLoc DL(N);
3642 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3643 N0.getOperand(0), NotC);
3644 return CombineTo(
3645 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3646 }
3647
3648 // Iff the flag result is dead:
3649 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3650 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3651 // or the dependency between the instructions.
3652 if ((N0.getOpcode() == ISD::ADD ||
3653 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3654 N0.getValue(1) != CarryIn)) &&
3655 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3656 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3657 N0.getOperand(0), N0.getOperand(1), CarryIn);
3658
3659 /**
3660 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3661 * a diamond carry propagation. In that case we try to transform the DAG
3662 * to ensure linear carry propagation if that is possible.
3663 */
3664 if (auto Y = getAsCarry(TLI, N1)) {
3665 // Because both are carries, Y and Z can be swapped.
3666 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3667 return R;
3668 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3669 return R;
3670 }
3671
3672 return SDValue();
3673}
3674
3675SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3676 SDValue CarryIn, SDNode *N) {
3677 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3678 if (isBitwiseNot(N0)) {
3679 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3680 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3681 N0.getOperand(0), NotC);
3682 }
3683
3684 return SDValue();
3685}
3686
3687SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3688 SDValue N0 = N->getOperand(0);
3689 SDValue N1 = N->getOperand(1);
3690 SDValue CarryIn = N->getOperand(2);
3691 SDLoc DL(N);
3692
3693 // canonicalize constant to RHS
3694 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3695 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3696 if (N0C && !N1C)
3697 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3698
3699 // fold (saddo_carry x, y, false) -> (saddo x, y)
3700 if (isNullConstant(CarryIn)) {
3701 if (!LegalOperations ||
3702 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3703 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3704 }
3705
3706 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3707 return Combined;
3708
3709 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3710 return Combined;
3711
3712 return SDValue();
3713}
3714
3715// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3716// clamp/truncation if necessary.
3717static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3718 SDValue RHS, SelectionDAG &DAG,
3719 const SDLoc &DL) {
3720 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3721 "Illegal truncation");
3722
3723 if (DstVT == SrcVT)
3724 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3725
3726 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3727 // clamping RHS.
3728 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3729 DstVT.getScalarSizeInBits());
3730 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3731 return SDValue();
3732
3733 SDValue SatLimit =
3734 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3735 DstVT.getScalarSizeInBits()),
3736 DL, SrcVT);
3737 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3738 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3739 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3740 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3741}
3742
3743// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3744// usubsat(a,b), optionally as a truncated type.
3745SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3746 if (N->getOpcode() != ISD::SUB ||
3747 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3748 return SDValue();
3749
3750 EVT SubVT = N->getValueType(0);
3751 SDValue Op0 = N->getOperand(0);
3752 SDValue Op1 = N->getOperand(1);
3753
3754 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3755 // they may be converted to usubsat(a,b).
3756 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3757 SDValue MaxLHS = Op0.getOperand(0);
3758 SDValue MaxRHS = Op0.getOperand(1);
3759 if (MaxLHS == Op1)
3760 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3761 if (MaxRHS == Op1)
3762 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3763 }
3764
3765 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3766 SDValue MinLHS = Op1.getOperand(0);
3767 SDValue MinRHS = Op1.getOperand(1);
3768 if (MinLHS == Op0)
3769 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3770 if (MinRHS == Op0)
3771 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3772 }
3773
3774 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3775 if (Op1.getOpcode() == ISD::TRUNCATE &&
3776 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3777 Op1.getOperand(0).hasOneUse()) {
3778 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3779 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3780 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3781 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3782 DAG, DL);
3783 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3784 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3785 DAG, DL);
3786 }
3787
3788 return SDValue();
3789}
3790
3791// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3792 // counting leading ones. Broadly, it replaces the subtraction with a left
3793// shift.
3794//
3795// * DAG Legalisation Pattern:
3796//
3797// (sub (ctlz (zeroextend (not Src)))
3798// BitWidthDiff)
3799//
3800// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3801// -->
3802//
3803// (ctlz_zero_undef (not (shl (anyextend Src)
3804// BitWidthDiff)))
3805//
3806// * Type Legalisation Pattern:
3807//
3808// (sub (ctlz (and (xor Src XorMask)
3809// AndMask))
3810// BitWidthDiff)
3811//
3812// if AndMask has only trailing ones
3813// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3814 // and XorMask has at least as many trailing ones as AndMask
3815// -->
3816//
3817// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
3818template <class MatchContextClass>
3819 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3820 const SDLoc DL(N);
3821 SDValue N0 = N->getOperand(0);
3822 EVT VT = N0.getValueType();
3823 unsigned BitWidth = VT.getScalarSizeInBits();
3824
3825 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3826
3827 APInt AndMask;
3828 APInt XorMask;
3829 APInt BitWidthDiff;
3830
3831 SDValue CtlzOp;
3832 SDValue Src;
3833
3834 if (!sd_context_match(
3835 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3836 return SDValue();
3837
3838 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3839 // DAG Legalisation Pattern:
3840 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3841 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3842 return SDValue();
3843
3844 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3845 } else if (sd_context_match(CtlzOp, Matcher,
3846 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3847 m_ConstInt(AndMask)))) {
3848 // Type Legalisation Pattern:
3849 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3850 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3851 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3852 return SDValue();
3853 } else
3854 return SDValue();
3855
3856 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
3857 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
3858 SDValue Not =
3859 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
3860
3861 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
3862}
3863
3864 // Since it may not be valid to emit a fold to zero for vector initializers,
3865// check if we can before folding.
3866static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3867 SelectionDAG &DAG, bool LegalOperations) {
3868 if (!VT.isVector())
3869 return DAG.getConstant(0, DL, VT);
3870 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3871 return DAG.getConstant(0, DL, VT);
3872 return SDValue();
3873}
3874
3875SDValue DAGCombiner::visitSUB(SDNode *N) {
3876 SDValue N0 = N->getOperand(0);
3877 SDValue N1 = N->getOperand(1);
3878 EVT VT = N0.getValueType();
3879 unsigned BitWidth = VT.getScalarSizeInBits();
3880 SDLoc DL(N);
3881
3882 auto PeekThroughFreeze = [](SDValue N) {
3883 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3884 return N->getOperand(0);
3885 return N;
3886 };
3887
3888 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
3889 return V;
3890
3891 // fold (sub x, x) -> 0
3892 // FIXME: Refactor this and xor and other similar operations together.
3893 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3894 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3895
3896 // fold (sub c1, c2) -> c3
3897 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3898 return C;
3899
3900 // fold vector ops
3901 if (VT.isVector()) {
3902 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3903 return FoldedVOp;
3904
3905 // fold (sub x, 0) -> x, vector edition
3906 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3907 return N0;
3908 }
3909
3910 if (SDValue NewSel = foldBinOpIntoSelect(N))
3911 return NewSel;
3912
3913 // fold (sub x, c) -> (add x, -c)
3914 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3915 return DAG.getNode(ISD::ADD, DL, VT, N0,
3916 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3917
3918 if (isNullOrNullSplat(N0)) {
3919 // Right-shifting everything out but the sign bit followed by negation is
3920 // the same as flipping arithmetic/logical shift type without the negation:
3921 // -(X >>u 31) -> (X >>s 31)
3922 // -(X >>s 31) -> (X >>u 31)
3923 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3924 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3925 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3926 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3927 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3928 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3929 }
3930 }
3931
3932 // 0 - X --> 0 if the sub is NUW.
3933 if (N->getFlags().hasNoUnsignedWrap())
3934 return N0;
3935
3936 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3937 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3938 // N1 must be 0 because negating the minimum signed value is undefined.
3939 if (N->getFlags().hasNoSignedWrap())
3940 return N0;
3941
3942 // 0 - X --> X if X is 0 or the minimum signed value.
3943 return N1;
3944 }
3945
3946 // Convert 0 - abs(x).
3947 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3948 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3949 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3950 return Result;
3951
3952 // Similar to the previous rule, but this time targeting an expanded abs.
3953 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3954 // as well as
3955 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3956 // Note that these two are applicable to both signed and unsigned min/max.
3957 SDValue X;
3958 SDValue S0;
3959 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
3960 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
3961 m_UMax(m_Value(X), NegPat),
3962 m_SMin(m_Value(X), NegPat),
3963 m_UMin(m_Value(X), NegPat))))) {
3964 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3965 if (hasOperation(NewOpc, VT))
3966 return DAG.getNode(NewOpc, DL, VT, X, S0);
3967 }
3968
3969 // Fold neg(splat(neg(x)) -> splat(x)
3970 if (VT.isVector()) {
3971 SDValue N1S = DAG.getSplatValue(N1, true);
3972 if (N1S && N1S.getOpcode() == ISD::SUB &&
3973 isNullConstant(N1S.getOperand(0)))
3974 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3975 }
3976 }
3977
3978 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3979 if (isAllOnesOrAllOnesSplat(N0))
3980 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3981
3982 // fold (A - (0-B)) -> A+B
3983 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3984 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3985
3986 // fold A-(A-B) -> B
3987 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3988 return N1.getOperand(1);
3989
3990 // fold (A+B)-A -> B
3991 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3992 return N0.getOperand(1);
3993
3994 // fold (A+B)-B -> A
3995 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3996 return N0.getOperand(0);
3997
3998 // fold (A+C1)-C2 -> A+(C1-C2)
3999 if (N0.getOpcode() == ISD::ADD) {
4000 SDValue N01 = N0.getOperand(1);
4001 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4002 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4003 }
4004
4005 // fold C2-(A+C1) -> (C2-C1)-A
4006 if (N1.getOpcode() == ISD::ADD) {
4007 SDValue N11 = N1.getOperand(1);
4008 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4009 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4010 }
4011
4012 // fold (A-C1)-C2 -> A-(C1+C2)
4013 if (N0.getOpcode() == ISD::SUB) {
4014 SDValue N01 = N0.getOperand(1);
4015 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4016 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4017 }
4018
4019 // fold (c1-A)-c2 -> (c1-c2)-A
4020 if (N0.getOpcode() == ISD::SUB) {
4021 SDValue N00 = N0.getOperand(0);
4022 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4023 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4024 }
4025
4026 SDValue A, B, C;
4027
4028 // fold ((A+(B+C))-B) -> A+C
4029 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4030 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4031
4032 // fold ((A+(B-C))-B) -> A-C
4033 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4034 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4035
4036 // fold ((A-(B-C))-C) -> A-B
4037 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4038 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4039
4040 // fold (A-(B-C)) -> A+(C-B)
4041 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4042 return DAG.getNode(ISD::ADD, DL, VT, N0,
4043 DAG.getNode(ISD::SUB, DL, VT, C, B));
4044
4045 // A - (A & B) -> A & (~B)
4046 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4047 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4048 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4049
4050 // fold (A - (-B * C)) -> (A + (B * C))
4051 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4052 return DAG.getNode(ISD::ADD, DL, VT, N0,
4053 DAG.getNode(ISD::MUL, DL, VT, B, C));
4054
4055 // If either operand of a sub is undef, the result is undef
4056 if (N0.isUndef())
4057 return N0;
4058 if (N1.isUndef())
4059 return N1;
4060
4061 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4062 return V;
4063
4064 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4065 return V;
4066
4067 // Try to match AVGCEIL fixedwidth pattern
4068 if (SDValue V = foldSubToAvg(N, DL))
4069 return V;
4070
4071 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4072 return V;
4073
4074 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4075 return V;
4076
4077 // (A - B) - 1 -> add (xor B, -1), A
4078 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
4079 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4080
4081 // Look for:
4082 // sub y, (xor x, -1)
4083 // And if the target does not like this form then turn into:
4084 // add (add x, y), 1
4085 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4086 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4087 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4088 }
4089
4090 // Hoist one-use addition by non-opaque constant:
4091 // (x + C) - y -> (x - y) + C
4092 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4093 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4094 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4095 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4096 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4097 }
4098 // y - (x + C) -> (y - x) - C
4099 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4100 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4101 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4102 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4103 }
4104 // (x - C) - y -> (x - y) - C
4105 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4106 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4107 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4108 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4109 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4110 }
4111 // (C - x) - y -> C - (x + y)
4112 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4113 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4114 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4115 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4116 }
4117
4118 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4119 // rather than 'sub 0/1' (the sext should get folded).
4120 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4121 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4122 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4123 TLI.getBooleanContents(VT) ==
4124 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4125 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4126 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4127 }
4128
4129 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4130 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4131 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4132 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4133 return DAG.getNode(ISD::ABS, DL, VT, A);
4134
4135 // If the relocation model supports it, consider symbol offsets.
4136 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4137 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4138 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4139 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4140 if (GA->getGlobal() == GB->getGlobal())
4141 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4142 DL, VT);
4143 }
4144
4145 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4146 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4147 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4148 if (TN->getVT() == MVT::i1) {
4149 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4150 DAG.getConstant(1, DL, VT));
4151 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4152 }
4153 }
4154
4155 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4156 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4157 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4158 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4159 }
4160
4161 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4162 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4163 APInt NewStep = -N1.getConstantOperandAPInt(0);
4164 return DAG.getNode(ISD::ADD, DL, VT, N0,
4165 DAG.getStepVector(DL, VT, NewStep));
4166 }
4167
4168 // Prefer an add for more folding potential and possibly better codegen:
4169 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4170 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4171 SDValue ShAmt = N1.getOperand(1);
4172 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4173 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4174 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4175 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4176 }
4177 }
4178
4179 // As with the previous fold, prefer add for more folding potential.
4180 // Subtracting SMIN/0 is the same as adding SMIN/0:
4181 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4182 if (N1.getOpcode() == ISD::SHL) {
4183 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4184 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4185 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4186 }
4187
4188 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4189 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4190 N0.getResNo() == 0 && N0.hasOneUse())
4191 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4192 N0.getOperand(0), N1, N0.getOperand(2));
4193
4194 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4195 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4196 if (SDValue Carry = getAsCarry(TLI, N0)) {
4197 SDValue X = N1;
4198 SDValue Zero = DAG.getConstant(0, DL, VT);
4199 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4200 return DAG.getNode(ISD::UADDO_CARRY, DL,
4201 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4202 Carry);
4203 }
4204 }
4205
4206 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4207 // sub C0, X --> xor X, C0
4208 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4209 if (!C0->isOpaque()) {
4210 const APInt &C0Val = C0->getAPIntValue();
4211 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4212 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4213 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4214 }
4215 }
4216
4217 // smax(a,b) - smin(a,b) --> abds(a,b)
4218 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4219 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4220 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4221 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4222
4223 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4224 if (hasOperation(ISD::ABDS, VT) &&
4225 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4226 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4227 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4228
4229 // umax(a,b) - umin(a,b) --> abdu(a,b)
4230 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4231 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4232 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4233 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4234
4235 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4236 if (hasOperation(ISD::ABDU, VT) &&
4237 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4238 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4239 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4240
4241 return SDValue();
4242}
4243
4244SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4245 unsigned Opcode = N->getOpcode();
4246 SDValue N0 = N->getOperand(0);
4247 SDValue N1 = N->getOperand(1);
4248 EVT VT = N0.getValueType();
4249 bool IsSigned = Opcode == ISD::SSUBSAT;
4250 SDLoc DL(N);
4251
4252 // fold (sub_sat x, undef) -> 0
4253 if (N0.isUndef() || N1.isUndef())
4254 return DAG.getConstant(0, DL, VT);
4255
4256 // fold (sub_sat x, x) -> 0
4257 if (N0 == N1)
4258 return DAG.getConstant(0, DL, VT);
4259
4260 // fold (sub_sat c1, c2) -> c3
4261 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4262 return C;
4263
4264 // fold vector ops
4265 if (VT.isVector()) {
4266 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4267 return FoldedVOp;
4268
4269 // fold (sub_sat x, 0) -> x, vector edition
4270 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4271 return N0;
4272 }
4273
4274 // fold (sub_sat x, 0) -> x
4275 if (isNullConstant(N1))
4276 return N0;
4277
4278 // If it cannot overflow, transform into a sub.
4279 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4280 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4281
4282 return SDValue();
4283}
4284
4285SDValue DAGCombiner::visitSUBC(SDNode *N) {
4286 SDValue N0 = N->getOperand(0);
4287 SDValue N1 = N->getOperand(1);
4288 EVT VT = N0.getValueType();
4289 SDLoc DL(N);
4290
4291 // If the flag result is dead, turn this into an SUB.
4292 if (!N->hasAnyUseOfValue(1))
4293 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4294 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4295
4296 // fold (subc x, x) -> 0 + no borrow
4297 if (N0 == N1)
4298 return CombineTo(N, DAG.getConstant(0, DL, VT),
4299 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4300
4301 // fold (subc x, 0) -> x + no borrow
4302 if (isNullConstant(N1))
4303 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4304
4305 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4306 if (isAllOnesConstant(N0))
4307 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4308 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4309
4310 return SDValue();
4311}
4312
4313SDValue DAGCombiner::visitSUBO(SDNode *N) {
4314 SDValue N0 = N->getOperand(0);
4315 SDValue N1 = N->getOperand(1);
4316 EVT VT = N0.getValueType();
4317 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4318
4319 EVT CarryVT = N->getValueType(1);
4320 SDLoc DL(N);
4321
4322 // If the flag result is dead, turn this into an SUB.
4323 if (!N->hasAnyUseOfValue(1))
4324 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4325 DAG.getUNDEF(CarryVT));
4326
4327 // fold (subo x, x) -> 0 + no borrow
4328 if (N0 == N1)
4329 return CombineTo(N, DAG.getConstant(0, DL, VT),
4330 DAG.getConstant(0, DL, CarryVT));
4331
4332 // fold (subo x, c) -> (addo x, -c)
4333 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4334 if (IsSigned && !N1C->isMinSignedValue())
4335 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4336 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4337
4338 // fold (subo x, 0) -> x + no borrow
4339 if (isNullOrNullSplat(N1))
4340 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4341
4342 // If it cannot overflow, transform into a sub.
4343 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4344 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4345 DAG.getConstant(0, DL, CarryVT));
4346
4347 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4348 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4349 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4350 DAG.getConstant(0, DL, CarryVT));
4351
4352 return SDValue();
4353}
4354
4355SDValue DAGCombiner::visitSUBE(SDNode *N) {
4356 SDValue N0 = N->getOperand(0);
4357 SDValue N1 = N->getOperand(1);
4358 SDValue CarryIn = N->getOperand(2);
4359
4360 // fold (sube x, y, false) -> (subc x, y)
4361 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4362 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4363
4364 return SDValue();
4365}
4366
4367SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4368 SDValue N0 = N->getOperand(0);
4369 SDValue N1 = N->getOperand(1);
4370 SDValue CarryIn = N->getOperand(2);
4371
4372 // fold (usubo_carry x, y, false) -> (usubo x, y)
4373 if (isNullConstant(CarryIn)) {
4374 if (!LegalOperations ||
4375 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4376 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4377 }
4378
4379 return SDValue();
4380}
4381
4382SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4383 SDValue N0 = N->getOperand(0);
4384 SDValue N1 = N->getOperand(1);
4385 SDValue CarryIn = N->getOperand(2);
4386
4387 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4388 if (isNullConstant(CarryIn)) {
4389 if (!LegalOperations ||
4390 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4391 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4392 }
4393
4394 return SDValue();
4395}
4396
4397// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4398// UMULFIXSAT here.
4399SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4400 SDValue N0 = N->getOperand(0);
4401 SDValue N1 = N->getOperand(1);
4402 SDValue Scale = N->getOperand(2);
4403 EVT VT = N0.getValueType();
4404
4405 // fold (mulfix x, undef, scale) -> 0
4406 if (N0.isUndef() || N1.isUndef())
4407 return DAG.getConstant(0, SDLoc(N), VT);
4408
4409 // Canonicalize constant to RHS (vector doesn't have to splat)
4410 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4411 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4412 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4413
4414 // fold (mulfix x, 0, scale) -> 0
4415 if (isNullConstant(N1))
4416 return DAG.getConstant(0, SDLoc(N), VT);
4417
4418 return SDValue();
4419}
4420
4421template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4422 SDValue N0 = N->getOperand(0);
4423 SDValue N1 = N->getOperand(1);
4424 EVT VT = N0.getValueType();
4425 unsigned BitWidth = VT.getScalarSizeInBits();
4426 SDLoc DL(N);
4427 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4428 MatchContextClass Matcher(DAG, TLI, N);
4429
4430 // fold (mul x, undef) -> 0
4431 if (N0.isUndef() || N1.isUndef())
4432 return DAG.getConstant(0, DL, VT);
4433
4434 // fold (mul c1, c2) -> c1*c2
4435 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4436 return C;
4437
4438 // canonicalize constant to RHS (vector doesn't have to splat)
4439 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4440 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4441 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4442
4443 bool N1IsConst = false;
4444 bool N1IsOpaqueConst = false;
4445 APInt ConstValue1;
4446
4447 // fold vector ops
4448 if (VT.isVector()) {
4449 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4450 if (!UseVP)
4451 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4452 return FoldedVOp;
4453
4454 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4455 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4456 "Splat APInt should be element width");
4457 } else {
4458 N1IsConst = isa<ConstantSDNode>(N1);
4459 if (N1IsConst) {
4460 ConstValue1 = N1->getAsAPIntVal();
4461 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4462 }
4463 }
4464
4465 // fold (mul x, 0) -> 0
4466 if (N1IsConst && ConstValue1.isZero())
4467 return N1;
4468
4469 // fold (mul x, 1) -> x
4470 if (N1IsConst && ConstValue1.isOne())
4471 return N0;
4472
4473 if (!UseVP)
4474 if (SDValue NewSel = foldBinOpIntoSelect(N))
4475 return NewSel;
4476
4477 // fold (mul x, -1) -> 0-x
4478 if (N1IsConst && ConstValue1.isAllOnes())
4479 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4480
4481 // fold (mul x, (1 << c)) -> x << c
4482 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4483 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4484 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4485 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4486 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4487 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4488 }
4489 }
4490
4491 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4492 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4493 unsigned Log2Val = (-ConstValue1).logBase2();
4494
4495 // FIXME: If the input is something that is easily negated (e.g. a
4496 // single-use add), we should put the negate there.
4497 return Matcher.getNode(
4498 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4499 Matcher.getNode(ISD::SHL, DL, VT, N0,
4500 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4501 }
4502
4503 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4504 // hi result is in use in case we hit this mid-legalization.
4505 if (!UseVP) {
4506 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4507 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4508 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4509 // TODO: Can we match commutable operands with getNodeIfExists?
4510 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4511 if (LoHi->hasAnyUseOfValue(1))
4512 return SDValue(LoHi, 0);
4513 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4514 if (LoHi->hasAnyUseOfValue(1))
4515 return SDValue(LoHi, 0);
4516 }
4517 }
4518 }
4519
4520 // Try to transform:
4521 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4522 // mul x, (2^N + 1) --> add (shl x, N), x
4523 // mul x, (2^N - 1) --> sub (shl x, N), x
4524 // Examples: x * 33 --> (x << 5) + x
4525 // x * 15 --> (x << 4) - x
4526 // x * -33 --> -((x << 5) + x)
4527 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4528 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4529 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4530 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4531 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4532 // x * 0xf800 --> (x << 16) - (x << 11)
4533 // x * -0x8800 --> -((x << 15) + (x << 11))
4534 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4535 if (!UseVP && N1IsConst &&
4536 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4537 // TODO: We could handle more general decomposition of any constant by
4538 // having the target set a limit on number of ops and making a
4539 // callback to determine that sequence (similar to sqrt expansion).
4540 unsigned MathOp = ISD::DELETED_NODE;
4541 APInt MulC = ConstValue1.abs();
4542 // The constant `2` should be treated as (2^0 + 1).
4543 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4544 MulC.lshrInPlace(TZeros);
4545 if ((MulC - 1).isPowerOf2())
4546 MathOp = ISD::ADD;
4547 else if ((MulC + 1).isPowerOf2())
4548 MathOp = ISD::SUB;
4549
4550 if (MathOp != ISD::DELETED_NODE) {
4551 unsigned ShAmt =
4552 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4553 ShAmt += TZeros;
4554 assert(ShAmt < BitWidth &&
4555 "multiply-by-constant generated out of bounds shift");
4556 SDValue Shl =
4557 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4558 SDValue R =
4559 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4560 DAG.getNode(ISD::SHL, DL, VT, N0,
4561 DAG.getConstant(TZeros, DL, VT)))
4562 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4563 if (ConstValue1.isNegative())
4564 R = DAG.getNegative(R, DL, VT);
4565 return R;
4566 }
4567 }
4568
4569 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4570 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4571 SDValue N01 = N0.getOperand(1);
4572 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4573 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4574 }
4575
4576 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4577 // use.
4578 {
4579 SDValue Sh, Y;
4580
4581 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4582 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4583 isConstantOrConstantVector(N0.getOperand(1))) {
4584 Sh = N0; Y = N1;
4585 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4586 isConstantOrConstantVector(N1.getOperand(1))) {
4587 Sh = N1; Y = N0;
4588 }
4589
4590 if (Sh.getNode()) {
4591 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4592 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4593 }
4594 }
4595
4596 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4597 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4598 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4599 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4600 isMulAddWithConstProfitable(N, N0, N1))
4601 return Matcher.getNode(
4602 ISD::ADD, DL, VT,
4603 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4604 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4605
4606 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4607 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4608 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4609 const APInt &C0 = N0.getConstantOperandAPInt(0);
4610 const APInt &C1 = NC1->getAPIntValue();
4611 return DAG.getVScale(DL, VT, C0 * C1);
4612 }
4613
4614 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4615 APInt MulVal;
4616 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4617 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4618 const APInt &C0 = N0.getConstantOperandAPInt(0);
4619 APInt NewStep = C0 * MulVal;
4620 return DAG.getStepVector(DL, VT, NewStep);
4621 }
4622
4623 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4624 SDValue X;
4625 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4626 sd_context_match(
4627 N, Matcher,
4628 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4629 m_Deferred(X)))) {
4630 return Matcher.getNode(ISD::ABS, DL, VT, X);
4631 }
4632
4633 // Fold (mul x, 0/undef) -> 0 and
4634 //      (mul x, 1) -> x
4635 // into and(x, mask)
4636 // We can replace vectors with '0' and '1' factors with a clearing mask.
4637 if (VT.isFixedLengthVector()) {
4638 unsigned NumElts = VT.getVectorNumElements();
4639 SmallBitVector ClearMask;
4640 ClearMask.reserve(NumElts);
4641 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4642 if (!V || V->isZero()) {
4643 ClearMask.push_back(true);
4644 return true;
4645 }
4646 ClearMask.push_back(false);
4647 return V->isOne();
4648 };
4649 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4650 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4651 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4652 EVT LegalSVT = N1.getOperand(0).getValueType();
4653 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4654 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4655 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4656 for (unsigned I = 0; I != NumElts; ++I)
4657 if (ClearMask[I])
4658 Mask[I] = Zero;
4659 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4660 }
4661 }
4662
4663 // reassociate mul
4664 // TODO: Change reassociateOps to support vp ops.
4665 if (!UseVP)
4666 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4667 return RMUL;
4668
4669 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4670 // TODO: Change reassociateReduction to support vp ops.
4671 if (!UseVP)
4672 if (SDValue SD =
4673 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4674 return SD;
4675
4676 // Simplify the operands using demanded-bits information.
4677 if (SimplifyDemandedBits(SDValue(N, 0)))
4678 return SDValue(N, 0);
4679
4680 return SDValue();
4681}
4682
4683/// Return true if divmod libcall is available.
4684 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4685 const TargetLowering &TLI) {
4686 RTLIB::Libcall LC;
4687 EVT NodeType = Node->getValueType(0);
4688 if (!NodeType.isSimple())
4689 return false;
4690 switch (NodeType.getSimpleVT().SimpleTy) {
4691 default: return false; // No libcall for vector types.
4692 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4693 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4694 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4695 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4696 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4697 }
4698
4699 return TLI.getLibcallName(LC) != nullptr;
4700}
4701
4702/// Issue divrem if both quotient and remainder are needed.
4703SDValue DAGCombiner::useDivRem(SDNode *Node) {
4704 if (Node->use_empty())
4705 return SDValue(); // This is a dead node, leave it alone.
4706
4707 unsigned Opcode = Node->getOpcode();
4708 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4709 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4710
4711 // DivMod lib calls can still work on non-legal types if using lib-calls.
4712 EVT VT = Node->getValueType(0);
4713 if (VT.isVector() || !VT.isInteger())
4714 return SDValue();
4715
4716 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4717 return SDValue();
4718
4719 // If DIVREM is going to get expanded into a libcall,
4720 // but there is no libcall available, then don't combine.
4721 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4722 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4723 return SDValue();
4724
4725 // If div is legal, it's better to do the normal expansion
4726 unsigned OtherOpcode = 0;
4727 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4728 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4729 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4730 return SDValue();
4731 } else {
4732 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4733 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4734 return SDValue();
4735 }
4736
4737 SDValue Op0 = Node->getOperand(0);
4738 SDValue Op1 = Node->getOperand(1);
4739 SDValue combined;
4740 for (SDNode *User : Op0->users()) {
4741 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4742 User->use_empty())
4743 continue;
4744 // Convert the other matching node(s), too;
4745 // otherwise, the DIVREM may get target-legalized into something
4746 // target-specific that we won't be able to recognize.
4747 unsigned UserOpc = User->getOpcode();
4748 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4749 User->getOperand(0) == Op0 &&
4750 User->getOperand(1) == Op1) {
4751 if (!combined) {
4752 if (UserOpc == OtherOpcode) {
4753 SDVTList VTs = DAG.getVTList(VT, VT);
4754 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4755 } else if (UserOpc == DivRemOpc) {
4756 combined = SDValue(User, 0);
4757 } else {
4758 assert(UserOpc == Opcode);
4759 continue;
4760 }
4761 }
4762 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4763 CombineTo(User, combined);
4764 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4765 CombineTo(User, combined.getValue(1));
4766 }
4767 }
4768 return combined;
4769}
4770
4771static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4772 SDValue N0 = N->getOperand(0);
4773 SDValue N1 = N->getOperand(1);
4774 EVT VT = N->getValueType(0);
4775 SDLoc DL(N);
4776
4777 unsigned Opc = N->getOpcode();
4778 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4779 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4780
4781 // X / undef -> undef
4782 // X % undef -> undef
4783 // X / 0 -> undef
4784 // X % 0 -> undef
4785 // NOTE: This includes vectors where any divisor element is zero/undef.
4786 if (DAG.isUndef(Opc, {N0, N1}))
4787 return DAG.getUNDEF(VT);
4788
4789 // undef / X -> 0
4790 // undef % X -> 0
4791 if (N0.isUndef())
4792 return DAG.getConstant(0, DL, VT);
4793
4794 // 0 / X -> 0
4795 // 0 % X -> 0
4796 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4797 if (N0C && N0C->isZero())
4798 return N0;
4799
4800 // X / X -> 1
4801 // X % X -> 0
4802 if (N0 == N1)
4803 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4804
4805 // X / 1 -> X
4806 // X % 1 -> 0
4807 // If this is a boolean op (single-bit element type), we can't have
4808 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4809 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4810 // it's a 1.
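// For instance, (udiv i1 X, Y) folds to X and (urem i1 X, Y) folds to 0, since a well-defined i1 divisor must be 1.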
4811 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4812 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4813
4814 return SDValue();
4815}
4816
4817SDValue DAGCombiner::visitSDIV(SDNode *N) {
4818 SDValue N0 = N->getOperand(0);
4819 SDValue N1 = N->getOperand(1);
4820 EVT VT = N->getValueType(0);
4821 EVT CCVT = getSetCCResultType(VT);
4822 SDLoc DL(N);
4823
4824 // fold (sdiv c1, c2) -> c1/c2
4825 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4826 return C;
4827
4828 // fold vector ops
4829 if (VT.isVector())
4830 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4831 return FoldedVOp;
4832
4833 // fold (sdiv X, -1) -> 0-X
4834 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4835 if (N1C && N1C->isAllOnes())
4836 return DAG.getNegative(N0, DL, VT);
4837
4838 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4839 if (N1C && N1C->isMinSignedValue())
4840 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4841 DAG.getConstant(1, DL, VT),
4842 DAG.getConstant(0, DL, VT));
4843
4844 if (SDValue V = simplifyDivRem(N, DAG))
4845 return V;
4846
4847 if (SDValue NewSel = foldBinOpIntoSelect(N))
4848 return NewSel;
4849
4850 // If we know the sign bits of both operands are zero, strength reduce to a
4851 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4852 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4853 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4854
4855 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4856 // If the corresponding remainder node exists, update its users with
4857 // (Dividend - (Quotient * Divisor)).
4858 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4859 { N0, N1 })) {
4860 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4861 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4862 AddToWorklist(Mul.getNode());
4863 AddToWorklist(Sub.getNode());
4864 CombineTo(RemNode, Sub);
4865 }
4866 return V;
4867 }
4868
4869 // sdiv, srem -> sdivrem
4870 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4871 // true. Otherwise, we break the simplification logic in visitREM().
4872 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4873 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4874 if (SDValue DivRem = useDivRem(N))
4875 return DivRem;
4876
4877 return SDValue();
4878}
4879
4880static bool isDivisorPowerOfTwo(SDValue Divisor) {
4881 // Helper for determining whether a value is a power-2 constant scalar or a
4882 // vector of such elements.
4883 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4884 if (C->isZero() || C->isOpaque())
4885 return false;
4886 if (C->getAPIntValue().isPowerOf2())
4887 return true;
4888 if (C->getAPIntValue().isNegatedPowerOf2())
4889 return true;
4890 return false;
4891 };
4892
4893 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4894}
4895
4896SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4897 SDLoc DL(N);
4898 EVT VT = N->getValueType(0);
4899 EVT CCVT = getSetCCResultType(VT);
4900 unsigned BitWidth = VT.getScalarSizeInBits();
4901
4902 // fold (sdiv X, pow2) -> simple ops after legalize
4903 // FIXME: We check for the exact bit here because the generic lowering gives
4904 // better results in that case. The target-specific lowering should learn how
4905 // to handle exact sdivs efficiently.
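// For example, absent a target-specific lowering, i32 (sdiv x, 4) becomes (sra (add x, (srl (sra x, 31), 30)), 2).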
4906 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4907 // Target-specific implementation of sdiv x, pow2.
4908 if (SDValue Res = BuildSDIVPow2(N))
4909 return Res;
4910
4911 // Create constants that are functions of the shift amount value.
4912 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4913 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4914 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4915 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4916 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4917 if (!isConstantOrConstantVector(Inexact))
4918 return SDValue();
4919
4920 // Splat the sign bit into the register
4921 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4922 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4923 AddToWorklist(Sign.getNode());
4924
4925 // Add (N0 < 0) ? abs2 - 1 : 0;
4926 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4927 AddToWorklist(Srl.getNode());
4928 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4929 AddToWorklist(Add.getNode());
4930 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4931 AddToWorklist(Sra.getNode());
4932
4933 // Special case: (sdiv X, 1) -> X
4934 // Special Case: (sdiv X, -1) -> 0-X
4935 SDValue One = DAG.getConstant(1, DL, VT);
4936 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4937 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4938 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4939 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4940 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4941
4942 // If dividing by a positive value, we're done. Otherwise, the result must
4943 // be negated.
4944 SDValue Zero = DAG.getConstant(0, DL, VT);
4945 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4946
4947 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4948 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4949 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4950 return Res;
4951 }
4952
4953 // If integer divide is expensive and we satisfy the requirements, emit an
4954 // alternate sequence. Targets may check function attributes for size/speed
4955 // trade-offs.
4956 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4957 if (isConstantOrConstantVector(N1) &&
4958 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4959 if (SDValue Op = BuildSDIV(N))
4960 return Op;
4961
4962 return SDValue();
4963}
4964
4965SDValue DAGCombiner::visitUDIV(SDNode *N) {
4966 SDValue N0 = N->getOperand(0);
4967 SDValue N1 = N->getOperand(1);
4968 EVT VT = N->getValueType(0);
4969 EVT CCVT = getSetCCResultType(VT);
4970 SDLoc DL(N);
4971
4972 // fold (udiv c1, c2) -> c1/c2
4973 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4974 return C;
4975
4976 // fold vector ops
4977 if (VT.isVector())
4978 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4979 return FoldedVOp;
4980
4981 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4982 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4983 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4984 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4985 DAG.getConstant(1, DL, VT),
4986 DAG.getConstant(0, DL, VT));
4987 }
4988
4989 if (SDValue V = simplifyDivRem(N, DAG))
4990 return V;
4991
4992 if (SDValue NewSel = foldBinOpIntoSelect(N))
4993 return NewSel;
4994
4995 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4996 // If the corresponding remainder node exists, update its users with
4997 // (Dividend - (Quotient * Divisor)).
4998 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4999 { N0, N1 })) {
5000 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5001 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5002 AddToWorklist(Mul.getNode());
5003 AddToWorklist(Sub.getNode());
5004 CombineTo(RemNode, Sub);
5005 }
5006 return V;
5007 }
5008
5009 // udiv, urem -> udivrem
5010 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5011 // true. Otherwise, we break the simplification logic in visitREM().
5012 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5013 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5014 if (SDValue DivRem = useDivRem(N))
5015 return DivRem;
5016
5017 // Simplify the operands using demanded-bits information.
5018 // We don't have demanded bits support for UDIV so this just enables constant
5019 // folding based on known bits.
5020 if (SimplifyDemandedBits(SDValue(N, 0)))
5021 return SDValue(N, 0);
5022
5023 return SDValue();
5024}
5025
5026SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5027 SDLoc DL(N);
5028 EVT VT = N->getValueType(0);
5029
5030 // fold (udiv x, (1 << c)) -> x >>u c
5031 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5032 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5033 AddToWorklist(LogBase2.getNode());
5034
5035 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5036 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5037 AddToWorklist(Trunc.getNode());
5038 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5039 }
5040 }
5041
5042 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5043 if (N1.getOpcode() == ISD::SHL) {
5044 SDValue N10 = N1.getOperand(0);
5045 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5046 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5047 AddToWorklist(LogBase2.getNode());
5048
5049 EVT ADDVT = N1.getOperand(1).getValueType();
5050 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5051 AddToWorklist(Trunc.getNode());
5052 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5053 AddToWorklist(Add.getNode());
5054 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5055 }
5056 }
5057 }
5058
5059 // fold (udiv x, c) -> alternate
5060 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5061 if (isConstantOrConstantVector(N1) &&
5062 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5063 if (SDValue Op = BuildUDIV(N))
5064 return Op;
5065
5066 return SDValue();
5067}
5068
5069SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5070 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5071 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5072 // Target-specific implementation of srem x, pow2.
5073 if (SDValue Res = BuildSREMPow2(N))
5074 return Res;
5075 }
5076 return SDValue();
5077}
5078
5079// handles ISD::SREM and ISD::UREM
5080SDValue DAGCombiner::visitREM(SDNode *N) {
5081 unsigned Opcode = N->getOpcode();
5082 SDValue N0 = N->getOperand(0);
5083 SDValue N1 = N->getOperand(1);
5084 EVT VT = N->getValueType(0);
5085 EVT CCVT = getSetCCResultType(VT);
5086
5087 bool isSigned = (Opcode == ISD::SREM);
5088 SDLoc DL(N);
5089
5090 // fold (rem c1, c2) -> c1%c2
5091 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5092 return C;
5093
5094 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5095 // Freeze the numerator to avoid a miscompile with an undefined value.
5096 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5097 CCVT.isVector() == VT.isVector()) {
5098 SDValue F0 = DAG.getFreeze(N0);
5099 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5100 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5101 }
5102
5103 if (SDValue V = simplifyDivRem(N, DAG))
5104 return V;
5105
5106 if (SDValue NewSel = foldBinOpIntoSelect(N))
5107 return NewSel;
5108
5109 if (isSigned) {
5110 // If we know the sign bits of both operands are zero, strength reduce to a
5111 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5112 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5113 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5114 } else {
5115 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5116 // fold (urem x, pow2) -> (and x, pow2-1)
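// e.g. (urem x, 16) -> (and x, 15)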
5117 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5118 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5119 AddToWorklist(Add.getNode());
5120 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5121 }
5122 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5123 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5124 // TODO: We should sink the following into isKnownToBePowerOfTwo
5125 // using a OrZero parameter analogous to our handling in ValueTracking.
5126 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5127 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5128 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5129 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5130 AddToWorklist(Add.getNode());
5131 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5132 }
5133 }
5134
5135 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5136
5137 // If X/C can be simplified by the division-by-constant logic, lower
5138 // X%C to the equivalent of X-X/C*C.
5139 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5140 // speculative DIV must not cause a DIVREM conversion. We guard against this
5141 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5142 // combine will not return a DIVREM. Regardless, checking cheapness here
5143 // makes sense since the simplification results in fatter code.
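// e.g. (urem x, 10) becomes (sub x, (mul (udiv x, 10), 10)), where the udiv is then expanded by BuildUDIV.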
5144 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5145 if (isSigned) {
5146 // check if we can build faster implementation for srem
5147 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5148 return OptimizedRem;
5149 }
5150
5151 SDValue OptimizedDiv =
5152 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5153 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5154 // If the equivalent Div node also exists, update its users.
5155 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5156 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5157 { N0, N1 }))
5158 CombineTo(DivNode, OptimizedDiv);
5159 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5160 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5161 AddToWorklist(OptimizedDiv.getNode());
5162 AddToWorklist(Mul.getNode());
5163 return Sub;
5164 }
5165 }
5166
5167 // sdiv/udiv, srem/urem -> sdivrem/udivrem
5168 if (SDValue DivRem = useDivRem(N))
5169 return DivRem.getValue(1);
5170
5171 return SDValue();
5172}
5173
5174SDValue DAGCombiner::visitMULHS(SDNode *N) {
5175 SDValue N0 = N->getOperand(0);
5176 SDValue N1 = N->getOperand(1);
5177 EVT VT = N->getValueType(0);
5178 SDLoc DL(N);
5179
5180 // fold (mulhs c1, c2)
5181 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5182 return C;
5183
5184 // canonicalize constant to RHS.
5185 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5186 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5187 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5188
5189 if (VT.isVector()) {
5190 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5191 return FoldedVOp;
5192
5193 // fold (mulhs x, 0) -> 0
5194 // do not return N1, because undef node may exist.
5195 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5196 return DAG.getConstant(0, DL, VT);
5197 }
5198
5199 // fold (mulhs x, 0) -> 0
5200 if (isNullConstant(N1))
5201 return N1;
5202
5203 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5204 if (isOneConstant(N1))
5205 return DAG.getNode(
5206 ISD::SRA, DL, VT, N0,
5207 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5208
5209 // fold (mulhs x, undef) -> 0
5210 if (N0.isUndef() || N1.isUndef())
5211 return DAG.getConstant(0, DL, VT);
5212
5213 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5214 // plus a shift.
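// e.g. for i16: (mulhs a, b) -> (trunc (srl (mul (sext a), (sext b)), 16)) when an i32 multiply is available.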
5215 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5216 !VT.isVector()) {
5217 MVT Simple = VT.getSimpleVT();
5218 unsigned SimpleSize = Simple.getSizeInBits();
5219 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5220 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5221 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5222 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5223 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5224 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5225 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5226 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5227 }
5228 }
5229
5230 return SDValue();
5231}
5232
5233SDValue DAGCombiner::visitMULHU(SDNode *N) {
5234 SDValue N0 = N->getOperand(0);
5235 SDValue N1 = N->getOperand(1);
5236 EVT VT = N->getValueType(0);
5237 SDLoc DL(N);
5238
5239 // fold (mulhu c1, c2)
5240 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5241 return C;
5242
5243 // canonicalize constant to RHS.
5244 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5245 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5246 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5247
5248 if (VT.isVector()) {
5249 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5250 return FoldedVOp;
5251
5252 // fold (mulhu x, 0) -> 0
5253 // do not return N1, because undef node may exist.
5254 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5255 return DAG.getConstant(0, DL, VT);
5256 }
5257
5258 // fold (mulhu x, 0) -> 0
5259 if (isNullConstant(N1))
5260 return N1;
5261
5262 // fold (mulhu x, 1) -> 0
5263 if (isOneConstant(N1))
5264 return DAG.getConstant(0, DL, VT);
5265
5266 // fold (mulhu x, undef) -> 0
5267 if (N0.isUndef() || N1.isUndef())
5268 return DAG.getConstant(0, DL, VT);
5269
5270 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5271 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5272 hasOperation(ISD::SRL, VT)) {
5273 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5274 unsigned NumEltBits = VT.getScalarSizeInBits();
5275 SDValue SRLAmt = DAG.getNode(
5276 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5277 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5278 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5279 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5280 }
5281 }
5282
5283 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5284 // plus a shift.
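// e.g. for i16: (mulhu a, b) -> (trunc (srl (mul (zext a), (zext b)), 16)) when an i32 multiply is available.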
5285 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5286 !VT.isVector()) {
5287 MVT Simple = VT.getSimpleVT();
5288 unsigned SimpleSize = Simple.getSizeInBits();
5289 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5290 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5291 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5292 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5293 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5294 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5295 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5296 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5297 }
5298 }
5299
5300 // Simplify the operands using demanded-bits information.
5301 // We don't have demanded bits support for MULHU so this just enables constant
5302 // folding based on known bits.
5303 if (SimplifyDemandedBits(SDValue(N, 0)))
5304 return SDValue(N, 0);
5305
5306 return SDValue();
5307}
5308
5309SDValue DAGCombiner::visitAVG(SDNode *N) {
5310 unsigned Opcode = N->getOpcode();
5311 SDValue N0 = N->getOperand(0);
5312 SDValue N1 = N->getOperand(1);
5313 EVT VT = N->getValueType(0);
5314 SDLoc DL(N);
5315 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5316
5317 // fold (avg c1, c2)
5318 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5319 return C;
5320
5321 // canonicalize constant to RHS.
5322 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5323 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5324 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5325
5326 if (VT.isVector())
5327 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5328 return FoldedVOp;
5329
5330 // fold (avg x, undef) -> x
5331 if (N0.isUndef())
5332 return N1;
5333 if (N1.isUndef())
5334 return N0;
5335
5336 // fold (avg x, x) --> x
5337 if (N0 == N1 && Level >= AfterLegalizeTypes)
5338 return N0;
5339
5340 // fold (avgfloor x, 0) -> x >> 1
5341 SDValue X, Y;
5342 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5343 return DAG.getNode(ISD::SRA, DL, VT, X,
5344 DAG.getShiftAmountConstant(1, VT, DL));
5345 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5346 return DAG.getNode(ISD::SRL, DL, VT, X,
5347 DAG.getShiftAmountConstant(1, VT, DL));
5348
5349 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5350 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5351 if (!IsSigned &&
5352 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5353 X.getValueType() == Y.getValueType() &&
5354 hasOperation(Opcode, X.getValueType())) {
5355 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5356 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5357 }
5358 if (IsSigned &&
5359 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5360 X.getValueType() == Y.getValueType() &&
5361 hasOperation(Opcode, X.getValueType())) {
5362 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5363 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5364 }
5365
5366 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5367 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5368 // Check if avgflooru isn't legal/custom but avgceilu is.
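// e.g. avgflooru(x, 8) -> avgceilu(x, 7), since floor((x+y)/2) == ceil((x+(y-1))/2) when y != 0.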
5369 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5370 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5371 if (DAG.isKnownNeverZero(N1))
5372 return DAG.getNode(
5373 ISD::AVGCEILU, DL, VT, N0,
5374 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5375 if (DAG.isKnownNeverZero(N0))
5376 return DAG.getNode(
5377 ISD::AVGCEILU, DL, VT, N1,
5378 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5379 }
5380
5381 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5382 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
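// e.g. avgflooru((add nuw x, y), 1) is (x + y + 1) >> 1 computed without overflow, i.e. avgceilu(x, y).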
5383 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5384 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5385 SDValue Add;
5386 if (sd_match(N,
5387 m_c_BinOp(Opcode,
5388 m_AllOf(m_Value(Add), m_Add(m_Value(X), m_Value(Y))),
5389 m_One())) ||
5390 sd_match(N, m_c_BinOp(Opcode,
5391 m_AllOf(m_Value(Add), m_Add(m_Value(X), m_One())),
5392 m_Value(Y)))) {
5393
5394 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5395 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5396
5397 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5398 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5399 }
5400 }
5401
5402 return SDValue();
5403}
5404
5405SDValue DAGCombiner::visitABD(SDNode *N) {
5406 unsigned Opcode = N->getOpcode();
5407 SDValue N0 = N->getOperand(0);
5408 SDValue N1 = N->getOperand(1);
5409 EVT VT = N->getValueType(0);
5410 SDLoc DL(N);
5411
5412 // fold (abd c1, c2)
5413 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5414 return C;
5415
5416 // canonicalize constant to RHS.
5417 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5418 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5419 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5420
5421 if (VT.isVector())
5422 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5423 return FoldedVOp;
5424
5425 // fold (abd x, undef) -> 0
5426 if (N0.isUndef() || N1.isUndef())
5427 return DAG.getConstant(0, DL, VT);
5428
5429 // fold (abd x, x) -> 0
5430 if (N0 == N1)
5431 return DAG.getConstant(0, DL, VT);
5432
5433 SDValue X;
5434
5435 // fold (abds x, 0) -> abs x
5436 if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5437 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5438 return DAG.getNode(ISD::ABS, DL, VT, X);
5439
5440 // fold (abdu x, 0) -> x
5441 if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5442 return X;
5443
5444 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5445 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5446 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5447 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5448
5449 return SDValue();
5450}
5451
5452/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5453/// give the opcodes for the two computations that are being performed. Return
5454 /// the combined value if a simplification was made.
5455SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5456 unsigned HiOp) {
5457 // If the high half is not needed, just compute the low half.
5458 bool HiExists = N->hasAnyUseOfValue(1);
5459 if (!HiExists && (!LegalOperations ||
5460 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5461 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5462 return CombineTo(N, Res, Res);
5463 }
5464
5465 // If the low half is not needed, just compute the high half.
5466 bool LoExists = N->hasAnyUseOfValue(0);
5467 if (!LoExists && (!LegalOperations ||
5468 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5469 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5470 return CombineTo(N, Res, Res);
5471 }
5472
5473 // If both halves are used, return as it is.
5474 if (LoExists && HiExists)
5475 return SDValue();
5476
5477 // If the two computed results can be simplified separately, separate them.
5478 if (LoExists) {
5479 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5480 AddToWorklist(Lo.getNode());
5481 SDValue LoOpt = combine(Lo.getNode());
5482 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5483 (!LegalOperations ||
5484 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5485 return CombineTo(N, LoOpt, LoOpt);
5486 }
5487
5488 if (HiExists) {
5489 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5490 AddToWorklist(Hi.getNode());
5491 SDValue HiOpt = combine(Hi.getNode());
5492 if (HiOpt.getNode() && HiOpt != Hi &&
5493 (!LegalOperations ||
5494 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5495 return CombineTo(N, HiOpt, HiOpt);
5496 }
5497
5498 return SDValue();
5499}
5500
5501SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5502 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5503 return Res;
5504
5505 SDValue N0 = N->getOperand(0);
5506 SDValue N1 = N->getOperand(1);
5507 EVT VT = N->getValueType(0);
5508 SDLoc DL(N);
5509
5510 // Constant fold.
5511 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5512 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5513
5514 // canonicalize constant to RHS (vector doesn't have to splat)
5515 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5516 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5517 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5518
5519 // If the type twice as wide is legal, transform the smul_lohi to a wider
5520 // multiply plus a shift.
5521 if (VT.isSimple() && !VT.isVector()) {
5522 MVT Simple = VT.getSimpleVT();
5523 unsigned SimpleSize = Simple.getSizeInBits();
5524 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5525 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5526 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5527 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5528 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5529 // Extract the high half of the widened multiply.
5530 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5531 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5532 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5533 // Extract the low half of the widened multiply.
5534 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5535 return CombineTo(N, Lo, Hi);
5536 }
5537 }
5538
5539 return SDValue();
5540}
5541
5542SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5543 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5544 return Res;
5545
5546 SDValue N0 = N->getOperand(0);
5547 SDValue N1 = N->getOperand(1);
5548 EVT VT = N->getValueType(0);
5549 SDLoc DL(N);
5550
5551 // Constant fold.
5552 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5553 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5554
5555 // canonicalize constant to RHS (vector doesn't have to splat)
5556 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5557 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5558 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5559
5560 // (umul_lohi N0, 0) -> (0, 0)
5561 if (isNullConstant(N1)) {
5562 SDValue Zero = DAG.getConstant(0, DL, VT);
5563 return CombineTo(N, Zero, Zero);
5564 }
5565
5566 // (umul_lohi N0, 1) -> (N0, 0)
5567 if (isOneConstant(N1)) {
5568 SDValue Zero = DAG.getConstant(0, DL, VT);
5569 return CombineTo(N, N0, Zero);
5570 }
5571
5572 // If the type twice as wide is legal, transform the umul_lohi to a wider
5573 // multiply plus a shift.
5574 if (VT.isSimple() && !VT.isVector()) {
5575 MVT Simple = VT.getSimpleVT();
5576 unsigned SimpleSize = Simple.getSizeInBits();
5577 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5578 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5579 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5580 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5581 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5582 // Extract the high half of the widened multiply.
5583 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5584 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5585 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5586 // Extract the low half of the widened multiply.
5587 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5588 return CombineTo(N, Lo, Hi);
5589 }
5590 }
5591
5592 return SDValue();
5593}
5594
5595SDValue DAGCombiner::visitMULO(SDNode *N) {
5596 SDValue N0 = N->getOperand(0);
5597 SDValue N1 = N->getOperand(1);
5598 EVT VT = N0.getValueType();
5599 bool IsSigned = (ISD::SMULO == N->getOpcode());
5600
5601 EVT CarryVT = N->getValueType(1);
5602 SDLoc DL(N);
5603
5604 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5605 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5606
5607 // fold operation with constant operands.
5608 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5609 // multiple results.
5610 if (N0C && N1C) {
5611 bool Overflow;
5612 APInt Result =
5613 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5614 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5615 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5616 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5617 }
5618
5619 // canonicalize constant to RHS.
5620 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5621 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5622 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5623
5624 // fold (mulo x, 0) -> 0 + no carry out
5625 if (isNullOrNullSplat(N1))
5626 return CombineTo(N, DAG.getConstant(0, DL, VT),
5627 DAG.getConstant(0, DL, CarryVT));
5628
5629 // (mulo x, 2) -> (addo x, x)
5630 // FIXME: This needs a freeze.
5631 if (N1C && N1C->getAPIntValue() == 2 &&
5632 (!IsSigned || VT.getScalarSizeInBits() > 2))
5633 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5634 N->getVTList(), N0, N0);
5635
5636 // A 1 bit SMULO overflows if both inputs are 1.
5637 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5638 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5639 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5640 DAG.getConstant(0, DL, VT), ISD::SETNE);
5641 return CombineTo(N, And, Cmp);
5642 }
5643
5644 // If it cannot overflow, transform into a mul.
5645 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5646 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5647 DAG.getConstant(0, DL, CarryVT));
5648 return SDValue();
5649}
5650
5651// Function to calculate whether the Min/Max pair of SDNodes (potentially
5652 // swapped around) make a signed saturate pattern, clamping between a signed
5653 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5654// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5655// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5656// same as SimplifySelectCC. N0<N1 ? N2 : N3.
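// e.g. smin(smax(x, -128), 127) returns x with BW == 8 and Unsigned == false.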
5657static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5658 SDValue N3, ISD::CondCode CC, unsigned &BW,
5659 bool &Unsigned, SelectionDAG &DAG) {
5660 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5661 ISD::CondCode CC) {
5662 // The compare and select operand should be the same or the select operands
5663 // should be truncated versions of the comparison.
5664 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5665 return 0;
5666 // The constants need to be the same or a truncated version of each other.
5667 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5668 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5669 if (!N1C || !N3C)
5670 return 0;
5671 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5672 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5673 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5674 return 0;
5675 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5676 };
5677
5678 // Check the initial value is a SMIN/SMAX equivalent.
5679 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5680 if (!Opcode0)
5681 return SDValue();
5682
5683 // We could only need one range check, if the fptosi could never produce
5684 // the upper value.
5685 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5686 if (isNullOrNullSplat(N3)) {
5687 EVT IntVT = N0.getValueType().getScalarType();
5688 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5689 if (FPVT.isSimple()) {
5690 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5691 const fltSemantics &Semantics = InputTy->getFltSemantics();
5692 uint32_t MinBitWidth =
5693 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5694 if (IntVT.getSizeInBits() >= MinBitWidth) {
5695 Unsigned = true;
5696 BW = PowerOf2Ceil(MinBitWidth);
5697 return N0;
5698 }
5699 }
5700 }
5701 }
5702
5703 SDValue N00, N01, N02, N03;
5704 ISD::CondCode N0CC;
5705 switch (N0.getOpcode()) {
5706 case ISD::SMIN:
5707 case ISD::SMAX:
5708 N00 = N02 = N0.getOperand(0);
5709 N01 = N03 = N0.getOperand(1);
5710 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5711 break;
5712 case ISD::SELECT_CC:
5713 N00 = N0.getOperand(0);
5714 N01 = N0.getOperand(1);
5715 N02 = N0.getOperand(2);
5716 N03 = N0.getOperand(3);
5717 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5718 break;
5719 case ISD::SELECT:
5720 case ISD::VSELECT:
5721 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5722 return SDValue();
5723 N00 = N0.getOperand(0).getOperand(0);
5724 N01 = N0.getOperand(0).getOperand(1);
5725 N02 = N0.getOperand(1);
5726 N03 = N0.getOperand(2);
5727 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5728 break;
5729 default:
5730 return SDValue();
5731 }
5732
5733 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5734 if (!Opcode1 || Opcode0 == Opcode1)
5735 return SDValue();
5736
5737 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5738 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5739 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5740 return SDValue();
5741
5742 const APInt &MinC = MinCOp->getAPIntValue();
5743 const APInt &MaxC = MaxCOp->getAPIntValue();
5744 APInt MinCPlus1 = MinC + 1;
5745 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5746 BW = MinCPlus1.exactLogBase2() + 1;
5747 Unsigned = false;
5748 return N02;
5749 }
5750
5751 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5752 BW = MinCPlus1.exactLogBase2();
5753 Unsigned = true;
5754 return N02;
5755 }
5756
5757 return SDValue();
5758}
5759
5760static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5761 SDValue N3, ISD::CondCode CC,
5762 SelectionDAG &DAG) {
5763 unsigned BW;
5764 bool Unsigned;
5765 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5766 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5767 return SDValue();
5768 EVT FPVT = Fp.getOperand(0).getValueType();
5769 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5770 if (FPVT.isVector())
5771 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5772 FPVT.getVectorElementCount());
5773 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5774 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5775 return SDValue();
5776 SDLoc DL(Fp);
5777 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5778 DAG.getValueType(NewVT.getScalarType()));
5779 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5780}
5781
5782static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5783 SDValue N3, ISD::CondCode CC,
5784 SelectionDAG &DAG) {
5785 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5786 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5787 // be truncated versions of the setcc (N0/N1).
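// e.g. umin(fptoui x to i32, 255) can become zext(fptoui_sat x to i8) when the target prefers FP_TO_UINT_SAT.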
5788 if ((N0 != N2 &&
5789 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5790 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5791 return SDValue();
5792 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5793 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5794 if (!N1C || !N3C)
5795 return SDValue();
5796 const APInt &C1 = N1C->getAPIntValue();
5797 const APInt &C3 = N3C->getAPIntValue();
5798 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5799 C1 != C3.zext(C1.getBitWidth()))
5800 return SDValue();
5801
5802 unsigned BW = (C1 + 1).exactLogBase2();
5803 EVT FPVT = N0.getOperand(0).getValueType();
5804 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5805 if (FPVT.isVector())
5806 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5807 FPVT.getVectorElementCount());
5808 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5809 FPVT, NewVT))
5810 return SDValue();
5811
5812 SDValue Sat =
5813 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5814 DAG.getValueType(NewVT.getScalarType()));
5815 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5816}
5817
5818SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5819 SDValue N0 = N->getOperand(0);
5820 SDValue N1 = N->getOperand(1);
5821 EVT VT = N0.getValueType();
5822 unsigned Opcode = N->getOpcode();
5823 SDLoc DL(N);
5824
5825 // fold operation with constant operands.
5826 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5827 return C;
5828
5829 // If the operands are the same, this is a no-op.
5830 if (N0 == N1)
5831 return N0;
5832
5833 // canonicalize constant to RHS
5834 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5835 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5836 return DAG.getNode(Opcode, DL, VT, N1, N0);
5837
5838 // fold vector ops
5839 if (VT.isVector())
5840 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5841 return FoldedVOp;
5842
5843 // reassociate minmax
5844 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5845 return RMINMAX;
5846
5847 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5848 // Only do this if:
5849 // 1. The current op isn't legal and the flipped is.
5850 // 2. The saturation pattern is broken by canonicalization in InstCombine.
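// e.g. (umin x, y) can become (smin x, y) when both sign bits are known zero and only the signed form is legal.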
5851 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5852 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5853 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5854 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5855 unsigned AltOpcode;
5856 switch (Opcode) {
5857 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5858 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5859 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5860 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5861 default: llvm_unreachable("Unknown MINMAX opcode");
5862 }
5863 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5864 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5865 }
5866
5867 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5868 if (SDValue S = PerformMinMaxFpToSatCombine(
5869 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5870 return S;
5871 if (Opcode == ISD::UMIN)
5872 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5873 return S;
5874
5875 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5876 auto ReductionOpcode = [](unsigned Opcode) {
5877 switch (Opcode) {
5878 case ISD::SMIN:
5879 return ISD::VECREDUCE_SMIN;
5880 case ISD::SMAX:
5881 return ISD::VECREDUCE_SMAX;
5882 case ISD::UMIN:
5883 return ISD::VECREDUCE_UMIN;
5884 case ISD::UMAX:
5885 return ISD::VECREDUCE_UMAX;
5886 default:
5887 llvm_unreachable("Unexpected opcode");
5888 }
5889 };
5890 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5891 SDLoc(N), VT, N0, N1))
5892 return SD;
5893
5894 // Simplify the operands using demanded-bits information.
5895 if (SimplifyDemandedBits(SDValue(N, 0)))
5896 return SDValue(N, 0);
5897
5898 return SDValue();
5899}
5900
5901/// If this is a bitwise logic instruction and both operands have the same
5902/// opcode, try to sink the other opcode after the logic instruction.
5903SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5904 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5905 EVT VT = N0.getValueType();
5906 unsigned LogicOpcode = N->getOpcode();
5907 unsigned HandOpcode = N0.getOpcode();
5908 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5909 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5910
5911 // Bail early if none of these transforms apply.
5912 if (N0.getNumOperands() == 0)
5913 return SDValue();
5914
5915 // FIXME: We should check number of uses of the operands to not increase
5916 // the instruction count for all transforms.
5917
5918 // Handle size-changing casts (or sign_extend_inreg).
5919 SDValue X = N0.getOperand(0);
5920 SDValue Y = N1.getOperand(0);
5921 EVT XVT = X.getValueType();
5922 SDLoc DL(N);
5923 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5924 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5925 N0.getOperand(1) == N1.getOperand(1))) {
5926 // If both operands have other uses, this transform would create extra
5927 // instructions without eliminating anything.
5928 if (!N0.hasOneUse() && !N1.hasOneUse())
5929 return SDValue();
5930 // We need matching integer source types.
5931 if (XVT != Y.getValueType())
5932 return SDValue();
5933 // Don't create an illegal op during or after legalization. Don't ever
5934 // create an unsupported vector op.
5935 if ((VT.isVector() || LegalOperations) &&
5936 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5937 return SDValue();
5938 // Avoid infinite looping with PromoteIntBinOp.
5939 // TODO: Should we apply desirable/legal constraints to all opcodes?
5940 if ((HandOpcode == ISD::ANY_EXTEND ||
5941 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5942 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5943 return SDValue();
5944 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5945 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5946 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5947 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5948 return DAG.getNode(HandOpcode, DL, VT, Logic);
5949 }
5950
5951 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5952 if (HandOpcode == ISD::TRUNCATE) {
5953 // If both operands have other uses, this transform would create extra
5954 // instructions without eliminating anything.
5955 if (!N0.hasOneUse() && !N1.hasOneUse())
5956 return SDValue();
5957 // We need matching source types.
5958 if (XVT != Y.getValueType())
5959 return SDValue();
5960 // Don't create an illegal op during or after legalization.
5961 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5962 return SDValue();
5963 // Be extra careful sinking truncate. If it's free, there's no benefit in
5964 // widening a binop. Also, don't create a logic op on an illegal type.
5965 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5966 return SDValue();
5967 if (!TLI.isTypeLegal(XVT))
5968 return SDValue();
5969 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5970 return DAG.getNode(HandOpcode, DL, VT, Logic);
5971 }
5972
5973 // For binops SHL/SRL/SRA/AND:
5974 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5975 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5976 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5977 N0.getOperand(1) == N1.getOperand(1)) {
5978 // If either operand has other uses, this transform is not an improvement.
5979 if (!N0.hasOneUse() || !N1.hasOneUse())
5980 return SDValue();
5981 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5982 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5983 }
5984
5985 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5986 if (HandOpcode == ISD::BSWAP) {
5987 // If either operand has other uses, this transform is not an improvement.
5988 if (!N0.hasOneUse() || !N1.hasOneUse())
5989 return SDValue();
5990 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5991 return DAG.getNode(HandOpcode, DL, VT, Logic);
5992 }
5993
5994 // For funnel shifts FSHL/FSHR:
5995 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5996 // OP (logic_op x, y), (logic_op x1, y1), s
5997 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5998 N0.getOperand(2) == N1.getOperand(2)) {
5999 if (!N0.hasOneUse() || !N1.hasOneUse())
6000 return SDValue();
6001 SDValue X1 = N0.getOperand(1);
6002 SDValue Y1 = N1.getOperand(1);
6003 SDValue S = N0.getOperand(2);
6004 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6005 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6006 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6007 }
6008
6009 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6010 // Only perform this optimization up until type legalization, before
6011 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6012 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6013 // we don't want to undo this promotion.
6014 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6015 // on scalars.
6016 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6017 Level <= AfterLegalizeTypes) {
6018 // Input types must be integer and the same.
6019 if (XVT.isInteger() && XVT == Y.getValueType() &&
6020 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6021 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6022 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6023 return DAG.getNode(HandOpcode, DL, VT, Logic);
6024 }
6025 }
6026
6027 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6028 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6029 // If both shuffles use the same mask, and both shuffle within a single
6030 // vector, then it is worthwhile to move the swizzle after the operation.
6031 // The type-legalizer generates this pattern when loading illegal
6032 // vector types from memory. In many cases this allows additional shuffle
6033 // optimizations.
6034 // There are other cases where moving the shuffle after the xor/and/or
6035 // is profitable even if shuffles don't perform a swizzle.
6036 // If both shuffles use the same mask, and both shuffles have the same first
6037 // or second operand, then it might still be profitable to move the shuffle
6038 // after the xor/and/or operation.
6039 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6040 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6041 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6042 assert(X.getValueType() == Y.getValueType() &&
6043 "Inputs to shuffles are not the same type");
6044
6045 // Check that both shuffles use the same mask. The masks are known to be of
6046 // the same length because the result vector type is the same.
6047 // Check also that shuffles have only one use to avoid introducing extra
6048 // instructions.
6049 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6050 !SVN0->getMask().equals(SVN1->getMask()))
6051 return SDValue();
6052
6053 // Don't try to fold this node if it requires introducing a
6054 // build vector of all zeros that might be illegal at this stage.
6055 SDValue ShOp = N0.getOperand(1);
6056 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6057 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6058
6059 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6060 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6061 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6062 N0.getOperand(0), N1.getOperand(0));
6063 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6064 }
6065
6066 // Don't try to fold this node if it requires introducing a
6067 // build vector of all zeros that might be illegal at this stage.
6068 ShOp = N0.getOperand(0);
6069 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6070 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6071
6072 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6073 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6074 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6075 N1.getOperand(1));
6076 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6077 }
6078 }
6079
6080 return SDValue();
6081}
6082
6083/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6084SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6085 const SDLoc &DL) {
6086 SDValue LL, LR, RL, RR, N0CC, N1CC;
6087 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6088 !isSetCCEquivalent(N1, RL, RR, N1CC))
6089 return SDValue();
6090
6091 assert(N0.getValueType() == N1.getValueType() &&
6092 "Unexpected operand types for bitwise logic op");
6093 assert(LL.getValueType() == LR.getValueType() &&
6094 RL.getValueType() == RR.getValueType() &&
6095 "Unexpected operand types for setcc");
6096
6097 // If we're here post-legalization or the logic op type is not i1, the logic
6098 // op type must match a setcc result type. Also, all folds require new
6099 // operations on the left and right operands, so those types must match.
6100 EVT VT = N0.getValueType();
6101 EVT OpVT = LL.getValueType();
6102 if (LegalOperations || VT.getScalarType() != MVT::i1)
6103 if (VT != getSetCCResultType(OpVT))
6104 return SDValue();
6105 if (OpVT != RL.getValueType())
6106 return SDValue();
6107
6108 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6109 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6110 bool IsInteger = OpVT.isInteger();
6111 if (LR == RR && CC0 == CC1 && IsInteger) {
6112 bool IsZero = isNullOrNullSplat(LR);
6113 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6114
6115 // All bits clear?
6116 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6117 // All sign bits clear?
6118 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6119 // Any bits set?
6120 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6121 // Any sign bits set?
6122 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6123
6124 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6125 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6126 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6127 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6128 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6129 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6130 AddToWorklist(Or.getNode());
6131 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6132 }
6133
6134 // All bits set?
6135 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6136 // All sign bits set?
6137 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6138 // Any bits clear?
6139 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6140 // Any sign bits clear?
6141 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6142
6143 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6144 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6145 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6146 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6147 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6148 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6149 AddToWorklist(And.getNode());
6150 return DAG.getSetCC(DL, VT, And, LR, CC1);
6151 }
6152 }
6153
6154 // TODO: What is the 'or' equivalent of this fold?
6155 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6156 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6157 IsInteger && CC0 == ISD::SETNE &&
6158 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6159 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6160 SDValue One = DAG.getConstant(1, DL, OpVT);
6161 SDValue Two = DAG.getConstant(2, DL, OpVT);
6162 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6163 AddToWorklist(Add.getNode());
6164 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6165 }
6166
6167 // Try more general transforms if the predicates match and the only user of
6168 // the compares is the 'and' or 'or'.
6169 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6170 N0.hasOneUse() && N1.hasOneUse()) {
6171 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6172 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6173 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6174 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6175 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6176 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6177 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6178 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6179 }
6180
6181 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6182 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6183 // Match a shared variable operand and 2 non-opaque constant operands.
6184 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6185 // The difference of the constants must be a single bit.
6186 const APInt &CMax =
6187 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6188 const APInt &CMin =
6189 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6190 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6191 };
6192 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6193 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6194 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
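// e.g. (and (setcc X, 12, ne), (setcc X, 8, ne)) --> (setcc (and (sub X, 8), ~4), 0, ne)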
6195 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6196 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6197 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6198 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6199 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6200 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6201 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6202 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6203 }
6204 }
6205 }
6206
6207 // Canonicalize equivalent operands to LL == RL.
6208 if (LL == RR && LR == RL) {
6209 CC1 = ISD::getSetCCSwappedOperands(CC1);
6210 std::swap(RL, RR);
6211 }
6212
6213 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6214 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6215 if (LL == RL && LR == RR) {
6216 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6217 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6218 if (NewCC != ISD::SETCC_INVALID &&
6219 (!LegalOperations ||
6220 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6221 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6222 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6223 }
6224
6225 return SDValue();
6226}
6227
6228static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6229 SelectionDAG &DAG) {
6230 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6231}
6232
6233static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6234 SelectionDAG &DAG) {
6235 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6236}
6237
6238// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6239static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6240 ISD::CondCode CC, unsigned OrAndOpcode,
6241 SelectionDAG &DAG,
6242 bool isFMAXNUMFMINNUM_IEEE,
6243 bool isFMAXNUMFMINNUM) {
6244 // The optimization cannot be applied for all the predicates because
6245 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6246 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6247 // applied at all if one of the operands is a signaling NaN.
6248
6249 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6250 // are non NaN values.
6251 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6252 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6253 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6254 isFMAXNUMFMINNUM_IEEE
6255 ? ISD::FMINNUM_IEEE
6256 : ISD::DELETED_NODE;
6257 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6258 (OrAndOpcode == ISD::OR)) ||
6259 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6260 (OrAndOpcode == ISD::AND)))
6261 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6262 isFMAXNUMFMINNUM_IEEE
6265 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6266 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6267 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6268 // that there are not any sNaNs, then the optimization is not valid
6269 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6270 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6271 // we can prove that we do not have any sNaNs, then we can do the
6272 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6273 // cases.
6274 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6275 (OrAndOpcode == ISD::OR)) ||
6276 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6277 (OrAndOpcode == ISD::AND)))
6278 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6279 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6280 isFMAXNUMFMINNUM_IEEE
6281 ? ISD::FMINNUM_IEEE
6282 : ISD::DELETED_NODE;
6283 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6284 (OrAndOpcode == ISD::OR)) ||
6285 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6286 (OrAndOpcode == ISD::AND)))
6287 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6288 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6289 isFMAXNUMFMINNUM_IEEE
6290 ? ISD::FMAXNUM_IEEE
6291 : ISD::DELETED_NODE;
6292 return ISD::DELETED_NODE;
6293}
6294
6295 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6296 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6297 assert(
6298 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6299 "Invalid Op to combine SETCC with");
6300
6301 // TODO: Search past casts/truncates.
6302 SDValue LHS = LogicOp->getOperand(0);
6303 SDValue RHS = LogicOp->getOperand(1);
6304 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6305 !LHS->hasOneUse() || !RHS->hasOneUse())
6306 return SDValue();
6307
6308 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6309 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6310 LogicOp, LHS.getNode(), RHS.getNode());
6311
6312 SDValue LHS0 = LHS->getOperand(0);
6313 SDValue RHS0 = RHS->getOperand(0);
6314 SDValue LHS1 = LHS->getOperand(1);
6315 SDValue RHS1 = RHS->getOperand(1);
6316 // TODO: We don't actually need a splat here, for vectors we just need the
6317 // invariants to hold for each element.
6318 auto *LHS1C = isConstOrConstSplat(LHS1);
6319 auto *RHS1C = isConstOrConstSplat(RHS1);
6320 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6321 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6322 EVT VT = LogicOp->getValueType(0);
6323 EVT OpVT = LHS0.getValueType();
6324 SDLoc DL(LogicOp);
6325
6326 // Check if the operands of an and/or operation are comparisons and if they
6327 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6328 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6329 // sequence will be replaced with min-cmp sequence:
6330 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6331 // and and-cmp-cmp will be replaced with max-cmp sequence:
6332 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6333 // The optimization does not work for `==` or `!=` .
6334 // The two comparisons should have either the same predicate or the
6335 // predicate of one of the comparisons is the opposite of the other one.
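// For example, with a common RHS: (A < C) | (B < C) -> (min(A, B) < C) and
// (A < C) & (B < C) -> (max(A, B) < C); with A = 3, B = 9, C = 5 both sides
// of each rewrite agree (true for the or-form, false for the and-form).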
6336 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6337 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6338 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6339 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6340 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6341 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6342 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6343 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6344 (OpVT.isFloatingPoint() &&
6345 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6346 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6347 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6348 CCL != ISD::SETTRUE &&
6349 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6350
6351 SDValue CommonValue, Operand1, Operand2;
6352 ISD::CondCode CC = ISD::SETCC_INVALID;
6353 if (CCL == CCR) {
6354 if (LHS0 == RHS0) {
6355 CommonValue = LHS0;
6356 Operand1 = LHS1;
6357 Operand2 = RHS1;
6358 CC = CCL;
6359 } else if (LHS1 == RHS1) {
6360 CommonValue = LHS1;
6361 Operand1 = LHS0;
6362 Operand2 = RHS0;
6363 CC = CCL;
6364 }
6365 } else {
6366 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6367 if (LHS0 == RHS1) {
6368 CommonValue = LHS0;
6369 Operand1 = LHS1;
6370 Operand2 = RHS0;
6371 CC = CCR;
6372 } else if (RHS0 == LHS1) {
6373 CommonValue = LHS1;
6374 Operand1 = LHS0;
6375 Operand2 = RHS1;
6376 CC = CCL;
6377 }
6378 }
6379
6380 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6381 // handle it using OR/AND.
6382 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6383 CC = ISD::SETCC_INVALID;
6384 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6385 CC = ISD::SETCC_INVALID;
6386
6387 if (CC != ISD::SETCC_INVALID) {
6388 unsigned NewOpcode = ISD::DELETED_NODE;
6389 bool IsSigned = isSignedIntSetCC(CC);
6390 if (OpVT.isInteger()) {
6391 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6392 CC == ISD::SETLT || CC == ISD::SETULT);
6393 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6394 if (IsLess == IsOr)
6395 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6396 else
6397 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6398 } else if (OpVT.isFloatingPoint())
6399 NewOpcode =
6400 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6401 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6402
6403 if (NewOpcode != ISD::DELETED_NODE) {
6404 SDValue MinMaxValue =
6405 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6406 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6407 }
6408 }
6409 }
6410
6411 if (TargetPreference == AndOrSETCCFoldKind::None)
6412 return SDValue();
6413
6414 if (CCL == CCR &&
6415 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6416 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6417 const APInt &APLhs = LHS1C->getAPIntValue();
6418 const APInt &APRhs = RHS1C->getAPIntValue();
6419
6420 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6421 // case this is just a compare).
6422 if (APLhs == (-APRhs) &&
6423 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6424 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6425 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6426 // (icmp eq A, C) | (icmp eq A, -C)
6427 // -> (icmp eq Abs(A), C)
6428 // (icmp ne A, C) & (icmp ne A, -C)
6429 // -> (icmp ne Abs(A), C)
6430 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6431 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6432 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6433 } else if (TargetPreference &
6434 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6435
6436 // AndOrSETCCFoldKind::AddAnd:
6437 // A == C0 | A == C1
6438 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6439 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6440 // A != C0 & A != C1
6441 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6442 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6443
6444 // AndOrSETCCFoldKind::NotAnd:
6445 // A == C0 | A == C1
6446 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6447 // -> ~A & smin(C0, C1) == 0
6448 // A != C0 & A != C1
6449 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6450 // -> ~A & smin(C0, C1) != 0
6451
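// Illustration of the AddAnd case: for A == 5 | A == 7, smin = 5, smax = 7
// and the difference 2 is a power of 2, so the fold produces
// ((A - 5) & ~2) == 0, which holds exactly for A == 5 and A == 7.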
6452 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6453 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6454 APInt Dif = MaxC - MinC;
6455 if (!Dif.isZero() && Dif.isPowerOf2()) {
6456 if (MaxC.isAllOnes() &&
6457 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6458 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6459 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6460 DAG.getConstant(MinC, DL, OpVT));
6461 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6462 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6463 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6464
6465 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6466 DAG.getConstant(-MinC, DL, OpVT));
6467 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6468 DAG.getConstant(~Dif, DL, OpVT));
6469 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6470 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6471 }
6472 }
6473 }
6474 }
6475
6476 return SDValue();
6477}
6478
6479// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6480// We canonicalize to the `select` form in the middle end, but the `and` form
6481 // gets better codegen and all tested targets (arm, x86, riscv) are happy with it.
6482 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6483 const SDLoc &DL, SelectionDAG &DAG) {
6484 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6485 if (!isNullConstant(F))
6486 return SDValue();
6487
6488 EVT CondVT = Cond.getValueType();
6489 if (TLI.getBooleanContents(CondVT) !=
6490 TargetLowering::ZeroOrOneBooleanContent)
6491 return SDValue();
6492
6493 if (T.getOpcode() != ISD::AND)
6494 return SDValue();
6495
6496 if (!isOneConstant(T.getOperand(1)))
6497 return SDValue();
6498
6499 EVT OpVT = T.getValueType();
6500
6501 SDValue CondMask =
6502 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6503 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6504}
6505
6506/// This contains all DAGCombine rules which reduce two values combined by
6507/// an And operation to a single value. This makes them reusable in the context
6508/// of visitSELECT(). Rules involving constants are not included as
6509/// visitSELECT() already handles those cases.
6510SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6511 EVT VT = N1.getValueType();
6512 SDLoc DL(N);
6513
6514 // fold (and x, undef) -> 0
6515 if (N0.isUndef() || N1.isUndef())
6516 return DAG.getConstant(0, DL, VT);
6517
6518 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6519 return V;
6520
6521 // Canonicalize:
6522 // and(x, add) -> and(add, x)
6523 if (N1.getOpcode() == ISD::ADD)
6524 std::swap(N0, N1);
6525
6526 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6527 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6528 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6529 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6530 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6531 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6532 // immediate for an add, but it is legal if its top c2 bits are set,
6533 // transform the ADD so the immediate doesn't need to be materialized
6534 // in a register.
6535 APInt ADDC = ADDI->getAPIntValue();
6536 APInt SRLC = SRLI->getAPIntValue();
6537 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6538 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6539 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6540 SRLC.getZExtValue());
6541 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6542 ADDC |= Mask;
6543 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6544 SDLoc DL0(N0);
6545 SDValue NewAdd =
6546 DAG.getNode(ISD::ADD, DL0, VT,
6547 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6548 CombineTo(N0.getNode(), NewAdd);
6549 // Return N so it doesn't get rechecked!
6550 return SDValue(N, 0);
6551 }
6552 }
6553 }
6554 }
6555 }
6556 }
6557
6558 return SDValue();
6559}
6560
6561bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6562 EVT LoadResultTy, EVT &ExtVT) {
6563 if (!AndC->getAPIntValue().isMask())
6564 return false;
6565
6566 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6567
6568 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6569 EVT LoadedVT = LoadN->getMemoryVT();
6570
6571 if (ExtVT == LoadedVT &&
6572 (!LegalOperations ||
6573 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6574 // ZEXTLOAD will match without needing to change the size of the value being
6575 // loaded.
6576 return true;
6577 }
6578
6579 // Do not change the width of volatile or atomic loads.
6580 if (!LoadN->isSimple())
6581 return false;
6582
6583 // Do not generate loads of non-round integer types since these can
6584 // be expensive (and would be wrong if the type is not byte sized).
6585 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6586 return false;
6587
6588 if (LegalOperations &&
6589 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6590 return false;
6591
6592 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6593 return false;
6594
6595 return true;
6596}
6597
6598bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6599 ISD::LoadExtType ExtType, EVT &MemVT,
6600 unsigned ShAmt) {
6601 if (!LDST)
6602 return false;
6603 // Only allow byte offsets.
6604 if (ShAmt % 8)
6605 return false;
6606
6607 // Do not generate loads of non-round integer types since these can
6608 // be expensive (and would be wrong if the type is not byte sized).
6609 if (!MemVT.isRound())
6610 return false;
6611
6612 // Don't change the width of volatile or atomic loads.
6613 if (!LDST->isSimple())
6614 return false;
6615
6616 EVT LdStMemVT = LDST->getMemoryVT();
6617
6618 // Bail out when changing the scalable property, since we can't be sure that
6619 // we're actually narrowing here.
6620 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6621 return false;
6622
6623 // Verify that we are actually reducing a load width here.
6624 if (LdStMemVT.bitsLT(MemVT))
6625 return false;
6626
6627 // Ensure that this isn't going to produce an unsupported memory access.
6628 if (ShAmt) {
6629 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6630 const unsigned ByteShAmt = ShAmt / 8;
6631 const Align LDSTAlign = LDST->getAlign();
6632 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6633 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6634 LDST->getAddressSpace(), NarrowAlign,
6635 LDST->getMemOperand()->getFlags()))
6636 return false;
6637 }
6638
6639 // It's not possible to generate a constant of extended or untyped type.
6640 EVT PtrType = LDST->getBasePtr().getValueType();
6641 if (PtrType == MVT::Untyped || PtrType.isExtended())
6642 return false;
6643
6644 if (isa<LoadSDNode>(LDST)) {
6645 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6646 // Don't transform one with multiple uses, this would require adding a new
6647 // load.
6648 if (!SDValue(Load, 0).hasOneUse())
6649 return false;
6650
6651 if (LegalOperations &&
6652 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6653 return false;
6654
6655 // For the transform to be legal, the load must produce only two values
6656 // (the value loaded and the chain). Don't transform a pre-increment
6657 // load, for example, which produces an extra value. Otherwise the
6658 // transformation is not equivalent, and the downstream logic to replace
6659 // uses gets things wrong.
6660 if (Load->getNumValues() > 2)
6661 return false;
6662
6663 // If the load that we're shrinking is an extload and we're not just
6664 // discarding the extension we can't simply shrink the load. Bail.
6665 // TODO: It would be possible to merge the extensions in some cases.
6666 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6667 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6668 return false;
6669
6670 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6671 return false;
6672 } else {
6673 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6674 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6675 // Can't write outside the original store
6676 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6677 return false;
6678
6679 if (LegalOperations &&
6680 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6681 return false;
6682 }
6683 return true;
6684}
6685
6686bool DAGCombiner::SearchForAndLoads(SDNode *N,
6687 SmallVectorImpl<LoadSDNode*> &Loads,
6688 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6689 ConstantSDNode *Mask,
6690 SDNode *&NodeToMask) {
6691 // Recursively search for the operands, looking for loads which can be
6692 // narrowed.
6693 for (SDValue Op : N->op_values()) {
6694 if (Op.getValueType().isVector())
6695 return false;
6696
6697 // Some constants may need fixing up later if they are too large.
6698 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6699 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6700 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6701 NodesWithConsts.insert(N);
6702 continue;
6703 }
6704
6705 if (!Op.hasOneUse())
6706 return false;
6707
6708 switch(Op.getOpcode()) {
6709 case ISD::LOAD: {
6710 auto *Load = cast<LoadSDNode>(Op);
6711 EVT ExtVT;
6712 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6713 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6714
6715 // ZEXTLOAD is already small enough.
6716 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6717 ExtVT.bitsGE(Load->getMemoryVT()))
6718 continue;
6719
6720 // Use LE to convert equal sized loads to zext.
6721 if (ExtVT.bitsLE(Load->getMemoryVT()))
6722 Loads.push_back(Load);
6723
6724 continue;
6725 }
6726 return false;
6727 }
6728 case ISD::ZERO_EXTEND:
6729 case ISD::AssertZext: {
6730 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6731 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6732 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6733 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6734 Op.getOperand(0).getValueType();
6735
6736 // We can accept extending nodes if the mask is wider or an equal
6737 // width to the original type.
6738 if (ExtVT.bitsGE(VT))
6739 continue;
6740 break;
6741 }
6742 case ISD::OR:
6743 case ISD::XOR:
6744 case ISD::AND:
6745 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6746 NodeToMask))
6747 return false;
6748 continue;
6749 }
6750
6751 // Allow one node which will be masked along with any loads found.
6752 if (NodeToMask)
6753 return false;
6754
6755 // Also ensure that the node to be masked only produces one data result.
6756 NodeToMask = Op.getNode();
6757 if (NodeToMask->getNumValues() > 1) {
6758 bool HasValue = false;
6759 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6760 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6761 if (VT != MVT::Glue && VT != MVT::Other) {
6762 if (HasValue) {
6763 NodeToMask = nullptr;
6764 return false;
6765 }
6766 HasValue = true;
6767 }
6768 }
6769 assert(HasValue && "Node to be masked has no data result?");
6770 }
6771 }
6772 return true;
6773}
6774
6775bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6776 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6777 if (!Mask)
6778 return false;
6779
6780 if (!Mask->getAPIntValue().isMask())
6781 return false;
6782
6783 // No need to do anything if the and directly uses a load.
6784 if (isa<LoadSDNode>(N->getOperand(0)))
6785 return false;
6786
6787 SmallVector<LoadSDNode*, 8> Loads;
6788 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6789 SDNode *FixupNode = nullptr;
6790 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6791 if (Loads.empty())
6792 return false;
6793
6794 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6795 SDValue MaskOp = N->getOperand(1);
6796
6797 // If it exists, fixup the single node we allow in the tree that needs
6798 // masking.
6799 if (FixupNode) {
6800 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6801 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6802 FixupNode->getValueType(0),
6803 SDValue(FixupNode, 0), MaskOp);
6804 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6805 if (And.getOpcode() == ISD::AND)
6806 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6807 }
6808
6809 // Narrow any constants that need it.
6810 for (auto *LogicN : NodesWithConsts) {
6811 SDValue Op0 = LogicN->getOperand(0);
6812 SDValue Op1 = LogicN->getOperand(1);
6813
6814 if (isa<ConstantSDNode>(Op0))
6815 Op0 =
6816 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6817
6818 if (isa<ConstantSDNode>(Op1))
6819 Op1 =
6820 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6821
6822 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6823 std::swap(Op0, Op1);
6824
6825 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6826 }
6827
6828 // Create narrow loads.
6829 for (auto *Load : Loads) {
6830 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6831 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6832 SDValue(Load, 0), MaskOp);
6833 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6834 if (And.getOpcode() == ISD::AND)
6835 And = SDValue(
6836 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6837 SDValue NewLoad = reduceLoadWidth(And.getNode());
6838 assert(NewLoad &&
6839 "Shouldn't be masking the load if it can't be narrowed");
6840 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6841 }
6842 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6843 return true;
6844 }
6845 return false;
6846}
6847
6848// Unfold
6849// x & (-1 'logical shift' y)
6850// To
6851// (x 'opposite logical shift' y) 'logical shift' y
6852// if it is better for performance.
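// For example, with x = 0b101101 and y = 3, x & (-1 << 3) and
// (x >> 3) << 3 both yield 0b101000: the pair of shifts clears the low
// y bits without materializing the variable mask.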
6853SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6854 assert(N->getOpcode() == ISD::AND);
6855
6856 SDValue N0 = N->getOperand(0);
6857 SDValue N1 = N->getOperand(1);
6858
6859 // Do we actually prefer shifts over mask?
6860 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6861 return SDValue();
6862
6863 // Try to match (-1 '[outer] logical shift' y)
6864 unsigned OuterShift;
6865 unsigned InnerShift; // The opposite direction to the OuterShift.
6866 SDValue Y; // Shift amount.
6867 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6868 if (!M.hasOneUse())
6869 return false;
6870 OuterShift = M->getOpcode();
6871 if (OuterShift == ISD::SHL)
6872 InnerShift = ISD::SRL;
6873 else if (OuterShift == ISD::SRL)
6874 InnerShift = ISD::SHL;
6875 else
6876 return false;
6877 if (!isAllOnesConstant(M->getOperand(0)))
6878 return false;
6879 Y = M->getOperand(1);
6880 return true;
6881 };
6882
6883 SDValue X;
6884 if (matchMask(N1))
6885 X = N0;
6886 else if (matchMask(N0))
6887 X = N1;
6888 else
6889 return SDValue();
6890
6891 SDLoc DL(N);
6892 EVT VT = N->getValueType(0);
6893
6894 // tmp = x 'opposite logical shift' y
6895 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6896 // ret = tmp 'logical shift' y
6897 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6898
6899 return T1;
6900}
6901
6902/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6903/// For a target with a bit test, this is expected to become test + set and save
6904/// at least 1 instruction.
6905 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6906 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6907
6908 // Look through an optional extension.
6909 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6910 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6911 And0 = And0.getOperand(0);
6912 if (!isOneConstant(And1) || !And0.hasOneUse())
6913 return SDValue();
6914
6915 SDValue Src = And0;
6916
6917 // Attempt to find a 'not' op.
6918 // TODO: Should we favor test+set even without the 'not' op?
6919 bool FoundNot = false;
6920 if (isBitwiseNot(Src)) {
6921 FoundNot = true;
6922 Src = Src.getOperand(0);
6923
6924 // Look though an optional truncation. The source operand may not be the
6925 // same type as the original 'and', but that is ok because we are masking
6926 // off everything but the low bit.
6927 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6928 Src = Src.getOperand(0);
6929 }
6930
6931 // Match a shift-right by constant.
6932 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6933 return SDValue();
6934
6935 // This is probably not worthwhile without a supported type.
6936 EVT SrcVT = Src.getValueType();
6937 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6938 if (!TLI.isTypeLegal(SrcVT))
6939 return SDValue();
6940
6941 // We might have looked through casts that make this transform invalid.
6942 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6943 SDValue ShiftAmt = Src.getOperand(1);
6944 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6945 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6946 return SDValue();
6947
6948 // Set source to shift source.
6949 Src = Src.getOperand(0);
6950
6951 // Try again to find a 'not' op.
6952 // TODO: Should we favor test+set even with two 'not' ops?
6953 if (!FoundNot) {
6954 if (!isBitwiseNot(Src))
6955 return SDValue();
6956 Src = Src.getOperand(0);
6957 }
6958
6959 if (!TLI.hasBitTest(Src, ShiftAmt))
6960 return SDValue();
6961
6962 // Turn this into a bit-test pattern using mask op + setcc:
6963 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6964 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
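// Sanity check of the equivalence: both sides are 1 exactly when bit C of X
// is clear, e.g. X = 0b1000, C = 1 gives 1 on both sides, while X = 0b1010,
// C = 1 gives 0 on both sides.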
6965 SDLoc DL(And);
6966 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6967 EVT CCVT =
6968 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6969 SDValue Mask = DAG.getConstant(
6970 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6971 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6972 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6973 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6974 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6975}
6976
6977/// For targets that support usubsat, match a bit-hack form of that operation
6978/// that ends in 'and' and convert it.
6979 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6980 EVT VT = N->getValueType(0);
6981 unsigned BitWidth = VT.getScalarSizeInBits();
6982 APInt SignMask = APInt::getSignMask(BitWidth);
6983
6984 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6985 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6986 // xor/add with SMIN (signmask) are logically equivalent.
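// Why this works for i8: X s>> 7 is all-ones iff X >= 128, and X ^ 128
// (or X + 128) subtracts 128 from such an X, so the 'and' yields X - 128
// when X >= 128 and 0 otherwise, which is exactly usubsat(X, 128).
// E.g. X = 200 gives 72 on both sides; X = 100 gives 0 on both sides.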
6987 SDValue X;
6988 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6989 m_OneUse(m_Sra(m_Deferred(X),
6990 m_SpecificInt(BitWidth - 1))))) &&
6991 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6992 m_OneUse(m_Sra(m_Deferred(X),
6993 m_SpecificInt(BitWidth - 1))))))
6994 return SDValue();
6995
6996 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6997 DAG.getConstant(SignMask, DL, VT));
6998}
6999
7000/// Given a bitwise logic operation N with a matching bitwise logic operand,
7001/// fold a pattern where 2 of the source operands are identically shifted
7002/// values. For example:
7003/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7004 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7005 SelectionDAG &DAG) {
7006 unsigned LogicOpcode = N->getOpcode();
7007 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7008 "Expected bitwise logic operation");
7009
7010 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7011 return SDValue();
7012
7013 // Match another bitwise logic op and a shift.
7014 unsigned ShiftOpcode = ShiftOp.getOpcode();
7015 if (LogicOp.getOpcode() != LogicOpcode ||
7016 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7017 ShiftOpcode == ISD::SRA))
7018 return SDValue();
7019
7020 // Match another shift op inside the first logic operand. Handle both commuted
7021 // possibilities.
7022 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7023 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7024 SDValue X1 = ShiftOp.getOperand(0);
7025 SDValue Y = ShiftOp.getOperand(1);
7026 SDValue X0, Z;
7027 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7028 LogicOp.getOperand(0).getOperand(1) == Y) {
7029 X0 = LogicOp.getOperand(0).getOperand(0);
7030 Z = LogicOp.getOperand(1);
7031 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7032 LogicOp.getOperand(1).getOperand(1) == Y) {
7033 X0 = LogicOp.getOperand(1).getOperand(0);
7034 Z = LogicOp.getOperand(0);
7035 } else {
7036 return SDValue();
7037 }
7038
7039 EVT VT = N->getValueType(0);
7040 SDLoc DL(N);
7041 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7042 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7043 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7044}
7045
7046/// Given a tree of logic operations with shape like
7047/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7048/// try to match and fold shift operations with the same shift amount.
7049/// For example:
7050/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7051/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7052 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7053 SDValue RightHand, SelectionDAG &DAG) {
7054 unsigned LogicOpcode = N->getOpcode();
7055 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7056 "Expected bitwise logic operation");
7057 if (LeftHand.getOpcode() != LogicOpcode ||
7058 RightHand.getOpcode() != LogicOpcode)
7059 return SDValue();
7060 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7061 return SDValue();
7062
7063 // Try to match one of following patterns:
7064 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7065 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7066 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7067 // itself.
7068 SDValue CombinedShifts, W;
7069 SDValue R0 = RightHand.getOperand(0);
7070 SDValue R1 = RightHand.getOperand(1);
7071 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7072 W = R1;
7073 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7074 W = R0;
7075 else
7076 return SDValue();
7077
7078 EVT VT = N->getValueType(0);
7079 SDLoc DL(N);
7080 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7081}
7082
7083SDValue DAGCombiner::visitAND(SDNode *N) {
7084 SDValue N0 = N->getOperand(0);
7085 SDValue N1 = N->getOperand(1);
7086 EVT VT = N1.getValueType();
7087 SDLoc DL(N);
7088
7089 // x & x --> x
7090 if (N0 == N1)
7091 return N0;
7092
7093 // fold (and c1, c2) -> c1&c2
7094 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7095 return C;
7096
7097 // canonicalize constant to RHS
7098 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7099 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7100 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7101
7102 if (areBitwiseNotOfEachother(N0, N1))
7103 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7104
7105 // fold vector ops
7106 if (VT.isVector()) {
7107 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7108 return FoldedVOp;
7109
7110 // fold (and x, 0) -> 0, vector edition
7111 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7112 // do not return N1, because undef node may exist in N1
7113 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7114 N1.getValueType());
7115
7116 // fold (and x, -1) -> x, vector edition
7117 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7118 return N0;
7119
7120 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7121 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7122 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7123 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7124 EVT LoadVT = MLoad->getMemoryVT();
7125 EVT ExtVT = VT;
7126 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7127 // For this AND to be a zero extension of the masked load the elements
7128 // of the BuildVec must mask the bottom bits of the extended element
7129 // type
7130 uint64_t ElementSize =
7131 LoadVT.getVectorElementType().getScalarSizeInBits();
7132 if (Splat->getAPIntValue().isMask(ElementSize)) {
7133 SDValue NewLoad = DAG.getMaskedLoad(
7134 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7135 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7136 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7137 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7138 bool LoadHasOtherUsers = !N0.hasOneUse();
7139 CombineTo(N, NewLoad);
7140 if (LoadHasOtherUsers)
7141 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7142 return SDValue(N, 0);
7143 }
7144 }
7145 }
7146 }
7147
7148 // fold (and x, -1) -> x
7149 if (isAllOnesConstant(N1))
7150 return N0;
7151
7152 // if (and x, c) is known to be zero, return 0
7153 unsigned BitWidth = VT.getScalarSizeInBits();
7154 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7155 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7156 return DAG.getConstant(0, DL, VT);
7157
7158 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7159 return R;
7160
7161 if (SDValue NewSel = foldBinOpIntoSelect(N))
7162 return NewSel;
7163
7164 // reassociate and
7165 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7166 return RAND;
7167
7168 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7169 if (SDValue SD =
7170 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7171 return SD;
7172
7173 // fold (and (or x, C), D) -> D if (C & D) == D
7174 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7175 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7176 };
7177 if (N0.getOpcode() == ISD::OR &&
7178 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7179 return N1;
7180
7181 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7182 SDValue N0Op0 = N0.getOperand(0);
7183 EVT SrcVT = N0Op0.getValueType();
7184 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7185 APInt Mask = ~N1C->getAPIntValue();
7186 Mask = Mask.trunc(SrcBitWidth);
7187
7188 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7189 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7190 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7191
7192 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7193 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7194 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7195 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7196 TLI.isNarrowingProfitable(N, VT, SrcVT))
7197 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7198 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7199 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7200 }
7201
7202 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7203 if (ISD::isExtOpcode(N0.getOpcode())) {
7204 unsigned ExtOpc = N0.getOpcode();
7205 SDValue N0Op0 = N0.getOperand(0);
7206 if (N0Op0.getOpcode() == ISD::AND &&
7207 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7208 N0->hasOneUse() && N0Op0->hasOneUse()) {
7209 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7210 {N0Op0.getOperand(1)})) {
7211 if (SDValue NewMask =
7212 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7213 return DAG.getNode(ISD::AND, DL, VT,
7214 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7215 NewMask);
7216 }
7217 }
7218 }
7219 }
7220
7221 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7222 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7223 // already be zero by virtue of the width of the base type of the load.
7224 //
7225 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7226 // more cases.
7227 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7229 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7230 N0.getOperand(0).getResNo() == 0) ||
7231 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7232 auto *Load =
7233 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7234
7235 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7236 // This can be a pure constant or a vector splat, in which case we treat the
7237 // vector as a scalar and use the splat value.
7238 APInt Constant = APInt::getZero(1);
7239 if (const ConstantSDNode *C = isConstOrConstSplat(
7240 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7241 Constant = C->getAPIntValue();
7242 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7243 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7244 APInt SplatValue, SplatUndef;
7245 unsigned SplatBitSize;
7246 bool HasAnyUndefs;
7247 // Endianness should not matter here. Code below makes sure that we only
7248 // use the result if the SplatBitSize is a multiple of the vector element
7249 // size. And after that we AND all element sized parts of the splat
7250 // together. So the end result should be the same regardless of in which
7251 // order we do those operations.
7252 const bool IsBigEndian = false;
7253 bool IsSplat =
7254 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7255 HasAnyUndefs, EltBitWidth, IsBigEndian);
7256
7257 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7258 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7259 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7260 // Undef bits can contribute to a possible optimisation if set, so
7261 // set them.
7262 SplatValue |= SplatUndef;
7263
7264 // The splat value may be something like "0x00FFFFFF", which means 0 for
7265 // the first vector value and FF for the rest, repeating. We need a mask
7266 // that will apply equally to all members of the vector, so AND all the
7267 // lanes of the constant together.
7268 Constant = APInt::getAllOnes(EltBitWidth);
7269 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7270 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7271 }
7272 }
7273
7274 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7275 // actually legal and isn't going to get expanded, else this is a false
7276 // optimisation.
7277 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7278 Load->getValueType(0),
7279 Load->getMemoryVT());
7280
7281 // Resize the constant to the same size as the original memory access before
7282 // extension. If it is still the AllOnesValue then this AND is completely
7283 // unneeded.
7284 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7285
7286 bool B;
7287 switch (Load->getExtensionType()) {
7288 default: B = false; break;
7289 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7290 case ISD::ZEXTLOAD:
7291 case ISD::NON_EXTLOAD: B = true; break;
7292 }
7293
7294 if (B && Constant.isAllOnes()) {
7295 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7296 // preserve semantics once we get rid of the AND.
7297 SDValue NewLoad(Load, 0);
7298
7299 // Fold the AND away. NewLoad may get replaced immediately.
7300 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7301
7302 if (Load->getExtensionType() == ISD::EXTLOAD) {
7303 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7304 Load->getValueType(0), SDLoc(Load),
7305 Load->getChain(), Load->getBasePtr(),
7306 Load->getOffset(), Load->getMemoryVT(),
7307 Load->getMemOperand());
7308 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7309 if (Load->getNumValues() == 3) {
7310 // PRE/POST_INC loads have 3 values.
7311 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7312 NewLoad.getValue(2) };
7313 CombineTo(Load, To, 3, true);
7314 } else {
7315 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7316 }
7317 }
7318
7319 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7320 }
7321 }
7322
7323 // Try to convert a constant mask AND into a shuffle clear mask.
7324 if (VT.isVector())
7325 if (SDValue Shuffle = XformToShuffleWithZero(N))
7326 return Shuffle;
7327
7328 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7329 return Combined;
7330
7331 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7332 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7333 SDValue Ext = N0.getOperand(0);
7334 EVT ExtVT = Ext->getValueType(0);
7335 SDValue Extendee = Ext->getOperand(0);
7336
7337 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7338 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7339 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7340 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7341 // => (extract_subvector (iN_zeroext v))
7342 SDValue ZeroExtExtendee =
7343 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7344
7345 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7346 N0.getOperand(1));
7347 }
7348 }
7349
7350 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7351 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7352 EVT MemVT = GN0->getMemoryVT();
7353 EVT ScalarVT = MemVT.getScalarType();
7354
7355 if (SDValue(GN0, 0).hasOneUse() &&
7356 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7357 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7358 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7359 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7360
7361 SDValue ZExtLoad = DAG.getMaskedGather(
7362 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7363 GN0->getIndexType(), ISD::ZEXTLOAD);
7364
7365 CombineTo(N, ZExtLoad);
7366 AddToWorklist(ZExtLoad.getNode());
7367 // Avoid recheck of N.
7368 return SDValue(N, 0);
7369 }
7370 }
7371
7372 // fold (and (load x), 255) -> (zextload x, i8)
7373 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7374 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7375 if (SDValue Res = reduceLoadWidth(N))
7376 return Res;
7377
7378 if (LegalTypes) {
7379 // Attempt to propagate the AND back up to the leaves which, if they're
7380 // loads, can be combined to narrow loads and the AND node can be removed.
7381 // Perform after legalization so that extend nodes will already be
7382 // combined into the loads.
7383 if (BackwardsPropagateMask(N))
7384 return SDValue(N, 0);
7385 }
7386
7387 if (SDValue Combined = visitANDLike(N0, N1, N))
7388 return Combined;
7389
7390 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7391 if (N0.getOpcode() == N1.getOpcode())
7392 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7393 return V;
7394
7395 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7396 return R;
7397 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7398 return R;
7399
7400 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7401 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7402 SDValue X, Y, Z, NotY;
7403 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7404 if (sd_match(N,
7405 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7406 sd_match(NotY, m_Not(m_Value(Y))) &&
7407 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7408 return DAG.getNode(ISD::AND, DL, VT, X,
7409 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7410
7411 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7412 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7413 if (sd_match(N, m_And(m_Value(X),
7414 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7415 sd_match(NotY, m_Not(m_Value(Y))) &&
7416 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7417 return DAG.getNode(ISD::AND, DL, VT, X,
7418 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7419
7420 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7421 // If we are shifting down an extended sign bit, see if we can simplify
7422 // this to shifting the MSB directly to expose further simplifications.
7423 // This pattern often appears after sext_inreg legalization.
7424 APInt Amt;
7425 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7426 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7427 return DAG.getNode(ISD::SRL, DL, VT, X,
7428 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7429
7430 // Masking the negated extension of a boolean is just the zero-extended
7431 // boolean:
7432 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7433 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7434 //
7435 // Note: the SimplifyDemandedBits fold below can make an information-losing
7436 // transform, and then we have no way to find this better fold.
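// Quick check: for bool X == 1, (sub 0, zext X) = -1 and (-1 & 1) = 1 =
// zext X; for the sext form, (sub 0, sext X) = (sub 0, -1) = 1 as well, so
// both reduce to zext(bool X).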
7437 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7438 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7439 X.getOperand(0).getScalarValueSizeInBits() == 1)
7440 return X;
7441 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7442 X.getOperand(0).getScalarValueSizeInBits() == 1)
7443 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7444 }
7445
7446 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7447 // fold (and (sra)) -> (and (srl)) when possible.
7448 if (SimplifyDemandedBits(SDValue(N, 0)))
7449 return SDValue(N, 0);
7450
7451 // fold (zext_inreg (extload x)) -> (zextload x)
7452 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7453 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7454 (ISD::isEXTLoad(N0.getNode()) ||
7455 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7456 auto *LN0 = cast<LoadSDNode>(N0);
7457 EVT MemVT = LN0->getMemoryVT();
7458 // If we zero all the possible extended bits, then we can turn this into
7459 // a zextload if we are running before legalize or the operation is legal.
7460 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7461 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7462 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7463 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7464 ((!LegalOperations && LN0->isSimple()) ||
7465 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7466 SDValue ExtLoad =
7467 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7468 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7469 AddToWorklist(N);
7470 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7471 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7472 }
7473 }
7474
7475 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7476 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7477 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7478 N0.getOperand(1), false))
7479 return BSwap;
7480 }
7481
7482 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7483 return Shifts;
7484
7485 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7486 return V;
7487
7488 // Recognize the following pattern:
7489 //
7490 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7491 //
7492 // where bitmask is a mask that clears the upper bits of AndVT. The
7493 // number of bits in bitmask must be a power of two.
7494 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7495 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7496 return false;
7497
7498 auto *C = dyn_cast<ConstantSDNode>(RHS);
7499 if (!C)
7500 return false;
7501
7502 if (!C->getAPIntValue().isMask(
7503 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7504 return false;
7505
7506 return true;
7507 };
7508
7509 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7510 if (IsAndZeroExtMask(N0, N1))
7511 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7512
7513 if (hasOperation(ISD::USUBSAT, VT))
7514 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7515 return V;
7516
7517 // Postpone until legalization completed to avoid interference with bswap
7518 // folding
7519 if (LegalOperations || VT.isVector())
7520 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7521 return R;
7522
7523 return SDValue();
7524}
7525
7526/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
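/// For example, with a 16-bit payload in an i32: a = 0x00001234 gives
/// ((a & 0xff00) >> 8) | ((a & 0xff) << 8) = 0x3412, and
/// (bswap a) >> 16 = 0x34120000 >> 16 = 0x3412 as well.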
7527SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7528 bool DemandHighBits) {
7529 if (!LegalOperations)
7530 return SDValue();
7531
7532 EVT VT = N->getValueType(0);
7533 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7534 return SDValue();
7535 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7536 return SDValue();
7537
7538 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7539 bool LookPassAnd0 = false;
7540 bool LookPassAnd1 = false;
7541 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7542 std::swap(N0, N1);
7543 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7544 std::swap(N0, N1);
7545 if (N0.getOpcode() == ISD::AND) {
7546 if (!N0->hasOneUse())
7547 return SDValue();
7548 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7549 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7550 // This is needed for X86.
7551 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7552 N01C->getZExtValue() != 0xFFFF))
7553 return SDValue();
7554 N0 = N0.getOperand(0);
7555 LookPassAnd0 = true;
7556 }
7557
7558 if (N1.getOpcode() == ISD::AND) {
7559 if (!N1->hasOneUse())
7560 return SDValue();
7561 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7562 if (!N11C || N11C->getZExtValue() != 0xFF)
7563 return SDValue();
7564 N1 = N1.getOperand(0);
7565 LookPassAnd1 = true;
7566 }
7567
7568 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7569 std::swap(N0, N1);
7570 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7571 return SDValue();
7572 if (!N0->hasOneUse() || !N1->hasOneUse())
7573 return SDValue();
7574
7575 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7576 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7577 if (!N01C || !N11C)
7578 return SDValue();
7579 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7580 return SDValue();
7581
7582 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7583 SDValue N00 = N0->getOperand(0);
7584 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7585 if (!N00->hasOneUse())
7586 return SDValue();
7587 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7588 if (!N001C || N001C->getZExtValue() != 0xFF)
7589 return SDValue();
7590 N00 = N00.getOperand(0);
7591 LookPassAnd0 = true;
7592 }
7593
7594 SDValue N10 = N1->getOperand(0);
7595 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7596 if (!N10->hasOneUse())
7597 return SDValue();
7598 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7599 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7600 // for X86.
7601 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7602 N101C->getZExtValue() != 0xFFFF))
7603 return SDValue();
7604 N10 = N10.getOperand(0);
7605 LookPassAnd1 = true;
7606 }
7607
7608 if (N00 != N10)
7609 return SDValue();
7610
7611 // Make sure everything beyond the low halfword gets set to zero since the SRL
7612 // 16 will clear the top bits.
7613 unsigned OpSizeInBits = VT.getSizeInBits();
7614 if (OpSizeInBits > 16) {
7615 // If the left-shift isn't masked out then the only way this is a bswap is
7616 // if all bits beyond the low 8 are 0. In that case the entire pattern
7617 // reduces to a left shift anyway: leave it for other parts of the combiner.
7618 if (DemandHighBits && !LookPassAnd0)
7619 return SDValue();
7620
7621 // However, if the right shift isn't masked out then it might be because
7622 // it's not needed. See if we can spot that too. If the high bits aren't
7623 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7624 // upper bits to be zero.
7625 if (!LookPassAnd1) {
7626 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7627 if (!DAG.MaskedValueIsZero(N10,
7628 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7629 return SDValue();
7630 }
7631 }
7632
7633 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7634 if (OpSizeInBits > 16) {
7635 SDLoc DL(N);
7636 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7637 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7638 }
7639 return Res;
7640}
7641
7642/// Return true if the specified node is an element that makes up a 32-bit
7643/// packed halfword byteswap.
7644/// ((x & 0x000000ff) << 8) |
7645/// ((x & 0x0000ff00) >> 8) |
7646/// ((x & 0x00ff0000) << 8) |
7647/// ((x & 0xff000000) >> 8)
7648 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7649 if (!N->hasOneUse())
7650 return false;
7651
7652 unsigned Opc = N.getOpcode();
7653 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7654 return false;
7655
7656 SDValue N0 = N.getOperand(0);
7657 unsigned Opc0 = N0.getOpcode();
7658 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7659 return false;
7660
7661 ConstantSDNode *N1C = nullptr;
7662 // SHL or SRL: look upstream for AND mask operand
7663 if (Opc == ISD::AND)
7664 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7665 else if (Opc0 == ISD::AND)
7666 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7667 if (!N1C)
7668 return false;
7669
7670 unsigned MaskByteOffset;
7671 switch (N1C->getZExtValue()) {
7672 default:
7673 return false;
7674 case 0xFF: MaskByteOffset = 0; break;
7675 case 0xFF00: MaskByteOffset = 1; break;
7676 case 0xFFFF:
7677 // In case demanded bits didn't clear the bits that will be shifted out.
7678 // This is needed for X86.
7679 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7680 MaskByteOffset = 1;
7681 break;
7682 }
7683 return false;
7684 case 0xFF0000: MaskByteOffset = 2; break;
7685 case 0xFF000000: MaskByteOffset = 3; break;
7686 }
7687
7688 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7689 if (Opc == ISD::AND) {
7690 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7691 // (x >> 8) & 0xff
7692 // (x >> 8) & 0xff0000
7693 if (Opc0 != ISD::SRL)
7694 return false;
7695 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7696 if (!C || C->getZExtValue() != 8)
7697 return false;
7698 } else {
7699 // (x << 8) & 0xff00
7700 // (x << 8) & 0xff000000
7701 if (Opc0 != ISD::SHL)
7702 return false;
7703 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7704 if (!C || C->getZExtValue() != 8)
7705 return false;
7706 }
7707 } else if (Opc == ISD::SHL) {
7708 // (x & 0xff) << 8
7709 // (x & 0xff0000) << 8
7710 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7711 return false;
7712 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7713 if (!C || C->getZExtValue() != 8)
7714 return false;
7715 } else { // Opc == ISD::SRL
7716 // (x & 0xff00) >> 8
7717 // (x & 0xff000000) >> 8
7718 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7719 return false;
7720 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7721 if (!C || C->getZExtValue() != 8)
7722 return false;
7723 }
7724
7725 if (Parts[MaskByteOffset])
7726 return false;
7727
7728 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7729 return true;
7730}
7731
7732// Match 2 elements of a packed halfword bswap.
7733 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7734 if (N.getOpcode() == ISD::OR)
7735 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7736 isBSwapHWordElement(N.getOperand(1), Parts);
7737
7738 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7739 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7740 if (!C || C->getAPIntValue() != 16)
7741 return false;
7742 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7743 return true;
7744 }
7745
7746 return false;
7747}
7748
7749// Match this pattern:
7750// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7751// And rewrite this to:
7752// (rotr (bswap A), 16)
7753 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7754 SelectionDAG &DAG, SDNode *N, SDValue N0,
7755 SDValue N1, EVT VT) {
7756 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7757 "MatchBSwapHWordOrAndAnd: expecting i32");
7758 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7759 return SDValue();
7760 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7761 return SDValue();
7762 // TODO: this is too restrictive; lifting this restriction requires more tests
7763 if (!N0->hasOneUse() || !N1->hasOneUse())
7764 return SDValue();
7765 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7766 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7767 if (!Mask0 || !Mask1)
7768 return SDValue();
7769 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7770 Mask1->getAPIntValue() != 0x00ff00ff)
7771 return SDValue();
7772 SDValue Shift0 = N0.getOperand(0);
7773 SDValue Shift1 = N1.getOperand(0);
7774 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7775 return SDValue();
7776 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7777 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7778 if (!ShiftAmt0 || !ShiftAmt1)
7779 return SDValue();
7780 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7781 return SDValue();
7782 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7783 return SDValue();
7784
7785 SDLoc DL(N);
7786 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7787 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7788 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7789}
7790
7791/// Match a 32-bit packed halfword bswap. That is
7792/// ((x & 0x000000ff) << 8) |
7793/// ((x & 0x0000ff00) >> 8) |
7794/// ((x & 0x00ff0000) << 8) |
7795/// ((x & 0xff000000) >> 8)
7796/// => (rotl (bswap x), 16)
7797SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7798 if (!LegalOperations)
7799 return SDValue();
7800
7801 EVT VT = N->getValueType(0);
7802 if (VT != MVT::i32)
7803 return SDValue();
7804 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7805 return SDValue();
7806
7807 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
7808 return BSwap;
7809
7810 // Try again with commuted operands.
7811 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
7812 return BSwap;
7813
7814
7815 // Look for either
7816 // (or (bswaphpair), (bswaphpair))
7817 // (or (or (bswaphpair), (and)), (and))
7818 // (or (or (and), (bswaphpair)), (and))
7819 SDNode *Parts[4] = {};
7820
7821 if (isBSwapHWordPair(N0, Parts)) {
7822 // (or (or (and), (and)), (or (and), (and)))
7823 if (!isBSwapHWordPair(N1, Parts))
7824 return SDValue();
7825 } else if (N0.getOpcode() == ISD::OR) {
7826 // (or (or (or (and), (and)), (and)), (and))
7827 if (!isBSwapHWordElement(N1, Parts))
7828 return SDValue();
7829 SDValue N00 = N0.getOperand(0);
7830 SDValue N01 = N0.getOperand(1);
7831 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7832 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7833 return SDValue();
7834 } else {
7835 return SDValue();
7836 }
7837
7838 // Make sure the parts are all coming from the same node.
7839 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7840 return SDValue();
7841
7842 SDLoc DL(N);
7843 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7844 SDValue(Parts[0], 0));
7845
7846 // Result of the bswap should be rotated by 16. If it's not legal, then
7847 // do (x << 16) | (x >> 16).
7848 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7849 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7850 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7851 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7852 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7853 return DAG.getNode(ISD::OR, DL, VT,
7854 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7855 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7856}
7857
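// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time check of the packed halfword bswap identity
// documented above MatchBSwapHWord. rotl32/bswap32 are illustrative helpers,
// not LLVM APIs.
#include <cstdint>
namespace bswap_hword_sketch {
constexpr uint32_t rotl32(uint32_t X, unsigned R) {
  return R == 0 ? X : (X << R) | (X >> (32 - R));
}
constexpr uint32_t bswap32(uint32_t X) {
  return (X << 24) | ((X & 0xff00u) << 8) | ((X >> 8) & 0xff00u) | (X >> 24);
}
constexpr uint32_t X = 0xdeadbeefu;
static_assert((((X & 0x000000ffu) << 8) | ((X & 0x0000ff00u) >> 8) |
               ((X & 0x00ff0000u) << 8) | ((X & 0xff000000u) >> 8)) ==
                  rotl32(bswap32(X), 16),
              "packed halfword bswap == (rotl (bswap x), 16)");
} // namespace bswap_hword_sketch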
7858/// This contains all DAGCombine rules which reduce two values combined by
7859/// an Or operation to a single value \see visitANDLike().
7860SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7861 EVT VT = N1.getValueType();
7862
7863 // fold (or x, undef) -> -1
7864 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7865 return DAG.getAllOnesConstant(DL, VT);
7866
7867 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7868 return V;
7869
7870 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7871 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7872 // Don't increase # computations.
7873 (N0->hasOneUse() || N1->hasOneUse())) {
7874 // We can only do this xform if we know that bits from X that are set in C2
7875 // but not in C1 are already zero. Likewise for Y.
7876 if (const ConstantSDNode *N0O1C =
7877 getAsNonOpaqueConstant(N0.getOperand(1))) {
7878 if (const ConstantSDNode *N1O1C =
7879 getAsNonOpaqueConstant(N1.getOperand(1))) {
7880 // We can only do this xform if we know that bits from X that are set in
7881 // C2 but not in C1 are already zero. Likewise for Y.
7882 const APInt &LHSMask = N0O1C->getAPIntValue();
7883 const APInt &RHSMask = N1O1C->getAPIntValue();
7884
7885 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7886 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7887 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7888 N0.getOperand(0), N1.getOperand(0));
7889 return DAG.getNode(ISD::AND, DL, VT, X,
7890 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7891 }
7892 }
7893 }
7894 }
7895
7896 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7897 if (N0.getOpcode() == ISD::AND &&
7898 N1.getOpcode() == ISD::AND &&
7899 N0.getOperand(0) == N1.getOperand(0) &&
7900 // Don't increase # computations.
7901 (N0->hasOneUse() || N1->hasOneUse())) {
7902 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7903 N0.getOperand(1), N1.getOperand(1));
7904 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7905 }
7906
7907 return SDValue();
7908}
7909
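// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time illustration of the
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C1|C2) fold above and its
// precondition. The concrete constants are arbitrary: X has no bits in
// C2 & ~C1 and Y has no bits in C1 & ~C2, which is what the MaskedValueIsZero
// queries establish in the real combine.
#include <cstdint>
namespace or_of_ands_sketch {
constexpr uint32_t C1 = 0x0000ffffu, C2 = 0xffff0000u;
constexpr uint32_t X = 0x00001234u; // (X & (C2 & ~C1)) == 0
constexpr uint32_t Y = 0x56780000u; // (Y & (C1 & ~C2)) == 0
static_assert((X & (C2 & ~C1)) == 0 && (Y & (C1 & ~C2)) == 0, "precondition");
static_assert(((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2)),
              "(or (and X, C1), (and Y, C2)) == (and (or X, Y), C1|C2)");
} // namespace or_of_ands_sketch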
7910/// OR combines for which the commuted variant will be tried as well.
7911static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7912 SDNode *N) {
7913 EVT VT = N0.getValueType();
7914 unsigned BW = VT.getScalarSizeInBits();
7915 SDLoc DL(N);
7916
7917 auto peekThroughResize = [](SDValue V) {
7918 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7919 return V->getOperand(0);
7920 return V;
7921 };
7922
7923 SDValue N0Resized = peekThroughResize(N0);
7924 if (N0Resized.getOpcode() == ISD::AND) {
7925 SDValue N1Resized = peekThroughResize(N1);
7926 SDValue N00 = N0Resized.getOperand(0);
7927 SDValue N01 = N0Resized.getOperand(1);
7928
7929 // fold or (and x, y), x --> x
7930 if (N00 == N1Resized || N01 == N1Resized)
7931 return N1;
7932
7933 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7934 // TODO: Set AllowUndefs = true.
7935 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7936 /* AllowUndefs */ false)) {
7937 if (peekThroughResize(NotOperand) == N1Resized)
7938 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7939 N1);
7940 }
7941
7942 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7943 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7944 /* AllowUndefs */ false)) {
7945 if (peekThroughResize(NotOperand) == N1Resized)
7946 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7947 N1);
7948 }
7949 }
7950
7951 SDValue X, Y;
7952
7953 // fold or (xor X, N1), N1 --> or X, N1
7954 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7955 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7956
7957 // fold or (xor x, y), (x and/or y) --> or x, y
7958 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7959 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7960 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7961 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7962
7963 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7964 return R;
7965
7966 auto peekThroughZext = [](SDValue V) {
7967 if (V->getOpcode() == ISD::ZERO_EXTEND)
7968 return V->getOperand(0);
7969 return V;
7970 };
7971
7972 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7973 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7974 N0.getOperand(0) == N1.getOperand(0) &&
7975 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7976 return N0;
7977
7978 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7979 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7980 N0.getOperand(1) == N1.getOperand(0) &&
7981 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7982 return N0;
7983
7984 // Attempt to match a legalized build_pair-esque pattern:
7985 // or(shl(aext(Hi),BW/2),zext(Lo))
7986 SDValue Lo, Hi;
7987 if (sd_match(N0,
7988 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7989 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7990 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7991 Lo.getValueType() == Hi.getValueType()) {
7992 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7993 SDValue NotLo, NotHi;
7994 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7995 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7996 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7997 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7998 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7999 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8000 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8001 }
8002 }
8003
8004 return SDValue();
8005}
8006
8007SDValue DAGCombiner::visitOR(SDNode *N) {
8008 SDValue N0 = N->getOperand(0);
8009 SDValue N1 = N->getOperand(1);
8010 EVT VT = N1.getValueType();
8011 SDLoc DL(N);
8012
8013 // x | x --> x
8014 if (N0 == N1)
8015 return N0;
8016
8017 // fold (or c1, c2) -> c1|c2
8018 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8019 return C;
8020
8021 // canonicalize constant to RHS
8022 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8023 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8024 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8025
8026 // fold vector ops
8027 if (VT.isVector()) {
8028 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8029 return FoldedVOp;
8030
8031 // fold (or x, 0) -> x, vector edition
8032 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8033 return N0;
8034
8035 // fold (or x, -1) -> -1, vector edition
8036 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8037 // do not return N1, because undef node may exist in N1
8038 return DAG.getAllOnesConstant(DL, N1.getValueType());
8039
8040 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8041 // Do this only if the resulting type / shuffle is legal.
8042 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8043 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8044 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8045 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8046 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8047 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8048 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8049 // Ensure both shuffles have a zero input.
8050 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8051 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8052 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8053 bool CanFold = true;
8054 int NumElts = VT.getVectorNumElements();
8055 SmallVector<int, 4> Mask(NumElts, -1);
8056
8057 for (int i = 0; i != NumElts; ++i) {
8058 int M0 = SV0->getMaskElt(i);
8059 int M1 = SV1->getMaskElt(i);
8060
8061 // Determine if either index is pointing to a zero vector.
8062 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8063 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8064
8065 // If one element is zero and the other side is undef, keep undef.
8066 // This also handles the case that both are undef.
8067 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8068 continue;
8069
8070 // Make sure only one of the elements is zero.
8071 if (M0Zero == M1Zero) {
8072 CanFold = false;
8073 break;
8074 }
8075
8076 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8077
8078 // We have a zero and non-zero element. If the non-zero came from
8079 // SV0 make the index a LHS index. If it came from SV1, make it
8080 // a RHS index. We need to mod by NumElts because we don't care
8081 // which operand it came from in the original shuffles.
8082 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8083 }
8084
8085 if (CanFold) {
8086 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8087 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8088 SDValue LegalShuffle =
8089 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8090 if (LegalShuffle)
8091 return LegalShuffle;
8092 }
8093 }
8094 }
8095 }
8096
8097 // fold (or x, 0) -> x
8098 if (isNullConstant(N1))
8099 return N0;
8100
8101 // fold (or x, -1) -> -1
8102 if (isAllOnesConstant(N1))
8103 return N1;
8104
8105 if (SDValue NewSel = foldBinOpIntoSelect(N))
8106 return NewSel;
8107
8108 // fold (or x, c) -> c iff (x & ~c) == 0
8109 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8110 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8111 return N1;
8112
8113 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8114 return R;
8115
8116 if (SDValue Combined = visitORLike(N0, N1, DL))
8117 return Combined;
8118
8119 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8120 return Combined;
8121
8122 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8123 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8124 return BSwap;
8125 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8126 return BSwap;
8127
8128 // reassociate or
8129 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8130 return ROR;
8131
8132 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8133 if (SDValue SD =
8134 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8135 return SD;
8136
8137 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8138 // iff (c1 & c2) != 0 or c1/c2 are undef.
8139 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8140 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8141 };
8142 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8143 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8144 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8145 {N1, N0.getOperand(1)})) {
8146 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8147 AddToWorklist(IOR.getNode());
8148 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8149 }
8150 }
8151
8152 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8153 return Combined;
8154 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8155 return Combined;
8156
8157 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8158 if (N0.getOpcode() == N1.getOpcode())
8159 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8160 return V;
8161
8162 // See if this is some rotate idiom.
8163 if (SDValue Rot = MatchRotate(N0, N1, DL))
8164 return Rot;
8165
8166 if (SDValue Load = MatchLoadCombine(N))
8167 return Load;
8168
8169 // Simplify the operands using demanded-bits information.
8170 if (SimplifyDemandedBits(SDValue(N, 0)))
8171 return SDValue(N, 0);
8172
8173 // If OR can be rewritten into ADD, try combines based on ADD.
8174 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8175 DAG.isADDLike(SDValue(N, 0)))
8176 if (SDValue Combined = visitADDLike(N))
8177 return Combined;
8178
8179 // Postpone until legalization completed to avoid interference with bswap
8180 // folding
8181 if (LegalOperations || VT.isVector())
8182 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8183 return R;
8184
8185 return SDValue();
8186}
8187
8188static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8189 SDValue &Mask) {
8190 if (Op.getOpcode() == ISD::AND &&
8191 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8192 Mask = Op.getOperand(1);
8193 return Op.getOperand(0);
8194 }
8195 return Op;
8196}
8197
8198/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8199static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8200 SDValue &Mask) {
8201 Op = stripConstantMask(DAG, Op, Mask);
8202 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8203 Shift = Op;
8204 return true;
8205 }
8206 return false;
8207}
8208
8209/// Helper function for visitOR to extract the needed side of a rotate idiom
8210/// from a shl/srl/mul/udiv. This is meant to handle cases where
8211/// InstCombine merged some outside op with one of the shifts from
8212/// the rotate pattern.
8213/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8214/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8215/// patterns:
8216///
8217/// (or (add v v) (shrl v bitwidth-1)):
8218/// expands (add v v) -> (shl v 1)
8219///
8220/// (or (mul v c0) (shrl (mul v c1) c2)):
8221/// expands (mul v c0) -> (shl (mul v c1) c3)
8222///
8223/// (or (udiv v c0) (shl (udiv v c1) c2)):
8224/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8225///
8226/// (or (shl v c0) (shrl (shl v c1) c2)):
8227/// expands (shl v c0) -> (shl (shl v c1) c3)
8228///
8229/// (or (shrl v c0) (shl (shrl v c1) c2)):
8230/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8231///
8232/// Such that in all cases, c3+c2==bitwidth(op v c1).
8233static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8234 SDValue ExtractFrom, SDValue &Mask,
8235 const SDLoc &DL) {
8236 assert(OppShift && ExtractFrom && "Empty SDValue");
8237 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8238 return SDValue();
8239
8240 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8241
8242 // Value and Type of the shift.
8243 SDValue OppShiftLHS = OppShift.getOperand(0);
8244 EVT ShiftedVT = OppShiftLHS.getValueType();
8245
8246 // Amount of the existing shift.
8247 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8248
8249 // (add v v) -> (shl v 1)
8250 // TODO: Should this be a general DAG canonicalization?
8251 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8252 ExtractFrom.getOpcode() == ISD::ADD &&
8253 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8254 ExtractFrom.getOperand(0) == OppShiftLHS &&
8255 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8256 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8257 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8258
8259 // Preconditions:
8260 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8261 //
8262 // Find opcode of the needed shift to be extracted from (op0 v c0).
8263 unsigned Opcode = ISD::DELETED_NODE;
8264 bool IsMulOrDiv = false;
8265 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8266 // opcode or its arithmetic (mul or udiv) variant.
8267 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8268 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8269 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8270 return false;
8271 Opcode = NeededShift;
8272 return true;
8273 };
8274 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8275 // that the needed shift can be extracted from.
8276 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8277 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8278 return SDValue();
8279
8280 // op0 must be the same opcode on both sides, have the same LHS argument,
8281 // and produce the same value type.
8282 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8283 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8284 ShiftedVT != ExtractFrom.getValueType())
8285 return SDValue();
8286
8287 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8288 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8289 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8290 ConstantSDNode *ExtractFromCst =
8291 isConstOrConstSplat(ExtractFrom.getOperand(1));
8292 // TODO: We should be able to handle non-uniform constant vectors for these values
8293 // Check that we have constant values.
8294 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8295 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8296 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8297 return SDValue();
8298
8299 // Compute the shift amount we need to extract to complete the rotate.
8300 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8301 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8302 return SDValue();
8303 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8304 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8305 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8306 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8307 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8308
8309 // Now try extract the needed shift from the ExtractFrom op and see if the
8310 // result matches up with the existing shift's LHS op.
8311 if (IsMulOrDiv) {
8312 // Op to extract from is a mul or udiv by a constant.
8313 // Check:
8314 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8315 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8316 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8317 NeededShiftAmt.getZExtValue());
8318 APInt ResultAmt;
8319 APInt Rem;
8320 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8321 if (Rem != 0 || ResultAmt != OppLHSAmt)
8322 return SDValue();
8323 } else {
8324 // Op to extract from is a shift by a constant.
8325 // Check:
8326 // c2 - (bitwidth(op0 v c0) - c1) == c0
8327 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8328 ExtractFromAmt.getBitWidth()))
8329 return SDValue();
8330 }
8331
8332 // Return the expanded shift op that should allow a rotate to be formed.
8333 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8334 EVT ResVT = ExtractFrom.getValueType();
8335 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8336 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8337}
8338
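// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time check of the arithmetic used by
// extractShiftForRotate for the mul case. With a 32-bit type, c1 = 9,
// c2 = 29 and c3 = 32 - c2 = 3, we get c0 = c1 * (1 << c3) = 72, so
// (mul v, 72) equals (shl (mul v, 9), 3) in modular arithmetic, and or'ing it
// with (srl (mul v, 9), 29) forms a rotate. The constants are arbitrary
// examples, not taken from a regression test.
#include <cstdint>
namespace extract_shift_for_rotate_sketch {
constexpr uint32_t rotl32(uint32_t X, unsigned R) {
  return R == 0 ? X : (X << R) | (X >> (32 - R));
}
constexpr uint32_t V = 0x87654321u;
constexpr uint32_t M = V * 9u; // (mul v, c1)
static_assert(V * 72u == (M << 3), "(mul v, c0) == (shl (mul v, c1), c3)");
static_assert(((V * 72u) | (M >> 29)) == rotl32(M, 3),
              "(or (mul v, c0), (srl (mul v, c1), c2)) == (rotl (mul v, c1), c3)");
} // namespace extract_shift_for_rotate_sketch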
8339// Return true if we can prove that, whenever Neg and Pos are both in the
8340// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8341// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8342//
8343// (or (shift1 X, Neg), (shift2 X, Pos))
8344//
8345// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8346// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8347// to consider shift amounts with defined behavior.
8348//
8349// The IsRotate flag should be set when the LHS of both shifts is the same.
8350// Otherwise if matching a general funnel shift, it should be clear.
8351static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8352 SelectionDAG &DAG, bool IsRotate) {
8353 const auto &TLI = DAG.getTargetLoweringInfo();
8354 // If EltSize is a power of 2 then:
8355 //
8356 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8357 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8358 //
8359 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8360 // for the stronger condition:
8361 //
8362 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8363 //
8364 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8365 // we can just replace Neg with Neg' for the rest of the function.
8366 //
8367 // In other cases we check for the even stronger condition:
8368 //
8369 // Neg == EltSize - Pos [B]
8370 //
8371 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8372 // behavior if Pos == 0 (and consequently Neg == EltSize).
8373 //
8374 // We could actually use [A] whenever EltSize is a power of 2, but the
8375 // only extra cases that it would match are those uninteresting ones
8376 // where Neg and Pos are never in range at the same time. E.g. for
8377 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8378 // as well as (sub 32, Pos), but:
8379 //
8380 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8381 //
8382 // always invokes undefined behavior for 32-bit X.
8383 //
8384 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8385 // This allows us to peek through any operations that only affect Mask's
8386 // un-demanded bits.
8387 //
8388 // NOTE: We can only do this when matching operations which won't modify the
8389 // least Log2(EltSize) significant bits and not a general funnel shift.
8390 unsigned MaskLoBits = 0;
8391 if (IsRotate && isPowerOf2_64(EltSize)) {
8392 unsigned Bits = Log2_64(EltSize);
8393 unsigned NegBits = Neg.getScalarValueSizeInBits();
8394 if (NegBits >= Bits) {
8395 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8396 if (SDValue Inner =
8397 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8398 Neg = Inner;
8399 MaskLoBits = Bits;
8400 }
8401 }
8402 }
8403
8404 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8405 if (Neg.getOpcode() != ISD::SUB)
8406 return false;
8407 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8408 if (!NegC)
8409 return false;
8410 SDValue NegOp1 = Neg.getOperand(1);
8411
8412 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8413 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8414 // are redundant for the purpose of the equality.
8415 if (MaskLoBits) {
8416 unsigned PosBits = Pos.getScalarValueSizeInBits();
8417 if (PosBits >= MaskLoBits) {
8418 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8419 if (SDValue Inner =
8420 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8421 Pos = Inner;
8422 }
8423 }
8424 }
8425
8426 // The condition we need is now:
8427 //
8428 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8429 //
8430 // If NegOp1 == Pos then we need:
8431 //
8432 // EltSize & Mask == NegC & Mask
8433 //
8434 // (because "x & Mask" is a truncation and distributes through subtraction).
8435 //
8436 // We also need to account for a potential truncation of NegOp1 if the amount
8437 // has already been legalized to a shift amount type.
8438 APInt Width;
8439 if ((Pos == NegOp1) ||
8440 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8441 Width = NegC->getAPIntValue();
8442
8443 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8444 // Then the condition we want to prove becomes:
8445 //
8446 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8447 //
8448 // which, again because "x & Mask" is a truncation, becomes:
8449 //
8450 // NegC & Mask == (EltSize - PosC) & Mask
8451 // EltSize & Mask == (NegC + PosC) & Mask
8452 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8453 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8454 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8455 else
8456 return false;
8457 } else
8458 return false;
8459
8460 // Now we just need to check that EltSize & Mask == Width & Mask.
8461 if (MaskLoBits)
8462 // EltSize & Mask is 0 since Mask is EltSize - 1.
8463 return Width.getLoBits(MaskLoBits) == 0;
8464 return Width == EltSize;
8465}
8466
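// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time check of condition [A] above for a 32-bit
// element: when Neg is (and (sub 0, Pos), 31), Neg & 31 equals
// (32 - Pos) & 31 for every in-range Pos, so (or (shl x, Neg), (srl x, Pos))
// behaves as (rotr x, Pos). check() is an illustrative helper, not an LLVM API.
#include <cstdint>
namespace match_rotate_sub_sketch {
constexpr bool check(uint32_t X) {
  for (unsigned Pos = 0; Pos < 32; ++Pos) {
    unsigned Neg = (0u - Pos) & 31u;
    if (Neg != ((32u - Pos) & 31u))
      return false;
    uint32_t RotR = Pos == 0 ? X : (X >> Pos) | (X << (32 - Pos));
    if (((X << Neg) | (X >> Pos)) != RotR)
      return false;
  }
  return true;
}
static_assert(check(0x12345678u), "Neg == (sub EltSize, Pos) modulo EltSize");
} // namespace match_rotate_sub_sketch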
8467// A subroutine of MatchRotate used once we have found an OR of two opposite
8468// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8469// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8470// former being preferred if supported. InnerPos and InnerNeg are Pos and
8471// Neg with outer conversions stripped away.
8472SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8473 SDValue Neg, SDValue InnerPos,
8474 SDValue InnerNeg, bool HasPos,
8475 unsigned PosOpcode, unsigned NegOpcode,
8476 const SDLoc &DL) {
8477 // fold (or (shl x, (*ext y)),
8478 // (srl x, (*ext (sub 32, y)))) ->
8479 // (rotl x, y) or (rotr x, (sub 32, y))
8480 //
8481 // fold (or (shl x, (*ext (sub 32, y))),
8482 // (srl x, (*ext y))) ->
8483 // (rotr x, y) or (rotl x, (sub 32, y))
8484 EVT VT = Shifted.getValueType();
8485 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8486 /*IsRotate*/ true)) {
8487 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8488 HasPos ? Pos : Neg);
8489 }
8490
8491 return SDValue();
8492}
8493
8494// A subroutine of MatchRotate used once we have found an OR of two opposite
8495// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8496// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8497// former being preferred if supported. InnerPos and InnerNeg are Pos and
8498// Neg with outer conversions stripped away.
8499// TODO: Merge with MatchRotatePosNeg.
8500SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8501 SDValue Neg, SDValue InnerPos,
8502 SDValue InnerNeg, bool HasPos,
8503 unsigned PosOpcode, unsigned NegOpcode,
8504 const SDLoc &DL) {
8505 EVT VT = N0.getValueType();
8506 unsigned EltBits = VT.getScalarSizeInBits();
8507
8508 // fold (or (shl x0, (*ext y)),
8509 // (srl x1, (*ext (sub 32, y)))) ->
8510 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8511 //
8512 // fold (or (shl x0, (*ext (sub 32, y))),
8513 // (srl x1, (*ext y))) ->
8514 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8515 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8516 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8517 HasPos ? Pos : Neg);
8518 }
8519
8520 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8521 // so for now just use the PosOpcode case if its legal.
8522 // TODO: When can we use the NegOpcode case?
8523 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8524 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8525 if (Op.getOpcode() != BinOpc)
8526 return false;
8527 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8528 return Cst && (Cst->getAPIntValue() == Imm);
8529 };
8530
8531 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8532 // -> (fshl x0, x1, y)
8533 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8534 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8535 InnerPos == InnerNeg.getOperand(0) &&
8536 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8537 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8538 }
8539
8540 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8541 // -> (fshr x0, x1, y)
8542 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8543 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8544 InnerNeg == InnerPos.getOperand(0) &&
8545 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8546 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8547 }
8548
8549 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8550 // -> (fshr x0, x1, y)
8551 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8552 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8553 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8554 InnerNeg == InnerPos.getOperand(0) &&
8555 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8556 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8557 }
8558 }
8559
8560 return SDValue();
8561}
8562
8563// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8564// idioms for rotate, and if the target supports rotation instructions, generate
8565// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8566// with different shifted sources.
8567SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8568 EVT VT = LHS.getValueType();
8569
8570 // The target must have at least one rotate/funnel flavor.
8571 // We still try to match rotate by constant pre-legalization.
8572 // TODO: Support pre-legalization funnel-shift by constant.
8573 bool HasROTL = hasOperation(ISD::ROTL, VT);
8574 bool HasROTR = hasOperation(ISD::ROTR, VT);
8575 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8576 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8577
8578 // If the type is going to be promoted and the target has enabled custom
8579 // lowering for rotate, allow matching rotate by non-constants. Only allow
8580 // this for scalar types.
8581 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8582 TargetLowering::TypePromoteInteger) {
8583 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8584 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8585 }
8586
8587 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8588 return SDValue();
8589
8590 // Check for truncated rotate.
8591 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8592 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8593 assert(LHS.getValueType() == RHS.getValueType());
8594 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8595 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8596 }
8597 }
8598
8599 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8600 SDValue LHSShift; // The shift.
8601 SDValue LHSMask; // AND value if any.
8602 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8603
8604 SDValue RHSShift; // The shift.
8605 SDValue RHSMask; // AND value if any.
8606 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8607
8608 // If neither side matched a rotate half, bail
8609 if (!LHSShift && !RHSShift)
8610 return SDValue();
8611
8612 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8613 // side of the rotate, so try to handle that here. In all cases we need to
8614 // pass the matched shift from the opposite side to compute the opcode and
8615 // needed shift amount to extract. We still want to do this if both sides
8616 // matched a rotate half because one half may be a potential overshift that
8617 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8618 // single one).
8619
8620 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8621 if (LHSShift)
8622 if (SDValue NewRHSShift =
8623 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8624 RHSShift = NewRHSShift;
8625 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8626 if (RHSShift)
8627 if (SDValue NewLHSShift =
8628 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8629 LHSShift = NewLHSShift;
8630
8631 // If a side is still missing, nothing else we can do.
8632 if (!RHSShift || !LHSShift)
8633 return SDValue();
8634
8635 // At this point we've matched or extracted a shift op on each side.
8636
8637 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8638 return SDValue(); // Shifts must disagree.
8639
8640 // Canonicalize shl to left side in a shl/srl pair.
8641 if (RHSShift.getOpcode() == ISD::SHL) {
8642 std::swap(LHS, RHS);
8643 std::swap(LHSShift, RHSShift);
8644 std::swap(LHSMask, RHSMask);
8645 }
8646
8647 // Something has gone wrong - we've lost the shl/srl pair - bail.
8648 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8649 return SDValue();
8650
8651 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8652 SDValue LHSShiftArg = LHSShift.getOperand(0);
8653 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8654 SDValue RHSShiftArg = RHSShift.getOperand(0);
8655 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8656
8657 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8658 ConstantSDNode *RHS) {
8659 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8660 };
8661
8662 auto ApplyMasks = [&](SDValue Res) {
8663 // If there is an AND of either shifted operand, apply it to the result.
8664 if (LHSMask.getNode() || RHSMask.getNode()) {
8665 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8666 SDValue Mask = AllOnes;
8667
8668 if (LHSMask.getNode()) {
8669 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8670 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8671 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8672 }
8673 if (RHSMask.getNode()) {
8674 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8675 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8676 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8677 }
8678
8679 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8680 }
8681
8682 return Res;
8683 };
8684
8685 // TODO: Support pre-legalization funnel-shift by constant.
8686 bool IsRotate = LHSShiftArg == RHSShiftArg;
8687 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8688 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8689 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8690 // Look for a disguised rotate by constant.
8691 // The common shifted operand X may be hidden inside another 'or'.
8692 SDValue X, Y;
8693 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8694 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8695 return false;
8696 if (CommonOp == Or.getOperand(0)) {
8697 X = CommonOp;
8698 Y = Or.getOperand(1);
8699 return true;
8700 }
8701 if (CommonOp == Or.getOperand(1)) {
8702 X = CommonOp;
8703 Y = Or.getOperand(0);
8704 return true;
8705 }
8706 return false;
8707 };
8708
8709 SDValue Res;
8710 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8711 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8712 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8713 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8714 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8715 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8716 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8717 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8718 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8719 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8720 } else {
8721 return SDValue();
8722 }
8723
8724 return ApplyMasks(Res);
8725 }
8726
8727 return SDValue(); // Requires funnel shift support.
8728 }
8729
8730 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8731 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8732 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8733 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8734 // iff C1+C2 == EltSizeInBits
8735 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8736 SDValue Res;
8737 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8738 bool UseROTL = !LegalOperations || HasROTL;
8739 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8740 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8741 } else {
8742 bool UseFSHL = !LegalOperations || HasFSHL;
8743 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8744 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8745 }
8746
8747 return ApplyMasks(Res);
8748 }
8749
8750 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8751 // shift.
8752 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8753 return SDValue();
8754
8755 // If there is a mask here, and we have a variable shift, we can't be sure
8756 // that we're masking out the right stuff.
8757 if (LHSMask.getNode() || RHSMask.getNode())
8758 return SDValue();
8759
8760 // If the shift amount is sign/zext/any-extended just peel it off.
8761 SDValue LExtOp0 = LHSShiftAmt;
8762 SDValue RExtOp0 = RHSShiftAmt;
8763 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8764 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8765 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8766 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8767 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8768 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8769 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8770 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8771 LExtOp0 = LHSShiftAmt.getOperand(0);
8772 RExtOp0 = RHSShiftAmt.getOperand(0);
8773 }
8774
8775 if (IsRotate && (HasROTL || HasROTR)) {
8776 SDValue TryL =
8777 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8778 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8779 if (TryL)
8780 return TryL;
8781
8782 SDValue TryR =
8783 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8784 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8785 if (TryR)
8786 return TryR;
8787 }
8788
8789 SDValue TryL =
8790 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8791 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8792 if (TryL)
8793 return TryL;
8794
8795 SDValue TryR =
8796 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8797 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8798 if (TryR)
8799 return TryR;
8800
8801 return SDValue();
8802}
8803
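// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time check of the "disguised rotate by constant"
// rewrite used in MatchRotate above: for any X, Y and C1 + C2 == 32,
//   (shl (or X, Y), C1) | (srl X, C2)  ==  (rotl X, C1) | (shl Y, C1).
// The sample values are arbitrary.
#include <cstdint>
namespace disguised_rotate_sketch {
constexpr uint32_t rotl32(uint32_t X, unsigned R) {
  return R == 0 ? X : (X << R) | (X >> (32 - R));
}
constexpr bool check(uint32_t X, uint32_t Y) {
  for (unsigned C1 = 1; C1 < 32; ++C1) {
    unsigned C2 = 32 - C1;
    if ((((X | Y) << C1) | (X >> C2)) != (rotl32(X, C1) | (Y << C1)))
      return false;
  }
  return true;
}
static_assert(check(0xcafef00du, 0x0ba0babau), "disguised rotate identity");
} // namespace disguised_rotate_sketch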
8804/// Recursively traverses the expression calculating the origin of the requested
8805/// byte of the given value. Returns std::nullopt if the provider can't be
8806/// calculated.
8807///
8808/// For all the values except the root of the expression, we verify that the
8809/// value has exactly one use and if not then return std::nullopt. This way if
8810/// the origin of the byte is returned it's guaranteed that the values which
8811/// contribute to the byte are not used outside of this expression.
8812
8813/// However, there is a special case when dealing with vector loads -- we allow
8814/// more than one use if the load is a vector type. Since the values that
8815/// contribute to the byte ultimately come from the ExtractVectorElements of the
8816/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8817/// because those operations are independent from the pattern to be combined.
8818/// For vector loads, we simply care that the ByteProviders are adjacent
8819/// positions of the same vector, and their index matches the byte that is being
8820/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8821/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8822/// byte position we are trying to provide for the LoadCombine. If these do
8823/// not match, then we can not combine the vector loads. \p Index uses the
8824/// byte position we are trying to provide for and is matched against the
8825/// shl and load size. The \p Index algorithm ensures the requested byte is
8826/// provided for by the pattern, and the pattern does not over provide bytes.
8827///
8828///
8829/// The supported LoadCombine pattern for vector loads is as follows
8830/// or
8831/// / \
8832/// or shl
8833/// / \ |
8834/// or shl zext
8835/// / \ | |
8836/// shl zext zext EVE*
8837/// | | | |
8838/// zext EVE* EVE* LOAD
8839/// | | |
8840/// EVE* LOAD LOAD
8841/// |
8842/// LOAD
8843///
8844/// *ExtractVectorElement
8845using SDByteProvider = ByteProvider<SDNode *>;
8846
8847static std::optional<SDByteProvider>
8848calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8849 std::optional<uint64_t> VectorIndex,
8850 unsigned StartingIndex = 0) {
8851
8852 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8853 if (Depth == 10)
8854 return std::nullopt;
8855
8856 // Only allow multiple uses if the instruction is a vector load (in which
8857 // case we will use the load for every ExtractVectorElement)
8858 if (Depth && !Op.hasOneUse() &&
8859 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8860 return std::nullopt;
8861
8862 // Fail to combine if we have encountered anything but a LOAD after handling
8863 // an ExtractVectorElement.
8864 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8865 return std::nullopt;
8866
8867 unsigned BitWidth = Op.getValueSizeInBits();
8868 if (BitWidth % 8 != 0)
8869 return std::nullopt;
8870 unsigned ByteWidth = BitWidth / 8;
8871 assert(Index < ByteWidth && "invalid index requested");
8872 (void) ByteWidth;
8873
8874 switch (Op.getOpcode()) {
8875 case ISD::OR: {
8876 auto LHS =
8877 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8878 if (!LHS)
8879 return std::nullopt;
8880 auto RHS =
8881 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8882 if (!RHS)
8883 return std::nullopt;
8884
8885 if (LHS->isConstantZero())
8886 return RHS;
8887 if (RHS->isConstantZero())
8888 return LHS;
8889 return std::nullopt;
8890 }
8891 case ISD::SHL: {
8892 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8893 if (!ShiftOp)
8894 return std::nullopt;
8895
8896 uint64_t BitShift = ShiftOp->getZExtValue();
8897
8898 if (BitShift % 8 != 0)
8899 return std::nullopt;
8900 uint64_t ByteShift = BitShift / 8;
8901
8902 // If we are shifting by an amount greater than the index we are trying to
8903 // provide, then do not provide anything. Otherwise, subtract the index by
8904 // the amount we shifted by.
8905 return Index < ByteShift
8906 ? SDByteProvider::getConstantZero()
8907 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8908 Depth + 1, VectorIndex, Index);
8909 }
8910 case ISD::ANY_EXTEND:
8911 case ISD::SIGN_EXTEND:
8912 case ISD::ZERO_EXTEND: {
8913 SDValue NarrowOp = Op->getOperand(0);
8914 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8915 if (NarrowBitWidth % 8 != 0)
8916 return std::nullopt;
8917 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8918
8919 if (Index >= NarrowByteWidth)
8920 return Op.getOpcode() == ISD::ZERO_EXTEND
8921 ? std::optional<SDByteProvider>(
8922 SDByteProvider::getConstantZero())
8923 : std::nullopt;
8924 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8925 StartingIndex);
8926 }
8927 case ISD::BSWAP:
8928 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8929 Depth + 1, VectorIndex, StartingIndex);
8930 case ISD::EXTRACT_VECTOR_ELT: {
8931 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8932 if (!OffsetOp)
8933 return std::nullopt;
8934
8935 VectorIndex = OffsetOp->getZExtValue();
8936
8937 SDValue NarrowOp = Op->getOperand(0);
8938 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8939 if (NarrowBitWidth % 8 != 0)
8940 return std::nullopt;
8941 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8942 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8943 // type, leaving the high bits undefined.
8944 if (Index >= NarrowByteWidth)
8945 return std::nullopt;
8946
8947 // Check to see if the position of the element in the vector corresponds
8948 // with the byte we are trying to provide for. In the case of a vector of
8949 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8950 // the element will provide a range of bytes. For example, if we have a
8951 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8952 // 3).
8953 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8954 return std::nullopt;
8955 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8956 return std::nullopt;
8957
8958 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8959 VectorIndex, StartingIndex);
8960 }
8961 case ISD::LOAD: {
8962 auto L = cast<LoadSDNode>(Op.getNode());
8963 if (!L->isSimple() || L->isIndexed())
8964 return std::nullopt;
8965
8966 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8967 if (NarrowBitWidth % 8 != 0)
8968 return std::nullopt;
8969 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8970
8971 // If the width of the load does not reach the byte we are trying to provide for
8972 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8973 // question
8974 if (Index >= NarrowByteWidth)
8975 return L->getExtensionType() == ISD::ZEXTLOAD
8976 ? std::optional<SDByteProvider>(
8977 SDByteProvider::getConstantZero())
8978 : std::nullopt;
8979
8980 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8981 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8982 }
8983 }
8984
8985 return std::nullopt;
8986}
8987
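// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time check of the ISD::SHL rule in
// calculateByteProvider above: byte Index of (shl x, 8 * ByteShift) is a
// constant zero when Index < ByteShift and otherwise equals byte
// (Index - ByteShift) of x. byteAt/check are illustrative helpers.
#include <cstdint>
namespace byte_provider_shl_sketch {
constexpr uint32_t byteAt(uint32_t X, unsigned I) { return (X >> (8 * I)) & 0xffu; }
constexpr bool check(uint32_t X, unsigned ByteShift) {
  uint32_t Shifted = X << (8 * ByteShift);
  for (unsigned I = 0; I < 4; ++I)
    if (byteAt(Shifted, I) != (I < ByteShift ? 0u : byteAt(X, I - ByteShift)))
      return false;
  return true;
}
static_assert(check(0xaabbccddu, 0) && check(0xaabbccddu, 1) &&
                  check(0xaabbccddu, 2) && check(0xaabbccddu, 3),
              "shl by a byte multiple relocates byte providers");
} // namespace byte_provider_shl_sketch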
8988static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8989 return i;
8990}
8991
8992static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8993 return BW - i - 1;
8994}
8995
8996// Check if the bytes offsets we are looking at match with either big or
8997// little endian value loaded. Return true for big endian, false for little
8998// endian, and std::nullopt if match failed.
8999static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9000 int64_t FirstOffset) {
9001 // The endian can be decided only when it is 2 bytes at least.
9002 unsigned Width = ByteOffsets.size();
9003 if (Width < 2)
9004 return std::nullopt;
9005
9006 bool BigEndian = true, LittleEndian = true;
9007 for (unsigned i = 0; i < Width; i++) {
9008 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9009 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9010 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9011 if (!BigEndian && !LittleEndian)
9012 return std::nullopt;
9013 }
9014
9015 assert((BigEndian != LittleEndian) && "It should be either big endian or "
9016 "little endian");
9017 return BigEndian;
9018}
9019
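// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, compile-time restatement of the offset check above for a
// 4-byte value: offsets that grow with the byte index match a little-endian
// layout, offsets that shrink match a big-endian layout, and anything else
// fails. detect() mirrors isBigEndian() but returns -1 where the original
// returns std::nullopt; it is illustrative only.
#include <cstdint>
namespace is_big_endian_sketch {
constexpr int detect(const int64_t (&Offsets)[4], int64_t FirstOffset) {
  bool Big = true, Little = true;
  for (unsigned I = 0; I < 4; ++I) {
    int64_t O = Offsets[I] - FirstOffset;
    Little = Little && O == static_cast<int64_t>(I);
    Big = Big && O == static_cast<int64_t>(3 - I);
    if (!Big && !Little)
      return -1;
  }
  return Big ? 1 : 0;
}
constexpr int64_t LE[4] = {0, 1, 2, 3}, BE[4] = {7, 6, 5, 4}, Mixed[4] = {0, 2, 1, 3};
static_assert(detect(LE, 0) == 0, "ascending offsets -> little endian");
static_assert(detect(BE, 4) == 1, "descending offsets -> big endian");
static_assert(detect(Mixed, 0) == -1, "mixed offsets match neither");
} // namespace is_big_endian_sketch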
9020// Look through one layer of truncate or extend.
9021static SDValue stripTruncAndExt(SDValue Value) {
9022 switch (Value.getOpcode()) {
9023 case ISD::TRUNCATE:
9024 case ISD::ZERO_EXTEND:
9025 case ISD::SIGN_EXTEND:
9026 case ISD::ANY_EXTEND:
9027 return Value.getOperand(0);
9028 }
9029 return SDValue();
9030}
9031
9032/// Match a pattern where a wide type scalar value is stored by several narrow
9033/// stores. Fold it into a single store or a BSWAP and a store if the targets
9034/// supports it.
9035///
9036/// Assuming little endian target:
9037/// i8 *p = ...
9038/// i32 val = ...
9039/// p[0] = (val >> 0) & 0xFF;
9040/// p[1] = (val >> 8) & 0xFF;
9041/// p[2] = (val >> 16) & 0xFF;
9042/// p[3] = (val >> 24) & 0xFF;
9043/// =>
9044/// *((i32)p) = val;
9045///
9046/// i8 *p = ...
9047/// i32 val = ...
9048/// p[0] = (val >> 24) & 0xFF;
9049/// p[1] = (val >> 16) & 0xFF;
9050/// p[2] = (val >> 8) & 0xFF;
9051/// p[3] = (val >> 0) & 0xFF;
9052/// =>
9053/// *((i32)p) = BSWAP(val);
9054SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9055 // The matching looks for "store (trunc x)" patterns that appear early but are
9056 // likely to be replaced by truncating store nodes during combining.
9057 // TODO: If there is evidence that running this later would help, this
9058 // limitation could be removed. Legality checks may need to be added
9059 // for the created store and optional bswap/rotate.
9060 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9061 return SDValue();
9062
9063 // We only handle merging simple stores of 1-4 bytes.
9064 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9065 EVT MemVT = N->getMemoryVT();
9066 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9067 !N->isSimple() || N->isIndexed())
9068 return SDValue();
9069
9070 // Collect all of the stores in the chain, up to the maximum store width (i64).
9071 SDValue Chain = N->getChain();
9072 SmallVector<StoreSDNode *, 8> Stores = {N};
9073 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9074 unsigned MaxWideNumBits = 64;
9075 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9076 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9077 // All stores must be the same size to ensure that we are writing all of the
9078 // bytes in the wide value.
9079 // This store should have exactly one use as a chain operand for another
9080 // store in the merging set. If there are other chain uses, then the
9081 // transform may not be safe because order of loads/stores outside of this
9082 // set may not be preserved.
9083 // TODO: We could allow multiple sizes by tracking each stored byte.
9084 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9085 Store->isIndexed() || !Store->hasOneUse())
9086 return SDValue();
9087 Stores.push_back(Store);
9088 Chain = Store->getChain();
9089 if (MaxStores < Stores.size())
9090 return SDValue();
9091 }
9092 // There is no reason to continue if we do not have at least a pair of stores.
9093 if (Stores.size() < 2)
9094 return SDValue();
9095
9096 // Handle simple types only.
9097 LLVMContext &Context = *DAG.getContext();
9098 unsigned NumStores = Stores.size();
9099 unsigned WideNumBits = NumStores * NarrowNumBits;
9100 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9101 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
9102 return SDValue();
9103
9104 // Check if all bytes of the source value that we are looking at are stored
9105 // to the same base address. Collect offsets from Base address into OffsetMap.
9106 SDValue SourceValue;
9107 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9108 int64_t FirstOffset = INT64_MAX;
9109 StoreSDNode *FirstStore = nullptr;
9110 std::optional<BaseIndexOffset> Base;
9111 for (auto *Store : Stores) {
9112 // All the stores store different parts of the CombinedValue. A truncate is
9113 // required to get the partial value.
9114 SDValue Trunc = Store->getValue();
9115 if (Trunc.getOpcode() != ISD::TRUNCATE)
9116 return SDValue();
9117 // Other than the first/last part, a shift operation is required to get the
9118 // offset.
9119 int64_t Offset = 0;
9120 SDValue WideVal = Trunc.getOperand(0);
9121 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9122 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9123 // The shift amount must be a constant multiple of the narrow type.
9124 // It is translated to the offset address in the wide source value "y".
9125 //
9126 // x = srl y, ShiftAmtC
9127 // i8 z = trunc x
9128 // store z, ...
9129 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9130 if (ShiftAmtC % NarrowNumBits != 0)
9131 return SDValue();
9132
9133 // Make sure we aren't reading bits that are shifted in.
9134 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9135 return SDValue();
9136
9137 Offset = ShiftAmtC / NarrowNumBits;
9138 WideVal = WideVal.getOperand(0);
9139 }
9140
9141 // Stores must share the same source value with different offsets.
9142 if (!SourceValue)
9143 SourceValue = WideVal;
9144 else if (SourceValue != WideVal) {
9145 // Truncate and extends can be stripped to see if the values are related.
9146 if (stripTruncAndExt(SourceValue) != WideVal &&
9147 stripTruncAndExt(WideVal) != SourceValue)
9148 return SDValue();
9149
9150 if (WideVal.getScalarValueSizeInBits() >
9151 SourceValue.getScalarValueSizeInBits())
9152 SourceValue = WideVal;
9153
9154 // Give up if the source value type is smaller than the store size.
9155 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
9156 return SDValue();
9157 }
9158
9159 // Stores must share the same base address.
9160 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9161 int64_t ByteOffsetFromBase = 0;
9162 if (!Base)
9163 Base = Ptr;
9164 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9165 return SDValue();
9166
9167 // Remember the first store.
9168 if (ByteOffsetFromBase < FirstOffset) {
9169 FirstStore = Store;
9170 FirstOffset = ByteOffsetFromBase;
9171 }
9172 // Map the offset in the store and the offset in the combined value, and
9173 // early return if it has been set before.
9174 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9175 return SDValue();
9176 OffsetMap[Offset] = ByteOffsetFromBase;
9177 }
9178
9179 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9180 assert(FirstStore && "First store must be set");
9181
9182 // Check that a store of the wide type is both allowed and fast on the target
9183 const DataLayout &Layout = DAG.getDataLayout();
9184 unsigned Fast = 0;
9185 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9186 *FirstStore->getMemOperand(), &Fast);
9187 if (!Allowed || !Fast)
9188 return SDValue();
9189
9190 // Check if the pieces of the value are going to the expected places in memory
9191 // to merge the stores.
9192 auto checkOffsets = [&](bool MatchLittleEndian) {
9193 if (MatchLittleEndian) {
9194 for (unsigned i = 0; i != NumStores; ++i)
9195 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9196 return false;
9197 } else { // MatchBigEndian by reversing loop counter.
9198 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9199 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9200 return false;
9201 }
9202 return true;
9203 };
9204
9205 // Check if the offsets line up for the native data layout of this target.
9206 bool NeedBswap = false;
9207 bool NeedRotate = false;
9208 if (!checkOffsets(Layout.isLittleEndian())) {
9209 // Special-case: check if byte offsets line up for the opposite endian.
9210 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9211 NeedBswap = true;
9212 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9213 NeedRotate = true;
9214 else
9215 return SDValue();
9216 }
9217
9218 SDLoc DL(N);
9219 if (WideVT != SourceValue.getValueType()) {
9220 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9221 "Unexpected store value to merge");
9222 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9223 }
9224
9225 // Before legalize we can introduce illegal bswaps/rotates which will be later
9226 // converted to an explicit bswap sequence. This way we end up with a single
9227 // store and byte shuffling instead of several stores and byte shuffling.
9228 if (NeedBswap) {
9229 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9230 } else if (NeedRotate) {
9231 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9232 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9233 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9234 }
9235
9236 SDValue NewStore =
9237 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9238 FirstStore->getPointerInfo(), FirstStore->getAlign());
9239
9240 // Rely on other DAG combine rules to remove the other individual stores.
9241 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9242 return NewStore;
9243}
9244
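// ---- Editorial sketch (not part of DAGCombiner.cpp) ----
// A standalone, runtime illustration of the NeedRotate case in
// mergeTruncStores above: two half-width stores written in the opposite
// order of the host byte order are equivalent to one wide store of the value
// rotated by half its width. The assert holds on a little-endian host; on a
// big-endian host the narrow stores already match the native layout and no
// rotate is needed.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t Val = 0x11223344u;
  unsigned char Narrow[4], Wide[4];
  // Two i16 stores, high half first (byte-reversed relative to little endian).
  const uint16_t Hi = uint16_t(Val >> 16), Lo = uint16_t(Val);
  std::memcpy(Narrow + 0, &Hi, 2);
  std::memcpy(Narrow + 2, &Lo, 2);
  // Merged form: a single i32 store of (rotr Val, 16).
  const uint32_t Rot = (Val >> 16) | (Val << 16);
  std::memcpy(Wide, &Rot, 4);
  assert(std::memcmp(Narrow, Wide, 4) == 0);
  return 0;
}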
9245/// Match a pattern where a wide type scalar value is loaded by several narrow
9246/// loads and combined by shifts and ors. Fold it into a single load or a load
9247/// and a BSWAP if the targets supports it.
9248///
9249/// Assuming little endian target:
9250/// i8 *a = ...
9251/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9252/// =>
9253/// i32 val = *((i32)a)
9254///
9255/// i8 *a = ...
9256/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9257/// =>
9258/// i32 val = BSWAP(*((i32)a))
9259///
9260/// TODO: This rule matches complex patterns with OR node roots and doesn't
9261/// interact well with the worklist mechanism. When a part of the pattern is
9262/// updated (e.g. one of the loads) its direct users are put into the worklist,
9263/// but the root node of the pattern which triggers the load combine is not
9264/// necessarily a direct user of the changed node. For example, once the address
9265/// of the t28 load is reassociated, load combine won't be triggered:
9266/// t25: i32 = add t4, Constant:i32<2>
9267/// t26: i64 = sign_extend t25
9268/// t27: i64 = add t2, t26
9269/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9270/// t29: i32 = zero_extend t28
9271/// t32: i32 = shl t29, Constant:i8<8>
9272/// t33: i32 = or t23, t32
9273/// As a possible fix visitLoad can check if the load can be a part of a load
9274/// combine pattern and add corresponding OR roots to the worklist.
9275SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9276 assert(N->getOpcode() == ISD::OR &&
9277 "Can only match load combining against OR nodes");
9278
9279 // Handles simple types only
9280 EVT VT = N->getValueType(0);
9281 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9282 return SDValue();
9283 unsigned ByteWidth = VT.getSizeInBits() / 8;
9284
9285 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9286 auto MemoryByteOffset = [&](SDByteProvider P) {
9287 assert(P.hasSrc() && "Must be a memory byte provider");
9288 auto *Load = cast<LoadSDNode>(P.Src.value());
9289
9290 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9291
9292 assert(LoadBitWidth % 8 == 0 &&
9293 "can only analyze providers for individual bytes not bit");
9294 unsigned LoadByteWidth = LoadBitWidth / 8;
9295 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9296 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9297 };
9298
9299 std::optional<BaseIndexOffset> Base;
9300 SDValue Chain;
9301
9302 SmallPtrSet<LoadSDNode *, 8> Loads;
9303 std::optional<SDByteProvider> FirstByteProvider;
9304 int64_t FirstOffset = INT64_MAX;
9305
9306 // Check if all the bytes of the OR we are looking at are loaded from the same
9307 // base address. Collect bytes offsets from Base address in ByteOffsets.
9308 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9309 unsigned ZeroExtendedBytes = 0;
9310 for (int i = ByteWidth - 1; i >= 0; --i) {
9311 auto P =
9312 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9313 /*StartingIndex*/ i);
9314 if (!P)
9315 return SDValue();
9316
9317 if (P->isConstantZero()) {
9318 // It's OK for the N most significant bytes to be 0, we can just
9319 // zero-extend the load.
9320 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9321 return SDValue();
9322 continue;
9323 }
9324 assert(P->hasSrc() && "provenance should either be memory or zero");
9325 auto *L = cast<LoadSDNode>(P->Src.value());
9326
9327 // All loads must share the same chain
9328 SDValue LChain = L->getChain();
9329 if (!Chain)
9330 Chain = LChain;
9331 else if (Chain != LChain)
9332 return SDValue();
9333
9334 // Loads must share the same base address
9335 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9336 int64_t ByteOffsetFromBase = 0;
9337
9338 // For vector loads, the expected load combine pattern will have an
9339 // ExtractElement for each index in the vector. While each of these
9340 // ExtractElements will be accessing the same base address as determined
9341 // by the load instruction, the actual bytes they interact with will differ
9342 // due to different ExtractElement indices. To accurately determine the
9343 // byte position of an ExtractElement, we offset the base load ptr with
9344 // the index multiplied by the byte size of each element in the vector.
9345 if (L->getMemoryVT().isVector()) {
9346 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9347 if (LoadWidthInBit % 8 != 0)
9348 return SDValue();
9349 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9350 Ptr.addToOffset(ByteOffsetFromVector);
9351 }
9352
9353 if (!Base)
9354 Base = Ptr;
9355
9356 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9357 return SDValue();
9358
9359 // Calculate the offset of the current byte from the base address
9360 ByteOffsetFromBase += MemoryByteOffset(*P);
9361 ByteOffsets[i] = ByteOffsetFromBase;
9362
9363 // Remember the first byte load
9364 if (ByteOffsetFromBase < FirstOffset) {
9365 FirstByteProvider = P;
9366 FirstOffset = ByteOffsetFromBase;
9367 }
9368
9369 Loads.insert(L);
9370 }
9371
9372 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9373 "memory, so there must be at least one load which produces the value");
9374 assert(Base && "Base address of the accessed memory location must be set");
9375 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9376
9377 bool NeedsZext = ZeroExtendedBytes > 0;
9378
9379 EVT MemVT =
9380 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9381
9382 if (!MemVT.isSimple())
9383 return SDValue();
9384
9385 // Before legalize we can introduce too wide illegal loads which will be later
9386 // split into legal sized loads. This enables us to combine i64 load by i8
9387 // patterns to a couple of i32 loads on 32 bit targets.
9388 if (LegalOperations &&
9389 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9390 MemVT))
9391 return SDValue();
9392
9393 // Check if the bytes of the OR we are looking at match with either big or
9394 // little endian value load
9395 std::optional<bool> IsBigEndian = isBigEndian(
9396 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9397 if (!IsBigEndian)
9398 return SDValue();
9399
9400 assert(FirstByteProvider && "must be set");
9401
9402 // Ensure that the first byte is loaded from zero offset of the first load.
9403 // So the combined value can be loaded from the first load address.
9404 if (MemoryByteOffset(*FirstByteProvider) != 0)
9405 return SDValue();
9406 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9407
9408 // The node we are looking at matches with the pattern, check if we can
9409 // replace it with a single (possibly zero-extended) load and bswap + shift if
9410 // needed.
9411
9412 // If the load needs byte swap check if the target supports it
9413 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9414
9415 // Before legalize we can introduce illegal bswaps which will be later
9416 // converted to an explicit bswap sequence. This way we end up with a single
9417 // load and byte shuffling instead of several loads and byte shuffling.
9418 // We do not introduce illegal bswaps when zero-extending as this tends to
9419 // introduce too many arithmetic instructions.
9420 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9421 !TLI.isOperationLegal(ISD::BSWAP, VT))
9422 return SDValue();
9423
9424 // If we need to bswap and zero extend, we have to insert a shift. Check that
9425 // it is legal.
9426 if (NeedsBswap && NeedsZext && LegalOperations &&
9427 !TLI.isOperationLegal(ISD::SHL, VT))
9428 return SDValue();
9429
9430 // Check that a load of the wide type is both allowed and fast on the target
9431 unsigned Fast = 0;
9432 bool Allowed =
9433 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9434 *FirstLoad->getMemOperand(), &Fast);
9435 if (!Allowed || !Fast)
9436 return SDValue();
9437
9438 SDValue NewLoad =
9439 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9440 Chain, FirstLoad->getBasePtr(),
9441 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9442
9443 // Transfer chain users from old loads to the new load.
9444 for (LoadSDNode *L : Loads)
9445 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9446
9447 if (!NeedsBswap)
9448 return NewLoad;
9449
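  // When both a zero-extension and a byte swap are needed, shift the loaded
  // bits to the top first so that the BSWAP moves them back into the low
  // bytes. Illustrative example for VT == i32 with ZeroExtendedBytes == 2: a
  // loaded i16 value 0xB1B0 becomes 0xB1B00000 after the shift and 0x0000B0B1
  // after the BSWAP, i.e. the two loaded bytes are swapped and the high bytes
  // stay zero, as required.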
9450 SDValue ShiftedLoad =
9451 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9452 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9453 VT, SDLoc(N)))
9454 : NewLoad;
9455 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9456}
9457
9458// If the target has andn, bsl, or a similar bit-select instruction,
9459// we want to unfold masked merge, with canonical pattern of:
9460// | A | |B|
9461// ((x ^ y) & m) ^ y
9462// | D |
9463// Into:
9464// (x & m) | (y & ~m)
9465// If y is a constant, m is not a 'not', and the 'andn' does not work with
9466// immediates, we unfold into a different pattern:
9467// ~(~x & m) & (m | y)
9468// If x is a constant, m is a 'not', and the 'andn' does not work with
9469// immediates, we unfold into a different pattern:
9470// (x | ~m) & ~(~m & ~y)
9471// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9472// the very least that breaks andnpd / andnps patterns, and because those
9473// patterns are simplified in IR and shouldn't be created in the DAG
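// Worked example with illustrative 4-bit values: x = 0b1100, y = 0b0110,
// m = 0b1010. ((x ^ y) & m) ^ y = (0b1010 & 0b1010) ^ 0b0110 = 0b1100, which
// equals (x & m) | (y & ~m) = 0b1000 | 0b0100 = 0b1100, i.e. the result takes
// bits from x where m is set and from y where m is clear.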
9474SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9475 assert(N->getOpcode() == ISD::XOR);
9476
9477 // Don't touch 'not' (i.e. where y = -1).
9478 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9479 return SDValue();
9480
9481 EVT VT = N->getValueType(0);
9482
9483 // There are 3 commutable operators in the pattern,
9484 // so we have to deal with 8 possible variants of the basic pattern.
9485 SDValue X, Y, M;
9486 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9487 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9488 return false;
9489 SDValue Xor = And.getOperand(XorIdx);
9490 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9491 return false;
9492 SDValue Xor0 = Xor.getOperand(0);
9493 SDValue Xor1 = Xor.getOperand(1);
9494 // Don't touch 'not' (i.e. where y = -1).
9495 if (isAllOnesOrAllOnesSplat(Xor1))
9496 return false;
9497 if (Other == Xor0)
9498 std::swap(Xor0, Xor1);
9499 if (Other != Xor1)
9500 return false;
9501 X = Xor0;
9502 Y = Xor1;
9503 M = And.getOperand(XorIdx ? 0 : 1);
9504 return true;
9505 };
9506
9507 SDValue N0 = N->getOperand(0);
9508 SDValue N1 = N->getOperand(1);
9509 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9510 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9511 return SDValue();
9512
9513 // Don't do anything if the mask is constant. This should not be reachable.
9514 // InstCombine should have already unfolded this pattern, and DAGCombiner
9515 // probably shouldn't produce it either.
9516 if (isa<ConstantSDNode>(M.getNode()))
9517 return SDValue();
9518
9519 // We can transform if the target has AndNot
9520 if (!TLI.hasAndNot(M))
9521 return SDValue();
9522
9523 SDLoc DL(N);
9524
9525 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9526 // a bitwise not that would already allow ANDN to be used.
9527 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9528 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9529 // If not, we need to do a bit more work to make sure andn is still used.
9530 SDValue NotX = DAG.getNOT(DL, X, VT);
9531 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9532 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9533 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9534 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9535 }
9536
9537 // If X is a constant and M is a bitwise not, check that 'andn' works with
9538 // immediates.
9539 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9540 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9541 // If not, we need to do a bit more work to make sure andn is still used.
9542 SDValue NotM = M.getOperand(0);
9543 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9544 SDValue NotY = DAG.getNOT(DL, Y, VT);
9545 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9546 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9547 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9548 }
9549
9550 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9551 SDValue NotM = DAG.getNOT(DL, M, VT);
9552 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9553
9554 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9555}
9556
9557SDValue DAGCombiner::visitXOR(SDNode *N) {
9558 SDValue N0 = N->getOperand(0);
9559 SDValue N1 = N->getOperand(1);
9560 EVT VT = N0.getValueType();
9561 SDLoc DL(N);
9562
9563 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9564 if (N0.isUndef() && N1.isUndef())
9565 return DAG.getConstant(0, DL, VT);
9566
9567 // fold (xor x, undef) -> undef
9568 if (N0.isUndef())
9569 return N0;
9570 if (N1.isUndef())
9571 return N1;
9572
9573 // fold (xor c1, c2) -> c1^c2
9574 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9575 return C;
9576
9577 // canonicalize constant to RHS
9578 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9579 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9580 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9581
9582 // fold vector ops
9583 if (VT.isVector()) {
9584 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9585 return FoldedVOp;
9586
9587 // fold (xor x, 0) -> x, vector edition
9588 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9589 return N0;
9590 }
9591
9592 // fold (xor x, 0) -> x
9593 if (isNullConstant(N1))
9594 return N0;
9595
9596 if (SDValue NewSel = foldBinOpIntoSelect(N))
9597 return NewSel;
9598
9599 // reassociate xor
9600 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9601 return RXOR;
9602
9603 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9604 if (SDValue SD =
9605 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9606 return SD;
9607
9608 // fold (a^b) -> (a|b) iff a and b share no bits.
9609 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9610 DAG.haveNoCommonBitsSet(N0, N1))
9611 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9612
9613 // look for 'add-like' folds:
9614 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9615 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9616 isMinSignedConstant(N1))
9617 if (SDValue Combined = visitADDLike(N))
9618 return Combined;
9619
9620 // fold !(x cc y) -> (x !cc y)
9621 unsigned N0Opcode = N0.getOpcode();
9622 SDValue LHS, RHS, CC;
9623 if (TLI.isConstTrueVal(N1) &&
9624 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9625 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9626 LHS.getValueType());
9627 if (!LegalOperations ||
9628 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9629 switch (N0Opcode) {
9630 default:
9631 llvm_unreachable("Unhandled SetCC Equivalent!");
9632 case ISD::SETCC:
9633 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9634 case ISD::SELECT_CC:
9635 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9636 N0.getOperand(3), NotCC);
9637 case ISD::STRICT_FSETCC:
9638 case ISD::STRICT_FSETCCS: {
9639 if (N0.hasOneUse()) {
9640 // FIXME Can we handle multiple uses? Could we token factor the chain
9641 // results from the new/old setcc?
9642 SDValue SetCC =
9643 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9644 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9645 CombineTo(N, SetCC);
9646 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9647 recursivelyDeleteUnusedNodes(N0.getNode());
9648 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9649 }
9650 break;
9651 }
9652 }
9653 }
9654 }
9655
9656 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9657 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9658 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9659 SDValue V = N0.getOperand(0);
9660 SDLoc DL0(N0);
9661 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9662 DAG.getConstant(1, DL0, V.getValueType()));
9663 AddToWorklist(V.getNode());
9664 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9665 }
9666
9667 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9668 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9669 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9670 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9671 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9672 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9673 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9674 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9675 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9676 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9677 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9678 }
9679 }
9680 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9681 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9682 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9683 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9684 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9685 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9686 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9687 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9688 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9689 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9690 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9691 }
9692 }
9693
9694 // fold (not (neg x)) -> (add X, -1)
9695 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9696 // Y is a constant or the subtract has a single use.
9697 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9698 isNullConstant(N0.getOperand(0))) {
9699 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9700 DAG.getAllOnesConstant(DL, VT));
9701 }
9702
9703 // fold (not (add X, -1)) -> (neg X)
9704 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9705 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9706 return DAG.getNegative(N0.getOperand(0), DL, VT);
9707 }
9708
9709 // fold (xor (and x, y), y) -> (and (not x), y)
9710 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9711 SDValue X = N0.getOperand(0);
9712 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9713 AddToWorklist(NotX.getNode());
9714 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9715 }
9716
9717 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9718 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9719 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9720 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9721 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9722 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9723 SDValue S0 = S.getOperand(0);
9724 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9725 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9726 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9727 return DAG.getNode(ISD::ABS, DL, VT, S0);
9728 }
9729 }
9730
9731 // fold (xor x, x) -> 0
9732 if (N0 == N1)
9733 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9734
9735 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9736 // Here is a concrete example of this equivalence:
9737 // i16 x == 14
9738 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9739 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9740 //
9741 // =>
9742 //
9743 // i16 ~1 == 0b1111111111111110
9744 // i16 rol(~1, 14) == 0b1011111111111111
9745 //
9746 // Some additional tips to help conceptualize this transform:
9747 // - Try to see the operation as placing a single zero in a value of all ones.
9748 // - There exists no value for x which would allow the result to contain zero.
9749 // - Values of x larger than the bitwidth are undefined and do not require a
9750 // consistent result.
9751 // - Pushing the zero left requires shifting one bits in from the right.
9752 // A rotate left of ~1 is a nice way of achieving the desired result.
9753 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9754 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9755 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
9756 N0.getOperand(1));
9757 }
9758
9759 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9760 if (N0Opcode == N1.getOpcode())
9761 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9762 return V;
9763
9764 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9765 return R;
9766 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9767 return R;
9768 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9769 return R;
9770
9771 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9772 if (SDValue MM = unfoldMaskedMerge(N))
9773 return MM;
9774
9775 // Simplify the expression using non-local knowledge.
9776 if (SimplifyDemandedBits(SDValue(N, 0)))
9777 return SDValue(N, 0);
9778
9779 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9780 return Combined;
9781
9782 return SDValue();
9783}
9784
9785/// If we have a shift-by-constant of a bitwise logic op that itself has a
9786/// shift-by-constant operand with identical opcode, we may be able to convert
9787/// that into 2 independent shifts followed by the logic op. This is a
9788/// throughput improvement.
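/// Illustrative instance (example constants, assuming i32 operands):
///   srl (xor (srl X, 3), Y), 2 --> xor (srl X, 5), (srl Y, 2)
/// The two shift amounts are summed for X, while Y only picks up the outer
/// shift.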
9789 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9790 // Match a one-use bitwise logic op.
9791 SDValue LogicOp = Shift->getOperand(0);
9792 if (!LogicOp.hasOneUse())
9793 return SDValue();
9794
9795 unsigned LogicOpcode = LogicOp.getOpcode();
9796 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9797 LogicOpcode != ISD::XOR)
9798 return SDValue();
9799
9800 // Find a matching one-use shift by constant.
9801 unsigned ShiftOpcode = Shift->getOpcode();
9802 SDValue C1 = Shift->getOperand(1);
9803 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9804 assert(C1Node && "Expected a shift with constant operand");
9805 const APInt &C1Val = C1Node->getAPIntValue();
9806 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9807 const APInt *&ShiftAmtVal) {
9808 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9809 return false;
9810
9811 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9812 if (!ShiftCNode)
9813 return false;
9814
9815 // Capture the shifted operand and shift amount value.
9816 ShiftOp = V.getOperand(0);
9817 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9818
9819 // Shift amount types do not have to match their operand type, so check that
9820 // the constants are the same width.
9821 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9822 return false;
9823
9824 // The fold is not valid if the sum of the shift values doesn't fit in the
9825 // given shift amount type.
9826 bool Overflow = false;
9827 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9828 if (Overflow)
9829 return false;
9830
9831 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9832 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9833 return false;
9834
9835 return true;
9836 };
9837
9838 // Logic ops are commutative, so check each operand for a match.
9839 SDValue X, Y;
9840 const APInt *C0Val;
9841 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9842 Y = LogicOp.getOperand(1);
9843 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9844 Y = LogicOp.getOperand(0);
9845 else
9846 return SDValue();
9847
9848 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9849 SDLoc DL(Shift);
9850 EVT VT = Shift->getValueType(0);
9851 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9852 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9853 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9854 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9855 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9856 LogicOp->getFlags());
9857}
9858
9859/// Handle transforms common to the three shifts, when the shift amount is a
9860/// constant.
9861/// We are looking for: (shift being one of shl/sra/srl)
9862/// shift (binop X, C0), C1
9863/// And want to transform into:
9864/// binop (shift X, C1), (shift C0, C1)
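/// Illustrative instance (example constants): shl (add X, 7), 2 becomes
/// add (shl X, 2), 28, since the constant operand is simply shifted as well.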
9865SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9866 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9867
9868 // Do not turn a 'not' into a regular xor.
9869 if (isBitwiseNot(N->getOperand(0)))
9870 return SDValue();
9871
9872 // The inner binop must be one-use, since we want to replace it.
9873 SDValue LHS = N->getOperand(0);
9874 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9875 return SDValue();
9876
9877 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9878 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9879 return R;
9880
9881 // We want to pull some binops through shifts, so that we have (and (shift))
9882 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9883 // thing happens with address calculations, so it's important to canonicalize
9884 // it.
9885 switch (LHS.getOpcode()) {
9886 default:
9887 return SDValue();
9888 case ISD::OR:
9889 case ISD::XOR:
9890 case ISD::AND:
9891 break;
9892 case ISD::ADD:
9893 if (N->getOpcode() != ISD::SHL)
9894 return SDValue(); // only shl(add) not sr[al](add).
9895 break;
9896 }
9897
9898 // FIXME: disable this unless the input to the binop is a shift by a constant
9899 // or is copy/select. Enable this in other cases once we figure out when it is
9900 // profitable.
9901 SDValue BinOpLHSVal = LHS.getOperand(0);
9902 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9903 BinOpLHSVal.getOpcode() == ISD::SRA ||
9904 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9905 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9906 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9907 BinOpLHSVal.getOpcode() == ISD::SELECT;
9908
9909 if (!IsShiftByConstant && !IsCopyOrSelect)
9910 return SDValue();
9911
9912 if (IsCopyOrSelect && N->hasOneUse())
9913 return SDValue();
9914
9915 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9916 SDLoc DL(N);
9917 EVT VT = N->getValueType(0);
9918 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9919 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9920 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9921 N->getOperand(1));
9922 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9923 }
9924
9925 return SDValue();
9926}
9927
9928SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9929 assert(N->getOpcode() == ISD::TRUNCATE);
9930 assert(N->getOperand(0).getOpcode() == ISD::AND);
9931
9932 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9933 EVT TruncVT = N->getValueType(0);
9934 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9935 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9936 SDValue N01 = N->getOperand(0).getOperand(1);
9937 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9938 SDLoc DL(N);
9939 SDValue N00 = N->getOperand(0).getOperand(0);
9940 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9941 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9942 AddToWorklist(Trunc00.getNode());
9943 AddToWorklist(Trunc01.getNode());
9944 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9945 }
9946 }
9947
9948 return SDValue();
9949}
9950
9951SDValue DAGCombiner::visitRotate(SDNode *N) {
9952 SDLoc dl(N);
9953 SDValue N0 = N->getOperand(0);
9954 SDValue N1 = N->getOperand(1);
9955 EVT VT = N->getValueType(0);
9956 unsigned Bitsize = VT.getScalarSizeInBits();
9957
9958 // fold (rot x, 0) -> x
9959 if (isNullOrNullSplat(N1))
9960 return N0;
9961
9962 // fold (rot x, c) -> x iff (c % BitSize) == 0
9963 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9964 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9965 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9966 return N0;
9967 }
9968
9969 // fold (rot x, c) -> (rot x, c % BitSize)
9970 bool OutOfRange = false;
9971 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9972 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9973 return true;
9974 };
9975 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9976 EVT AmtVT = N1.getValueType();
9977 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9978 if (SDValue Amt =
9979 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9980 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9981 }
9982
9983 // rot i16 X, 8 --> bswap X
9984 auto *RotAmtC = isConstOrConstSplat(N1);
9985 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9986 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9987 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9988
9989 // Simplify the operands using demanded-bits information.
9990 if (SimplifyDemandedBits(SDValue(N, 0)))
9991 return SDValue(N, 0);
9992
9993 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9994 if (N1.getOpcode() == ISD::TRUNCATE &&
9995 N1.getOperand(0).getOpcode() == ISD::AND) {
9996 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9997 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9998 }
9999
10000 unsigned NextOp = N0.getOpcode();
10001
10002 // fold (rot* (rot* x, c2), c1)
10003 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
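  // Illustrative example on i32: rotl (rotr X, 5), 3
  // --> rotl X, ((3 % 32) - (5 % 32) + 32) % 32 = rotl X, 30.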
10004 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10005 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10006 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10007 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10008 EVT ShiftVT = N1.getValueType();
10009 bool SameSide = (N->getOpcode() == NextOp);
10010 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10011 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10012 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10013 {N1, BitsizeC});
10014 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10015 {N0.getOperand(1), BitsizeC});
10016 if (Norm1 && Norm2)
10017 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10018 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10019 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10020 {CombinedShift, BitsizeC});
10021 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10022 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10023 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10024 CombinedShiftNorm);
10025 }
10026 }
10027 }
10028 return SDValue();
10029}
10030
10031SDValue DAGCombiner::visitSHL(SDNode *N) {
10032 SDValue N0 = N->getOperand(0);
10033 SDValue N1 = N->getOperand(1);
10034 if (SDValue V = DAG.simplifyShift(N0, N1))
10035 return V;
10036
10037 SDLoc DL(N);
10038 EVT VT = N0.getValueType();
10039 EVT ShiftVT = N1.getValueType();
10040 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10041
10042 // fold (shl c1, c2) -> c1<<c2
10043 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10044 return C;
10045
10046 // fold vector ops
10047 if (VT.isVector()) {
10048 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10049 return FoldedVOp;
10050
10051 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10052 // If setcc produces all-one true value then:
10053 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10054 if (N1CV && N1CV->isConstant()) {
10055 if (N0.getOpcode() == ISD::AND) {
10056 SDValue N00 = N0->getOperand(0);
10057 SDValue N01 = N0->getOperand(1);
10058 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10059
10060 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10061 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10062 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10063 if (SDValue C =
10064 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10065 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10066 }
10067 }
10068 }
10069 }
10070
10071 if (SDValue NewSel = foldBinOpIntoSelect(N))
10072 return NewSel;
10073
10074 // if (shl x, c) is known to be zero, return 0
10075 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10076 return DAG.getConstant(0, DL, VT);
10077
10078 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10079 if (N1.getOpcode() == ISD::TRUNCATE &&
10080 N1.getOperand(0).getOpcode() == ISD::AND) {
10081 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10082 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10083 }
10084
10085 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10086 if (N0.getOpcode() == ISD::SHL) {
10087 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10088 ConstantSDNode *RHS) {
10089 APInt c1 = LHS->getAPIntValue();
10090 APInt c2 = RHS->getAPIntValue();
10091 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10092 return (c1 + c2).uge(OpSizeInBits);
10093 };
10094 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10095 return DAG.getConstant(0, DL, VT);
10096
10097 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10098 ConstantSDNode *RHS) {
10099 APInt c1 = LHS->getAPIntValue();
10100 APInt c2 = RHS->getAPIntValue();
10101 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10102 return (c1 + c2).ult(OpSizeInBits);
10103 };
10104 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10105 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10106 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10107 }
10108 }
10109
10110 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10111 // For this to be valid, the second form must not preserve any of the bits
10112 // that are shifted out by the inner shift in the first form. This means
10113 // the outer shift size must be >= the number of bits added by the ext.
10114 // As a corollary, we don't care what kind of ext it is.
10115 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10116 N0.getOpcode() == ISD::ANY_EXTEND ||
10117 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10118 N0.getOperand(0).getOpcode() == ISD::SHL) {
10119 SDValue N0Op0 = N0.getOperand(0);
10120 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10121 EVT InnerVT = N0Op0.getValueType();
10122 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10123
10124 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10125 ConstantSDNode *RHS) {
10126 APInt c1 = LHS->getAPIntValue();
10127 APInt c2 = RHS->getAPIntValue();
10128 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10129 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10130 (c1 + c2).uge(OpSizeInBits);
10131 };
10132 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10133 /*AllowUndefs*/ false,
10134 /*AllowTypeMismatch*/ true))
10135 return DAG.getConstant(0, DL, VT);
10136
10137 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10138 ConstantSDNode *RHS) {
10139 APInt c1 = LHS->getAPIntValue();
10140 APInt c2 = RHS->getAPIntValue();
10141 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10142 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10143 (c1 + c2).ult(OpSizeInBits);
10144 };
10145 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10146 /*AllowUndefs*/ false,
10147 /*AllowTypeMismatch*/ true)) {
10148 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10149 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10150 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10151 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10152 }
10153 }
10154
10155 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10156 // Only fold this if the inner zext has no other uses to avoid increasing
10157 // the total number of instructions.
10158 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10159 N0.getOperand(0).getOpcode() == ISD::SRL) {
10160 SDValue N0Op0 = N0.getOperand(0);
10161 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10162
10163 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10164 APInt c1 = LHS->getAPIntValue();
10165 APInt c2 = RHS->getAPIntValue();
10166 zeroExtendToMatch(c1, c2);
10167 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10168 };
10169 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10170 /*AllowUndefs*/ false,
10171 /*AllowTypeMismatch*/ true)) {
10172 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10173 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10174 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10175 AddToWorklist(NewSHL.getNode());
10176 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10177 }
10178 }
10179
10180 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10181 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10182 ConstantSDNode *RHS) {
10183 const APInt &LHSC = LHS->getAPIntValue();
10184 const APInt &RHSC = RHS->getAPIntValue();
10185 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10186 LHSC.getZExtValue() <= RHSC.getZExtValue();
10187 };
10188
10189 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10190 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10191 if (N0->getFlags().hasExact()) {
10192 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10193 /*AllowUndefs*/ false,
10194 /*AllowTypeMismatch*/ true)) {
10195 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10196 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10197 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10198 }
10199 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10200 /*AllowUndefs*/ false,
10201 /*AllowTypeMismatch*/ true)) {
10202 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10203 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10204 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10205 }
10206 }
10207
10208 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10209 // (and (srl x, (sub c1, c2)), MASK)
10210 // Only fold this if the inner shift has no other uses -- if it does,
10211 // folding this will increase the total number of instructions.
10212 if (N0.getOpcode() == ISD::SRL &&
10213 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10214 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10215 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10216 /*AllowUndefs*/ false,
10217 /*AllowTypeMismatch*/ true)) {
10218 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10219 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10220 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10221 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10222 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10223 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10224 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10225 }
10226 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10227 /*AllowUndefs*/ false,
10228 /*AllowTypeMismatch*/ true)) {
10229 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10230 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10231 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10232 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10233 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10234 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10235 }
10236 }
10237 }
10238
10239 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10240 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10241 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10242 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10243 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10244 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10245 }
10246
10247 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10248 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10249 // Variant of version done on multiply, except mul by a power of 2 is turned
10250 // into a shift.
10251 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10252 TLI.isDesirableToCommuteWithShift(N, Level)) {
10253 SDValue N01 = N0.getOperand(1);
10254 if (SDValue Shl1 =
10255 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10256 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10257 AddToWorklist(Shl0.getNode());
10258 SDNodeFlags Flags;
10259 // Preserve the disjoint flag for Or.
10260 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10261 Flags.setDisjoint(true);
10262 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10263 }
10264 }
10265
10266 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10267 // TODO: Add zext/add_nuw variant with suitable test coverage
10268 // TODO: Should we limit this with isLegalAddImmediate?
10269 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10270 N0.getOperand(0).getOpcode() == ISD::ADD &&
10271 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10272 TLI.isDesirableToCommuteWithShift(N, Level)) {
10273 SDValue Add = N0.getOperand(0);
10274 SDLoc DL(N0);
10275 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10276 {Add.getOperand(1)})) {
10277 if (SDValue ShlC =
10278 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10279 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10280 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10281 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10282 }
10283 }
10284 }
10285
10286 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10287 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10288 SDValue N01 = N0.getOperand(1);
10289 if (SDValue Shl =
10290 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10291 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10292 }
10293
10294 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10295 if (N1C && !N1C->isOpaque())
10296 if (SDValue NewSHL = visitShiftByConstant(N))
10297 return NewSHL;
10298
10299 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10300 // target.
10301 if (((N1.getOpcode() == ISD::CTTZ &&
10302 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10303 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10304 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10305 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10306 SDValue Y = N1.getOperand(0);
10307 SDLoc DL(N);
10308 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10309 SDValue And =
10310 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10311 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10312 }
10313
10314 if (SimplifyDemandedBits(SDValue(N, 0)))
10315 return SDValue(N, 0);
10316
10317 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10318 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10319 const APInt &C0 = N0.getConstantOperandAPInt(0);
10320 const APInt &C1 = N1C->getAPIntValue();
10321 return DAG.getVScale(DL, VT, C0 << C1);
10322 }
10323
10324 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10325 APInt ShlVal;
10326 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10327 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10328 const APInt &C0 = N0.getConstantOperandAPInt(0);
10329 if (ShlVal.ult(C0.getBitWidth())) {
10330 APInt NewStep = C0 << ShlVal;
10331 return DAG.getStepVector(DL, VT, NewStep);
10332 }
10333 }
10334
10335 return SDValue();
10336}
10337
10338// Transform a right shift of a multiply into a multiply-high.
10339// Examples:
10340 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10341 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10342 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10343 const TargetLowering &TLI) {
10344 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10345 "SRL or SRA node is required here!");
10346
10347 // Check the shift amount. Proceed with the transformation if the shift
10348 // amount is constant.
10349 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10350 if (!ShiftAmtSrc)
10351 return SDValue();
10352
10353 // The operation feeding into the shift must be a multiply.
10354 SDValue ShiftOperand = N->getOperand(0);
10355 if (ShiftOperand.getOpcode() != ISD::MUL)
10356 return SDValue();
10357
10358 // Both operands must be equivalent extend nodes.
10359 SDValue LeftOp = ShiftOperand.getOperand(0);
10360 SDValue RightOp = ShiftOperand.getOperand(1);
10361
10362 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10363 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10364
10365 if (!IsSignExt && !IsZeroExt)
10366 return SDValue();
10367
10368 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10369 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10370
10371 // return true if U may use the lower bits of its operands
10372 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10373 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10374 return true;
10375 }
10376 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10377 if (!UShiftAmtSrc) {
10378 return true;
10379 }
10380 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10381 return UShiftAmt < NarrowVTSize;
10382 };
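  // E.g. with NarrowVTSize == 32, a user such as an srl of the wide multiply
  // by 48 only reads the high half, so it does not count as a lower-bits
  // user; any other use, or a shift by less than 32, conservatively does.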
10383
10384 // If the lower part of the MUL is also used and MUL_LOHI is supported
10385 // do not introduce the MULH in favor of MUL_LOHI
10386 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10387 if (!ShiftOperand.hasOneUse() &&
10388 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10389 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10390 return SDValue();
10391 }
10392
10393 SDValue MulhRightOp;
10394 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10395 unsigned ActiveBits = IsSignExt
10396 ? Constant->getAPIntValue().getSignificantBits()
10397 : Constant->getAPIntValue().getActiveBits();
10398 if (ActiveBits > NarrowVTSize)
10399 return SDValue();
10400 MulhRightOp = DAG.getConstant(
10401 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10402 NarrowVT);
10403 } else {
10404 if (LeftOp.getOpcode() != RightOp.getOpcode())
10405 return SDValue();
10406 // Check that the two extend nodes are the same type.
10407 if (NarrowVT != RightOp.getOperand(0).getValueType())
10408 return SDValue();
10409 MulhRightOp = RightOp.getOperand(0);
10410 }
10411
10412 EVT WideVT = LeftOp.getValueType();
10413 // Proceed with the transformation if the wide types match.
10414 assert((WideVT == RightOp.getValueType()) &&
10415 "Cannot have a multiply node with two different operand types.");
10416
10417 // Proceed with the transformation if the wide type is twice as large
10418 // as the narrow type.
10419 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10420 return SDValue();
10421
10422 // Check the shift amount with the narrow type size.
10423 // Proceed with the transformation if the shift amount is the width
10424 // of the narrow type.
10425 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10426 if (ShiftAmt != NarrowVTSize)
10427 return SDValue();
10428
10429 // If the operation feeding into the MUL is a sign extend (sext),
10430 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10431 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10432
10433 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10434 // or if it is a vector type then we could transform to an acceptable type and
10435 // rely on legalization to split/combine the result.
10436 if (NarrowVT.isVector()) {
10437 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10438 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10439 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10440 return SDValue();
10441 } else {
10442 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10443 return SDValue();
10444 }
10445
10446 SDValue Result =
10447 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10448 bool IsSigned = N->getOpcode() == ISD::SRA;
10449 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10450}
10451
10452// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10453 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE
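// This relies on BSWAP/BITREVERSE being involutions that distribute over
// bitwise logic; e.g. bswap(xor(bswap(x), y)) == xor(x, bswap(y)).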
10454 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10455 unsigned Opcode = N->getOpcode();
10456 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10457 return SDValue();
10458
10459 SDValue N0 = N->getOperand(0);
10460 EVT VT = N->getValueType(0);
10461 SDLoc DL(N);
10462 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10463 SDValue OldLHS = N0.getOperand(0);
10464 SDValue OldRHS = N0.getOperand(1);
10465
10466 // If both operands are bswap/bitreverse, ignore the multiuse
10467 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10468 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10469 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10470 OldRHS.getOperand(0));
10471 }
10472
10473 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10474 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10475 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10476 NewBitReorder);
10477 }
10478
10479 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10480 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10481 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10482 OldRHS.getOperand(0));
10483 }
10484 }
10485 return SDValue();
10486}
10487
10488SDValue DAGCombiner::visitSRA(SDNode *N) {
10489 SDValue N0 = N->getOperand(0);
10490 SDValue N1 = N->getOperand(1);
10491 if (SDValue V = DAG.simplifyShift(N0, N1))
10492 return V;
10493
10494 SDLoc DL(N);
10495 EVT VT = N0.getValueType();
10496 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10497
10498 // fold (sra c1, c2) -> c1 >>s c2
10499 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10500 return C;
10501
10502 // Arithmetic shifting an all-sign-bit value is a no-op.
10503 // fold (sra 0, x) -> 0
10504 // fold (sra -1, x) -> -1
10505 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10506 return N0;
10507
10508 // fold vector ops
10509 if (VT.isVector())
10510 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10511 return FoldedVOp;
10512
10513 if (SDValue NewSel = foldBinOpIntoSelect(N))
10514 return NewSel;
10515
10516 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10517
10518 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10519 // clamp (add c1, c2) to max shift.
10520 if (N0.getOpcode() == ISD::SRA) {
10521 EVT ShiftVT = N1.getValueType();
10522 EVT ShiftSVT = ShiftVT.getScalarType();
10523 SmallVector<SDValue, 16> ShiftValues;
10524
10525 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10526 APInt c1 = LHS->getAPIntValue();
10527 APInt c2 = RHS->getAPIntValue();
10528 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10529 APInt Sum = c1 + c2;
10530 unsigned ShiftSum =
10531 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10532 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10533 return true;
10534 };
10535 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10536 SDValue ShiftValue;
10537 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10538 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10539 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10540 assert(ShiftValues.size() == 1 &&
10541 "Expected matchBinaryPredicate to return one element for "
10542 "SPLAT_VECTORs");
10543 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10544 } else
10545 ShiftValue = ShiftValues[0];
10546 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10547 }
10548 }
10549
10550 // fold (sra (shl X, m), (sub result_size, n))
10551 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10552 // result_size - n != m.
10553 // If truncate is free for the target sext(shl) is likely to result in better
10554 // code.
10555 if (N0.getOpcode() == ISD::SHL && N1C) {
10556 // Get the two constants of the shifts, CN0 = m, CN = n.
10557 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10558 if (N01C) {
10559 LLVMContext &Ctx = *DAG.getContext();
10560 // Determine what the truncate's result bitsize and type would be.
10561 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10562
10563 if (VT.isVector())
10564 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10565
10566 // Determine the residual right-shift amount.
10567 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10568
10569 // If the shift is not a no-op (in which case this should be just a sign
10570 // extend already), the truncated to type is legal, sign_extend is legal
10571 // on that type, and the truncate to that type is both legal and free,
10572 // perform the transform.
10573 if ((ShiftAmt > 0) &&
10574 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10575 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10576 TLI.isTruncateFree(VT, TruncVT)) {
10577 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10578 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10579 N0.getOperand(0), Amt);
10580 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10581 Shift);
10582 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10583 N->getValueType(0), Trunc);
10584 }
10585 }
10586 }
10587
10588 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10589 // sra (add (shl X, N1C), AddC), N1C -->
10590 // sext (add (trunc X to (width - N1C)), AddC')
10591 // sra (sub AddC, (shl X, N1C)), N1C -->
10592 // sext (sub AddC1',(trunc X to (width - N1C)))
10593 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10594 N0.hasOneUse()) {
10595 bool IsAdd = N0.getOpcode() == ISD::ADD;
10596 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10597 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10598 Shl.hasOneUse()) {
10599 // TODO: AddC does not need to be a splat.
10600 if (ConstantSDNode *AddC =
10601 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10602 // Determine what the truncate's type would be and ask the target if
10603 // that is a free operation.
10604 LLVMContext &Ctx = *DAG.getContext();
10605 unsigned ShiftAmt = N1C->getZExtValue();
10606 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10607 if (VT.isVector())
10608 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10609
10610 // TODO: The simple type check probably belongs in the default hook
10611 // implementation and/or target-specific overrides (because
10612 // non-simple types likely require masking when legalized), but
10613 // that restriction may conflict with other transforms.
10614 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10615 TLI.isTruncateFree(VT, TruncVT)) {
10616 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10617 SDValue ShiftC =
10618 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10619 TruncVT.getScalarSizeInBits()),
10620 DL, TruncVT);
10621 SDValue Add;
10622 if (IsAdd)
10623 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10624 else
10625 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10626 return DAG.getSExtOrTrunc(Add, DL, VT);
10627 }
10628 }
10629 }
10630 }
10631
10632 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10633 if (N1.getOpcode() == ISD::TRUNCATE &&
10634 N1.getOperand(0).getOpcode() == ISD::AND) {
10635 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10636 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10637 }
10638
10639 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10640 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10641 // if c1 is equal to the number of bits the trunc removes
10642 // TODO - support non-uniform vector shift amounts.
10643 if (N0.getOpcode() == ISD::TRUNCATE &&
10644 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10645 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10646 N0.getOperand(0).hasOneUse() &&
10647 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10648 SDValue N0Op0 = N0.getOperand(0);
10649 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10650 EVT LargeVT = N0Op0.getValueType();
10651 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10652 if (LargeShift->getAPIntValue() == TruncBits) {
10653 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10654 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10655 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10656 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10657 SDValue SRA =
10658 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10659 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10660 }
10661 }
10662 }
10663
10664 // Simplify, based on bits shifted out of the LHS.
10665 if (SimplifyDemandedBits(SDValue(N, 0)))
10666 return SDValue(N, 0);
10667
10668 // If the sign bit is known to be zero, switch this to a SRL.
10669 if (DAG.SignBitIsZero(N0))
10670 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10671
10672 if (N1C && !N1C->isOpaque())
10673 if (SDValue NewSRA = visitShiftByConstant(N))
10674 return NewSRA;
10675
10676 // Try to transform this shift into a multiply-high if
10677 // it matches the appropriate pattern detected in combineShiftToMULH.
10678 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10679 return MULH;
10680
10681 // Attempt to convert a sra of a load into a narrower sign-extending load.
10682 if (SDValue NarrowLoad = reduceLoadWidth(N))
10683 return NarrowLoad;
10684
10685 if (SDValue AVG = foldShiftToAvg(N))
10686 return AVG;
10687
10688 return SDValue();
10689}
10690
10691SDValue DAGCombiner::visitSRL(SDNode *N) {
10692 SDValue N0 = N->getOperand(0);
10693 SDValue N1 = N->getOperand(1);
10694 if (SDValue V = DAG.simplifyShift(N0, N1))
10695 return V;
10696
10697 SDLoc DL(N);
10698 EVT VT = N0.getValueType();
10699 EVT ShiftVT = N1.getValueType();
10700 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10701
10702 // fold (srl c1, c2) -> c1 >>u c2
10703 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10704 return C;
10705
10706 // fold vector ops
10707 if (VT.isVector())
10708 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10709 return FoldedVOp;
10710
10711 if (SDValue NewSel = foldBinOpIntoSelect(N))
10712 return NewSel;
10713
10714 // if (srl x, c) is known to be zero, return 0
10715 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10716 if (N1C &&
10717 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10718 return DAG.getConstant(0, DL, VT);
10719
10720 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10721 if (N0.getOpcode() == ISD::SRL) {
10722 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10723 ConstantSDNode *RHS) {
10724 APInt c1 = LHS->getAPIntValue();
10725 APInt c2 = RHS->getAPIntValue();
10726 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10727 return (c1 + c2).uge(OpSizeInBits);
10728 };
10729 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10730 return DAG.getConstant(0, DL, VT);
10731
10732 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10733 ConstantSDNode *RHS) {
10734 APInt c1 = LHS->getAPIntValue();
10735 APInt c2 = RHS->getAPIntValue();
10736 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10737 return (c1 + c2).ult(OpSizeInBits);
10738 };
10739 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10740 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10741 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10742 }
10743 }
10744
10745 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10746 N0.getOperand(0).getOpcode() == ISD::SRL) {
10747 SDValue InnerShift = N0.getOperand(0);
10748 // TODO - support non-uniform vector shift amounts.
10749 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10750 uint64_t c1 = N001C->getZExtValue();
10751 uint64_t c2 = N1C->getZExtValue();
10752 EVT InnerShiftVT = InnerShift.getValueType();
10753 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10754 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10755 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10756 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10757 if (c1 + OpSizeInBits == InnerShiftSize) {
10758 if (c1 + c2 >= InnerShiftSize)
10759 return DAG.getConstant(0, DL, VT);
10760 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10761 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10762 InnerShift.getOperand(0), NewShiftAmt);
10763 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10764 }
10765 // In the more general case, we can clear the high bits after the shift:
10766 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10767 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10768 c1 + c2 < InnerShiftSize) {
10769 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10770 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10771 InnerShift.getOperand(0), NewShiftAmt);
10772 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10773 OpSizeInBits - c2),
10774 DL, InnerShiftVT);
10775 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10776 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10777 }
10778 }
10779 }
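// A concrete instance of the fold above (example values, assuming x : i32
// truncated to i16): (srl (trunc (srl x, 16)), 4) becomes (trunc (srl x, 20))
// since c1 + OpSizeInBits == 32 and c1 + c2 < 32; any combined shift of 32 or
// more folds to 0 instead. When c1 + OpSizeInBits is not the inner width, the
// one-use form masks the high bits, e.g.
// (srl (trunc (srl x, 8)), 4) -> (trunc (and (srl x, 12), 0xFFF)).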
10780
10781 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10782 //                               (and (srl x, (sub c2, c1)), MASK)
10783 if (N0.getOpcode() == ISD::SHL &&
10784 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10785 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10786 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10787 ConstantSDNode *RHS) {
10788 const APInt &LHSC = LHS->getAPIntValue();
10789 const APInt &RHSC = RHS->getAPIntValue();
10790 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10791 LHSC.getZExtValue() <= RHSC.getZExtValue();
10792 };
10793 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10794 /*AllowUndefs*/ false,
10795 /*AllowTypeMismatch*/ true)) {
10796 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10797 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10798 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10799 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10800 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10801 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10802 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10803 }
10804 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10805 /*AllowUndefs*/ false,
10806 /*AllowTypeMismatch*/ true)) {
10807 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10808 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10809 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10810 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10811 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10812 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10813 }
10814 }
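// Example of both arms above (illustrative i8 values):
// (srl (shl x, 6), 2) -> (and (shl x, 4), 0x30) and
// (srl (shl x, 2), 6) -> (and (srl x, 4), 0x03); the shift difference is
// applied in whichever direction is non-negative and the mask keeps only the
// bits that survive both shifts.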
10815
10816 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10817 // TODO - support non-uniform vector shift amounts.
10818 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10819 // Shifting in all undef bits?
10820 EVT SmallVT = N0.getOperand(0).getValueType();
10821 unsigned BitSize = SmallVT.getScalarSizeInBits();
10822 if (N1C->getAPIntValue().uge(BitSize))
10823 return DAG.getUNDEF(VT);
10824
10825 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10826 uint64_t ShiftAmt = N1C->getZExtValue();
10827 SDLoc DL0(N0);
10828 SDValue SmallShift =
10829 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
10830 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
10831 AddToWorklist(SmallShift.getNode());
10832 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10833 return DAG.getNode(ISD::AND, DL, VT,
10834 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10835 DAG.getConstant(Mask, DL, VT));
10836 }
10837 }
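// For instance (example types): with x : i8 any-extended to i32 and a shift
// amount of 3, the fold above produces
// (and (any_extend (srl x, 3)), 0x1FFFFFFF), clearing the high bits that the
// narrow shift leaves undefined in the extended value.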
10838
10839 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10840 // bit, which is unmodified by sra.
10841 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10842 if (N0.getOpcode() == ISD::SRA)
10843 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10844 }
10845
10846 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10847 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10848 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10849 isPowerOf2_32(OpSizeInBits) &&
10850 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10851 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10852
10853 // If any of the input bits are KnownOne, then the input couldn't be all
10854 // zeros, thus the result of the srl will always be zero.
10855 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10856
10857 // If all of the bits input to the ctlz node are known to be zero, then
10858 // the result of the ctlz is "32" and the result of the shift is one.
10859 APInt UnknownBits = ~Known.Zero;
10860 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10861
10862 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10863 if (UnknownBits.isPowerOf2()) {
10864 // Okay, we know that only the single bit specified by UnknownBits
10865 // could be set on input to the CTLZ node. If this bit is set, the SRL
10866 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10867 // to an SRL/XOR pair, which is likely to simplify more.
10868 unsigned ShAmt = UnknownBits.countr_zero();
10869 SDValue Op = N0.getOperand(0);
10870
10871 if (ShAmt) {
10872 SDLoc DL(N0);
10873 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10874 DAG.getShiftAmountConstant(ShAmt, VT, DL));
10875 AddToWorklist(Op.getNode());
10876 }
10877 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10878 }
10879 }
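// Worked example (hypothetical known-bits): if x : i32 can only have bit 4
// set (UnknownBits == 0x10), then (srl (ctlz x), 5) is rewritten to
// (xor (srl x, 4), 1): when x == 0x10 the ctlz is 27 and the srl yields 0,
// and when x == 0 the ctlz is 32 and the srl yields 1, matching the xor form.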
10880
10881 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10882 if (N1.getOpcode() == ISD::TRUNCATE &&
10883 N1.getOperand(0).getOpcode() == ISD::AND) {
10884 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10885 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10886 }
10887
10888 // fold operands of srl based on knowledge that the low bits are not
10889 // demanded.
10890 if (SimplifyDemandedBits(SDValue(N, 0)))
10891 return SDValue(N, 0);
10892
10893 if (N1C && !N1C->isOpaque())
10894 if (SDValue NewSRL = visitShiftByConstant(N))
10895 return NewSRL;
10896
10897 // Attempt to convert a srl of a load into a narrower zero-extending load.
10898 if (SDValue NarrowLoad = reduceLoadWidth(N))
10899 return NarrowLoad;
10900
10901 // Here is a common situation. We want to optimize:
10902 //
10903 // %a = ...
10904 // %b = and i32 %a, 2
10905 // %c = srl i32 %b, 1
10906 // brcond i32 %c ...
10907 //
10908 // into
10909 //
10910 // %a = ...
10911 // %b = and %a, 2
10912 // %c = setcc eq %b, 0
10913 // brcond %c ...
10914 //
10915 // However, after the source operand of the SRL is optimized into an AND, the SRL
10916 // itself may not be optimized further. Look for it and add the BRCOND into
10917 // the worklist.
10918 //
10919 // This also tends to happen for binary operations when SimplifyDemandedBits
10920 // is involved.
10921 //
10922 // FIXME: This is unnecessary if we process the DAG in topological order,
10923 // which we plan to do. This workaround can be removed once the DAG is
10924 // processed in topological order.
10925 if (N->hasOneUse()) {
10926 SDNode *User = *N->user_begin();
10927
10928 // Look past the truncate.
10929 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
10930 User = *User->user_begin();
10931
10932 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
10933 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
10934 AddToWorklist(User);
10935 }
10936
10937 // Try to transform this shift into a multiply-high if
10938 // it matches the appropriate pattern detected in combineShiftToMULH.
10939 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10940 return MULH;
10941
10942 if (SDValue AVG = foldShiftToAvg(N))
10943 return AVG;
10944
10945 return SDValue();
10946}
10947
10948SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10949 EVT VT = N->getValueType(0);
10950 SDValue N0 = N->getOperand(0);
10951 SDValue N1 = N->getOperand(1);
10952 SDValue N2 = N->getOperand(2);
10953 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10954 unsigned BitWidth = VT.getScalarSizeInBits();
10955 SDLoc DL(N);
10956
10957 // fold (fshl N0, N1, 0) -> N0
10958 // fold (fshr N0, N1, 0) -> N1
10959 if (isPowerOf2_32(BitWidth))
10960 if (DAG.MaskedValueIsZero(
10961 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10962 return IsFSHL ? N0 : N1;
10963
10964 auto IsUndefOrZero = [](SDValue V) {
10965 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10966 };
10967
10968 // TODO - support non-uniform vector shift amounts.
10969 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10970 EVT ShAmtTy = N2.getValueType();
10971
10972 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10973 if (Cst->getAPIntValue().uge(BitWidth)) {
10974 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10975 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10976 DAG.getConstant(RotAmt, DL, ShAmtTy));
10977 }
10978
10979 unsigned ShAmt = Cst->getZExtValue();
10980 if (ShAmt == 0)
10981 return IsFSHL ? N0 : N1;
10982
10983 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10984 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10985 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10986 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10987 if (IsUndefOrZero(N0))
10988 return DAG.getNode(
10989 ISD::SRL, DL, VT, N1,
10990 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10991 if (IsUndefOrZero(N1))
10992 return DAG.getNode(
10993 ISD::SHL, DL, VT, N0,
10994 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10995
10996 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10997 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10998 // TODO - bigendian support once we have test coverage.
10999 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11000 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11001 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11002 !DAG.getDataLayout().isBigEndian()) {
11003 auto *LHS = dyn_cast<LoadSDNode>(N0);
11004 auto *RHS = dyn_cast<LoadSDNode>(N1);
11005 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11006 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11007 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
11008 ISD::isNON_EXTLoad(LHS)) {
11009 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11010 SDLoc DL(RHS);
11011 uint64_t PtrOff =
11012 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11013 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11014 unsigned Fast = 0;
11015 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11016 RHS->getAddressSpace(), NewAlign,
11017 RHS->getMemOperand()->getFlags(), &Fast) &&
11018 Fast) {
11019 SDValue NewPtr = DAG.getMemBasePlusOffset(
11020 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11021 AddToWorklist(NewPtr.getNode());
11022 SDValue Load = DAG.getLoad(
11023 VT, DL, RHS->getChain(), NewPtr,
11024 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11025 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11026 // Replace the old load's chain with the new load's chain.
11027 WorklistRemover DeadNodes(*this);
11028 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
11029 return Load;
11030 }
11031 }
11032 }
11033 }
11034 }
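// Examples of the constant-amount folds above (illustrative i32 operands):
// (fshl a, b, 40) is first reduced to (fshl a, b, 8); then
// (fshl 0, b, 8) -> (srl b, 24) and (fshr a, 0, 8) -> (shl a, 24), since a
// zero/undef half contributes nothing to the concatenated shift.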
11035
11036 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11037 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11038 // iff we know the shift amount is in range.
11039 // TODO: when is it worth doing SUB(BW, N2) as well?
11040 if (isPowerOf2_32(BitWidth)) {
11041 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11042 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11043 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11044 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11045 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11046 }
11047
11048 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11049 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11050 // TODO: Investigate flipping this rotate if only one is legal.
11051 // If funnel shift is legal as well we might be better off avoiding
11052 // non-constant (BW - N2).
11053 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11054 if (N0 == N1 && hasOperation(RotOpc, VT))
11055 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11056
11057 // Simplify, based on bits shifted out of N0/N1.
11058 if (SimplifyDemandedBits(SDValue(N, 0)))
11059 return SDValue(N, 0);
11060
11061 return SDValue();
11062}
11063
11064SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11065 SDValue N0 = N->getOperand(0);
11066 SDValue N1 = N->getOperand(1);
11067 if (SDValue V = DAG.simplifyShift(N0, N1))
11068 return V;
11069
11070 SDLoc DL(N);
11071 EVT VT = N0.getValueType();
11072
11073 // fold (*shlsat c1, c2) -> c1<<c2
11074 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11075 return C;
11076
11077 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11078
11079 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11080 // fold (sshlsat x, c) -> (shl x, c)
11081 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11082 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11083 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11084
11085 // fold (ushlsat x, c) -> (shl x, c)
11086 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11087 N1C->getAPIntValue().ule(
11088 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11089 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11090 }
11091
11092 return SDValue();
11093}
11094
11095// Given a ABS node, detect the following patterns:
11096// (ABS (SUB (EXTEND a), (EXTEND b))).
11097// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11098// Generates UABD/SABD instruction.
11099SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11100 EVT SrcVT = N->getValueType(0);
11101
11102 if (N->getOpcode() == ISD::TRUNCATE)
11103 N = N->getOperand(0).getNode();
11104
11105 if (N->getOpcode() != ISD::ABS)
11106 return SDValue();
11107
11108 EVT VT = N->getValueType(0);
11109 SDValue AbsOp1 = N->getOperand(0);
11110 SDValue Op0, Op1;
11111
11112 if (AbsOp1.getOpcode() != ISD::SUB)
11113 return SDValue();
11114
11115 Op0 = AbsOp1.getOperand(0);
11116 Op1 = AbsOp1.getOperand(1);
11117
11118 unsigned Opc0 = Op0.getOpcode();
11119
11120 // Check if the operands of the sub are (zero|sign)-extended.
11121 // TODO: Should we use ValueTracking instead?
11122 if (Opc0 != Op1.getOpcode() ||
11123 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11124 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11125 // fold (abs (sub nsw x, y)) -> abds(x, y)
11126 // Don't fold this for unsupported types as we lose the NSW handling.
11127 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11128 TLI.preferABDSToABSWithNSW(VT)) {
11129 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11130 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11131 }
11132 return SDValue();
11133 }
11134
11135 EVT VT0, VT1;
11136 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11137 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11138 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11139 } else {
11140 VT0 = Op0.getOperand(0).getValueType();
11141 VT1 = Op1.getOperand(0).getValueType();
11142 }
11143 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11144
11145 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11146 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11147 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11148 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11149 (VT1 == MaxVT || Op1->hasOneUse()) &&
11150 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11151 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11152 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11153 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11154 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11155 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11156 }
11157
11158 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11159 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11160 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11161 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11162 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11163 }
11164
11165 return SDValue();
11166}
11167
11168SDValue DAGCombiner::visitABS(SDNode *N) {
11169 SDValue N0 = N->getOperand(0);
11170 EVT VT = N->getValueType(0);
11171 SDLoc DL(N);
11172
11173 // fold (abs c1) -> c2
11174 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11175 return C;
11176 // fold (abs (abs x)) -> (abs x)
11177 if (N0.getOpcode() == ISD::ABS)
11178 return N0;
11179 // fold (abs x) -> x iff not-negative
11180 if (DAG.SignBitIsZero(N0))
11181 return N0;
11182
11183 if (SDValue ABD = foldABSToABD(N, DL))
11184 return ABD;
11185
11186 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11187 // iff zero_extend/truncate are free.
11188 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11189 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11190 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11191 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11192 hasOperation(ISD::ABS, ExtVT)) {
11193 return DAG.getNode(
11194 ISD::ZERO_EXTEND, DL, VT,
11195 DAG.getNode(ISD::ABS, DL, ExtVT,
11196 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11197 }
11198 }
11199
11200 return SDValue();
11201}
11202
11203SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11204 SDValue N0 = N->getOperand(0);
11205 EVT VT = N->getValueType(0);
11206 SDLoc DL(N);
11207
11208 // fold (bswap c1) -> c2
11209 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11210 return C;
11211 // fold (bswap (bswap x)) -> x
11212 if (N0.getOpcode() == ISD::BSWAP)
11213 return N0.getOperand(0);
11214
11215 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11216 // isn't supported, it will be expanded to bswap followed by a manual reversal
11217 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11218 // the two bswaps if the bitreverse gets expanded.
11219 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11220 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11221 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11222 }
11223
11224 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11225 // iff x >= bw/2 (i.e. lower half is known zero)
11226 unsigned BW = VT.getScalarSizeInBits();
11227 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11228 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11229 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11230 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11231 ShAmt->getZExtValue() >= (BW / 2) &&
11232 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11233 TLI.isTruncateFree(VT, HalfVT) &&
11234 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11235 SDValue Res = N0.getOperand(0);
11236 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11237 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11238 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11239 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11240 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11241 return DAG.getZExtOrTrunc(Res, DL, VT);
11242 }
11243 }
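// For example (illustrative i64 case): (bswap (shl x, 48)) becomes
// (zext (bswap (trunc (shl x, 16)))) with an i32 bswap, because the original
// shift guarantees the low 32 bits of the i64 value are zero and only the
// high half actually needs byte-swapping.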
11244
11245 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11246 // inverse-shift-of-bswap:
11247 // bswap (X u<< C) --> (bswap X) u>> C
11248 // bswap (X u>> C) --> (bswap X) u<< C
11249 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11250 N0.hasOneUse()) {
11251 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11252 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11253 ShAmt->getZExtValue() % 8 == 0) {
11254 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11255 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11256 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11257 }
11258 }
11259
11260 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11261 return V;
11262
11263 return SDValue();
11264}
11265
11266SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11267 SDValue N0 = N->getOperand(0);
11268 EVT VT = N->getValueType(0);
11269 SDLoc DL(N);
11270
11271 // fold (bitreverse c1) -> c2
11272 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11273 return C;
11274
11275 // fold (bitreverse (bitreverse x)) -> x
11276 if (N0.getOpcode() == ISD::BITREVERSE)
11277 return N0.getOperand(0);
11278
11279 SDValue X, Y;
11280
11281 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11282 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11283 sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11284 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11285
11286 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11287 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11288 sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11289 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11290
11291 return SDValue();
11292}
11293
11294SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11295 SDValue N0 = N->getOperand(0);
11296 EVT VT = N->getValueType(0);
11297 SDLoc DL(N);
11298
11299 // fold (ctlz c1) -> c2
11300 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11301 return C;
11302
11303 // If the value is known never to be zero, switch to the undef version.
11304 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11305 if (DAG.isKnownNeverZero(N0))
11306 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11307
11308 return SDValue();
11309}
11310
11311SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11312 SDValue N0 = N->getOperand(0);
11313 EVT VT = N->getValueType(0);
11314 SDLoc DL(N);
11315
11316 // fold (ctlz_zero_undef c1) -> c2
11317 if (SDValue C =
11318 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11319 return C;
11320 return SDValue();
11321}
11322
11323SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11324 SDValue N0 = N->getOperand(0);
11325 EVT VT = N->getValueType(0);
11326 SDLoc DL(N);
11327
11328 // fold (cttz c1) -> c2
11329 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11330 return C;
11331
11332 // If the value is known never to be zero, switch to the undef version.
11333 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11334 if (DAG.isKnownNeverZero(N0))
11335 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11336
11337 return SDValue();
11338}
11339
11340SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11341 SDValue N0 = N->getOperand(0);
11342 EVT VT = N->getValueType(0);
11343 SDLoc DL(N);
11344
11345 // fold (cttz_zero_undef c1) -> c2
11346 if (SDValue C =
11347 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11348 return C;
11349 return SDValue();
11350}
11351
11352SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11353 SDValue N0 = N->getOperand(0);
11354 EVT VT = N->getValueType(0);
11355 unsigned NumBits = VT.getScalarSizeInBits();
11356 SDLoc DL(N);
11357
11358 // fold (ctpop c1) -> c2
11359 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11360 return C;
11361
11362 // If the source is being shifted, but doesn't affect any active bits,
11363 // then we can call CTPOP on the shift source directly.
11364 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11365 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11366 const APInt &Amt = AmtC->getAPIntValue();
11367 if (Amt.ult(NumBits)) {
11368 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11369 if ((N0.getOpcode() == ISD::SRL &&
11370 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11371 (N0.getOpcode() == ISD::SHL &&
11372 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11373 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11374 }
11375 }
11376 }
11377 }
11378
11379 // If the upper bits are known to be zero, then see if it's profitable to
11380 // only count the lower bits.
11381 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11382 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11383 if (hasOperation(ISD::CTPOP, HalfVT) &&
11384 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11385 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11386 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11387 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11388 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11389 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11390 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11391 }
11392 }
11393 }
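// For instance (example width): if the upper 32 bits of an i64 value are
// known zero and i32 ctpop is available, (ctpop x) becomes
// (zext (ctpop (trunc x))), counting only the half that can contain set bits.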
11394
11395 return SDValue();
11396}
11397
11398 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11399 SDValue RHS, const SDNodeFlags Flags,
11400 const TargetLowering &TLI) {
11401 EVT VT = LHS.getValueType();
11402 if (!VT.isFloatingPoint())
11403 return false;
11404
11405 const TargetOptions &Options = DAG.getTarget().Options;
11406
11407 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11408 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11409 (Flags.hasNoNaNs() ||
11410 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11411}
11412
11413 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11414 SDValue RHS, SDValue True, SDValue False,
11415 ISD::CondCode CC,
11416 const TargetLowering &TLI,
11417 SelectionDAG &DAG) {
11418 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11419 switch (CC) {
11420 case ISD::SETOLT:
11421 case ISD::SETOLE:
11422 case ISD::SETLT:
11423 case ISD::SETLE:
11424 case ISD::SETULT:
11425 case ISD::SETULE: {
11426 // Since it's already known to be never NaN to get here, either fminnum or
11427 // fminnum_ieee is OK. Try the ieee version first, since fminnum is
11428 // expanded in terms of it.
11429 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11430 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11431 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11432
11433 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11434 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11435 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11436 return SDValue();
11437 }
11438 case ISD::SETOGT:
11439 case ISD::SETOGE:
11440 case ISD::SETGT:
11441 case ISD::SETGE:
11442 case ISD::SETUGT:
11443 case ISD::SETUGE: {
11444 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11445 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11446 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11447
11448 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11449 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11450 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11451 return SDValue();
11452 }
11453 default:
11454 return SDValue();
11455 }
11456}
11457
11458SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
11459 const unsigned Opcode = N->getOpcode();
11460
11461 // Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
11462 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11463 return SDValue();
11464
11465 unsigned FloorISD = 0;
11466 auto VT = N->getValueType(0);
11467 bool IsUnsigned = false;
11468
11469 // Decide whether signed or unsigned.
11470 switch (Opcode) {
11471 case ISD::SRA:
11472 if (!hasOperation(ISD::AVGFLOORS, VT))
11473 return SDValue();
11474 FloorISD = ISD::AVGFLOORS;
11475 break;
11476 case ISD::SRL:
11477 IsUnsigned = true;
11478 if (!hasOperation(ISD::AVGFLOORU, VT))
11479 return SDValue();
11480 FloorISD = ISD::AVGFLOORU;
11481 break;
11482 default:
11483 return SDValue();
11484 }
11485
11486 // Captured values.
11487 SDValue A, B, Add;
11488
11489 // Match floor average as it is common to both floor/ceil avgs.
11490 if (!sd_match(N, m_BinOp(Opcode,
11491 m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11492 m_One())))
11493 return SDValue();
11494
11495 // Can't optimize adds that may wrap.
11496 if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11497 return SDValue();
11498
11499 if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11500 return SDValue();
11501
11502 return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11503}
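// The net effect of the match above (illustrative): (srl (add nuw x, y), 1)
// becomes (avgflooru x, y) and (sra (add nsw x, y), 1) becomes
// (avgfloors x, y), but only when the corresponding AVG node is supported and
// the add carries the matching no-wrap flag, since a wrapping add would have
// already lost the bit the shift is trying to recover.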
11504
11505/// Generate Min/Max node
11506SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11507 SDValue RHS, SDValue True,
11508 SDValue False, ISD::CondCode CC) {
11509 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11510 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11511
11512 // If we can't directly match this, try to see if we can pull an fneg out of
11513 // the select.
11514 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11515 True, DAG, LegalOperations, ForCodeSize);
11516 if (!NegTrue)
11517 return SDValue();
11518
11519 HandleSDNode NegTrueHandle(NegTrue);
11520
11521 // Try to unfold an fneg from the select if we are comparing the negated
11522 // constant.
11523 //
11524 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11525 //
11526 // TODO: Handle fabs
11527 if (LHS == NegTrue) {
11528 // If we can't directly match this, try to see if we can pull an fneg out of
11529 // the select.
11530 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11531 RHS, DAG, LegalOperations, ForCodeSize);
11532 if (NegRHS) {
11533 HandleSDNode NegRHSHandle(NegRHS);
11534 if (NegRHS == False) {
11535 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11536 False, CC, TLI, DAG);
11537 if (Combined)
11538 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11539 }
11540 }
11541 }
11542
11543 return SDValue();
11544}
11545
11546/// If a (v)select has a condition value that is a sign-bit test, try to smear
11547/// the condition operand sign-bit across the value width and use it as a mask.
11548 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11549 SelectionDAG &DAG) {
11550 SDValue Cond = N->getOperand(0);
11551 SDValue C1 = N->getOperand(1);
11552 SDValue C2 = N->getOperand(2);
11553 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11554 return SDValue();
11555
11556 EVT VT = N->getValueType(0);
11557 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11558 VT != Cond.getOperand(0).getValueType())
11559 return SDValue();
11560
11561 // The inverted-condition + commuted-select variants of these patterns are
11562 // canonicalized to these forms in IR.
11563 SDValue X = Cond.getOperand(0);
11564 SDValue CondC = Cond.getOperand(1);
11565 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11566 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11567 isAllOnesOrAllOnesSplat(C2)) {
11568 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11569 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11570 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11571 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11572 }
11573 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11574 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11575 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11576 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11577 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11578 }
11579 return SDValue();
11580}
11581
11582 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11583 const TargetLowering &TLI) {
11584 if (!TLI.convertSelectOfConstantsToMath(VT))
11585 return false;
11586
11587 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11588 return true;
11589 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11590 return true;
11591
11592 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11593 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11594 return true;
11595 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11596 return true;
11597
11598 return false;
11599}
11600
11601SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11602 SDValue Cond = N->getOperand(0);
11603 SDValue N1 = N->getOperand(1);
11604 SDValue N2 = N->getOperand(2);
11605 EVT VT = N->getValueType(0);
11606 EVT CondVT = Cond.getValueType();
11607 SDLoc DL(N);
11608
11609 if (!VT.isInteger())
11610 return SDValue();
11611
11612 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11613 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11614 if (!C1 || !C2)
11615 return SDValue();
11616
11617 if (CondVT != MVT::i1 || LegalOperations) {
11618 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11619 // We can't do this reliably if integer based booleans have different contents
11620 // to floating point based booleans. This is because we can't tell whether we
11621 // have an integer-based boolean or a floating-point-based boolean unless we
11622 // can find the SETCC that produced it and inspect its operands. This is
11623 // fairly easy if C is the SETCC node, but it can potentially be
11624 // undiscoverable (or not reasonably discoverable). For example, it could be
11625 // in another basic block or it could require searching a complicated
11626 // expression.
11627 if (CondVT.isInteger() &&
11628 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11629 TargetLowering::ZeroOrOneBooleanContent &&
11630 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11631 TargetLowering::ZeroOrOneBooleanContent &&
11632 C1->isZero() && C2->isOne()) {
11633 SDValue NotCond =
11634 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11635 if (VT.bitsEq(CondVT))
11636 return NotCond;
11637 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11638 }
11639
11640 return SDValue();
11641 }
11642
11643 // Only do this before legalization to avoid conflicting with target-specific
11644 // transforms in the other direction (create a select from a zext/sext). There
11645 // is also a target-independent combine here in DAGCombiner in the other
11646 // direction for (select Cond, -1, 0) when the condition is not i1.
11647 assert(CondVT == MVT::i1 && !LegalOperations);
11648
11649 // select Cond, 1, 0 --> zext (Cond)
11650 if (C1->isOne() && C2->isZero())
11651 return DAG.getZExtOrTrunc(Cond, DL, VT);
11652
11653 // select Cond, -1, 0 --> sext (Cond)
11654 if (C1->isAllOnes() && C2->isZero())
11655 return DAG.getSExtOrTrunc(Cond, DL, VT);
11656
11657 // select Cond, 0, 1 --> zext (!Cond)
11658 if (C1->isZero() && C2->isOne()) {
11659 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11660 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11661 return NotCond;
11662 }
11663
11664 // select Cond, 0, -1 --> sext (!Cond)
11665 if (C1->isZero() && C2->isAllOnes()) {
11666 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11667 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11668 return NotCond;
11669 }
11670
11671 // Use a target hook because some targets may prefer to transform in the
11672 // other direction.
11673 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11674 return SDValue();
11675
11676 // For any constants that differ by 1, we can transform the select into
11677 // an extend and add.
11678 const APInt &C1Val = C1->getAPIntValue();
11679 const APInt &C2Val = C2->getAPIntValue();
11680
11681 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11682 if (C1Val - 1 == C2Val) {
11683 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11684 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11685 }
11686
11687 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11688 if (C1Val + 1 == C2Val) {
11689 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11690 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11691 }
11692
11693 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11694 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11695 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11696 SDValue ShAmtC =
11697 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11698 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11699 }
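// Illustrative instances of the folds above (example constants, i1 Cond):
// select Cond, 5, 4 -> (add (zext Cond), 4)
// select Cond, 3, 4 -> (add (sext Cond), 4)
// select Cond, 8, 0 -> (shl (zext Cond), 3)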
11700
11701 // select Cond, -1, C --> or (sext Cond), C
11702 if (C1->isAllOnes()) {
11703 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11704 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11705 }
11706
11707 // select Cond, C, -1 --> or (sext (not Cond)), C
11708 if (C2->isAllOnes()) {
11709 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11710 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11711 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11712 }
11713
11714 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11715 return V;
11716
11717 return SDValue();
11718}
11719
11720template <class MatchContextClass>
11721 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
11722 SelectionDAG &DAG) {
11723 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11724 N->getOpcode() == ISD::VP_SELECT) &&
11725 "Expected a (v)(vp.)select");
11726 SDValue Cond = N->getOperand(0);
11727 SDValue T = N->getOperand(1), F = N->getOperand(2);
11728 EVT VT = N->getValueType(0);
11729 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11730 MatchContextClass matcher(DAG, TLI, N);
11731
11732 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11733 return SDValue();
11734
11735 // select Cond, Cond, F --> or Cond, freeze(F)
11736 // select Cond, 1, F --> or Cond, freeze(F)
11737 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11738 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
11739
11740 // select Cond, T, Cond --> and Cond, freeze(T)
11741 // select Cond, T, 0 --> and Cond, freeze(T)
11742 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11743 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
11744
11745 // select Cond, T, 1 --> or (not Cond), freeze(T)
11746 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11747 SDValue NotCond =
11748 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11749 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
11750 }
11751
11752 // select Cond, 0, F --> and (not Cond), freeze(F)
11753 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11754 SDValue NotCond =
11755 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11756 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
11757 }
11758
11759 return SDValue();
11760}
11761
11762 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11763 SDValue N0 = N->getOperand(0);
11764 SDValue N1 = N->getOperand(1);
11765 SDValue N2 = N->getOperand(2);
11766 EVT VT = N->getValueType(0);
11767 unsigned EltSizeInBits = VT.getScalarSizeInBits();
11768
11769 SDValue Cond0, Cond1;
11770 ISD::CondCode CC;
11771 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
11772 m_CondCode(CC)))) ||
11773 VT != Cond0.getValueType())
11774 return SDValue();
11775
11776 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11777 // compare is inverted from that pattern ("Cond0 s> -1").
11778 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11779 ; // This is the pattern we are looking for.
11780 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11781 std::swap(N1, N2);
11782 else
11783 return SDValue();
11784
11785 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11786 if (isNullOrNullSplat(N2)) {
11787 SDLoc DL(N);
11788 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11789 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11790 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
11791 }
11792
11793 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11794 if (isAllOnesOrAllOnesSplat(N1)) {
11795 SDLoc DL(N);
11796 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11797 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11798 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
11799 }
11800
11801 // If we have to invert the sign bit mask, only do that transform if the
11802 // target has a bitwise 'and not' instruction (the invert is free).
11803 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11804 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11805 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11806 SDLoc DL(N);
11807 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11808 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11809 SDValue Not = DAG.getNOT(DL, Sra, VT);
11810 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
11811 }
11812
11813 // TODO: There's another pattern in this family, but it may require
11814 // implementing hasOrNot() to check for profitability:
11815 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11816
11817 return SDValue();
11818}
11819
11820// Match SELECTs with absolute difference patterns.
11821// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11822// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11823// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11824// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
11825SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
11826 SDValue False, ISD::CondCode CC,
11827 const SDLoc &DL) {
11828 bool IsSigned = isSignedIntSetCC(CC);
11829 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
11830 EVT VT = LHS.getValueType();
11831
11832 if (LegalOperations && !hasOperation(ABDOpc, VT))
11833 return SDValue();
11834
11835 switch (CC) {
11836 case ISD::SETGT:
11837 case ISD::SETGE:
11838 case ISD::SETUGT:
11839 case ISD::SETUGE:
11840 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11841 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
11842 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11843 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11844 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11845 hasOperation(ABDOpc, VT))
11846 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11847 break;
11848 case ISD::SETLT:
11849 case ISD::SETLE:
11850 case ISD::SETULT:
11851 case ISD::SETULE:
11852 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11853 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
11854 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11855 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11856 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11857 hasOperation(ABDOpc, VT))
11858 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11859 break;
11860 default:
11861 break;
11862 }
11863
11864 return SDValue();
11865}
11866
11867SDValue DAGCombiner::visitSELECT(SDNode *N) {
11868 SDValue N0 = N->getOperand(0);
11869 SDValue N1 = N->getOperand(1);
11870 SDValue N2 = N->getOperand(2);
11871 EVT VT = N->getValueType(0);
11872 EVT VT0 = N0.getValueType();
11873 SDLoc DL(N);
11874 SDNodeFlags Flags = N->getFlags();
11875
11876 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11877 return V;
11878
11879 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
11880 return V;
11881
11882 // select (not Cond), N1, N2 -> select Cond, N2, N1
11883 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11884 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11885 SelectOp->setFlags(Flags);
11886 return SelectOp;
11887 }
11888
11889 if (SDValue V = foldSelectOfConstants(N))
11890 return V;
11891
11892 // If we can fold this based on the true/false value, do so.
11893 if (SimplifySelectOps(N, N1, N2))
11894 return SDValue(N, 0); // Don't revisit N.
11895
11896 if (VT0 == MVT::i1) {
11897 // The code in this block deals with the following 2 equivalences:
11898 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11899 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11900 // The target can specify its preferred form with the
11901 // shouldNormalizeToSelectSequence() callback. However we always transform
11902 // to the right anyway if we find the inner select exists in the DAG anyway
11903 // and we always transform to the left side if we know that we can further
11904 // optimize the combination of the conditions.
11905 bool normalizeToSequence =
11906 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11907 // select (and Cond0, Cond1), X, Y
11908 // -> select Cond0, (select Cond1, X, Y), Y
11909 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11910 SDValue Cond0 = N0->getOperand(0);
11911 SDValue Cond1 = N0->getOperand(1);
11912 SDValue InnerSelect =
11913 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11914 if (normalizeToSequence || !InnerSelect.use_empty())
11915 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11916 InnerSelect, N2, Flags);
11917 // Cleanup on failure.
11918 if (InnerSelect.use_empty())
11919 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11920 }
11921 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11922 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11923 SDValue Cond0 = N0->getOperand(0);
11924 SDValue Cond1 = N0->getOperand(1);
11925 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11926 Cond1, N1, N2, Flags);
11927 if (normalizeToSequence || !InnerSelect.use_empty())
11928 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11929 InnerSelect, Flags);
11930 // Cleanup on failure.
11931 if (InnerSelect.use_empty())
11932 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11933 }
11934
11935 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11936 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11937 SDValue N1_0 = N1->getOperand(0);
11938 SDValue N1_1 = N1->getOperand(1);
11939 SDValue N1_2 = N1->getOperand(2);
11940 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11941 // Create the actual and node if we can generate good code for it.
11942 if (!normalizeToSequence) {
11943 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11944 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11945 N2, Flags);
11946 }
11947 // Otherwise see if we can optimize the "and" to a better pattern.
11948 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11949 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11950 N2, Flags);
11951 }
11952 }
11953 }
11954 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11955 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11956 SDValue N2_0 = N2->getOperand(0);
11957 SDValue N2_1 = N2->getOperand(1);
11958 SDValue N2_2 = N2->getOperand(2);
11959 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11960 // Create the actual or node if we can generate good code for it.
11961 if (!normalizeToSequence) {
11962 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11963 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11964 N2_2, Flags);
11965 }
11966 // Otherwise see if we can optimize to a better pattern.
11967 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11968 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11969 N2_2, Flags);
11970 }
11971 }
11972
11973 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11974 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11975 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
11976 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11977 N2.getOperand(1) == N1.getOperand(0) &&
11978 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11979 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
11980
11981 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11982 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11983 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
11984 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11985 N2.getOperand(1) == N1.getOperand(0) &&
11986 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11987 return DAG.getNegative(
11988 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
11989 DL, VT);
11990 }
11991
11992 // Fold selects based on a setcc into other things, such as min/max/abs.
11993 if (N0.getOpcode() == ISD::SETCC) {
11994 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11995 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11996
11997 // select (fcmp lt x, y), x, y -> fminnum x, y
11998 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11999 //
12000 // This is OK if we don't care what happens if either operand is a NaN.
12001 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12002 if (SDValue FMinMax =
12003 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12004 return FMinMax;
12005
12006 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12007 // This is conservatively limited to pre-legal-operations to give targets
12008 // a chance to reverse the transform if they want to do that. Also, it is
12009 // unlikely that the pattern would be formed late, so it's probably not
12010 // worth going through the other checks.
12011 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12012 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12013 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12014 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12015 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12016 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12017 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12018 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12019 //
12020 // The IR equivalent of this transform would have this form:
12021 // %a = add %x, C
12022 // %c = icmp ugt %x, ~C
12023 // %r = select %c, -1, %a
12024 // =>
12025 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12026 // %u0 = extractvalue %u, 0
12027 // %u1 = extractvalue %u, 1
12028 // %r = select %u1, -1, %u0
12029 SDVTList VTs = DAG.getVTList(VT, VT0);
12030 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12031 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12032 }
12033 }
12034
12035 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12036 (!LegalOperations &&
12037 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12038 // Any flags available in a select/setcc fold will be on the setcc as they
12039 // migrated from fcmp
12040 Flags = N0->getFlags();
12041 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
12042 N2, N0.getOperand(2));
12043 SelectNode->setFlags(Flags);
12044 return SelectNode;
12045 }
12046
12047 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12048 return ABD;
12049
12050 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12051 return NewSel;
12052 }
12053
12054 if (!VT.isVector())
12055 if (SDValue BinOp = foldSelectOfBinops(N))
12056 return BinOp;
12057
12058 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12059 return R;
12060
12061 return SDValue();
12062}
12063
12064// This function assumes all the vselect's arguments are CONCAT_VECTOR
12065// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
12066 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12067 SDLoc DL(N);
12068 SDValue Cond = N->getOperand(0);
12069 SDValue LHS = N->getOperand(1);
12070 SDValue RHS = N->getOperand(2);
12071 EVT VT = N->getValueType(0);
12072 int NumElems = VT.getVectorNumElements();
12073 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12074 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12075 Cond.getOpcode() == ISD::BUILD_VECTOR);
12076
12077 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12078 // binary ones here.
12079 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12080 return SDValue();
12081
12082 // We're sure we have an even number of elements due to the
12083 // concat_vectors we have as arguments to vselect.
12084 // Skip BV elements until we find one that's not an UNDEF
12085 // After we find an UNDEF element, keep looping until we get to half the
12086 // length of the BV and see if all the non-undef nodes are the same.
12087 ConstantSDNode *BottomHalf = nullptr;
12088 for (int i = 0; i < NumElems / 2; ++i) {
12089 if (Cond->getOperand(i)->isUndef())
12090 continue;
12091
12092 if (BottomHalf == nullptr)
12093 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12094 else if (Cond->getOperand(i).getNode() != BottomHalf)
12095 return SDValue();
12096 }
12097
12098 // Do the same for the second half of the BuildVector
12099 ConstantSDNode *TopHalf = nullptr;
12100 for (int i = NumElems / 2; i < NumElems; ++i) {
12101 if (Cond->getOperand(i)->isUndef())
12102 continue;
12103
12104 if (TopHalf == nullptr)
12105 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12106 else if (Cond->getOperand(i).getNode() != TopHalf)
12107 return SDValue();
12108 }
12109
12110 assert(TopHalf && BottomHalf &&
12111 "One half of the selector was all UNDEFs and the other was all the "
12112 "same value. This should have been addressed before this function.");
12113 return DAG.getNode(
12114 ISD::CONCAT_VECTORS, DL, VT,
12115 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12116 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12117}
12118
12119bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12120 SelectionDAG &DAG, const SDLoc &DL) {
12121
12122 // Only perform the transformation when existing operands can be reused.
12123 if (IndexIsScaled)
12124 return false;
12125
12126 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12127 return false;
12128
12129 EVT VT = BasePtr.getValueType();
12130
12131 if (SDValue SplatVal = DAG.getSplatValue(Index);
12132 SplatVal && !isNullConstant(SplatVal) &&
12133 SplatVal.getValueType() == VT) {
12134 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12135 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12136 return true;
12137 }
12138
12139 if (Index.getOpcode() != ISD::ADD)
12140 return false;
12141
12142 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12143 SplatVal && SplatVal.getValueType() == VT) {
12144 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12145 Index = Index.getOperand(1);
12146 return true;
12147 }
12148 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12149 SplatVal && SplatVal.getValueType() == VT) {
12150 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12151 Index = Index.getOperand(0);
12152 return true;
12153 }
12154 return false;
12155}
12156
12157// Fold sext/zext of index into index type.
12158bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12159 SelectionDAG &DAG) {
12160 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12161
12162 // It's always safe to look through zero extends.
12163 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12164 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12165 IndexType = ISD::UNSIGNED_SCALED;
12166 Index = Index.getOperand(0);
12167 return true;
12168 }
12169 if (ISD::isIndexTypeSigned(IndexType)) {
12170 IndexType = ISD::UNSIGNED_SCALED;
12171 return true;
12172 }
12173 }
12174
12175 // It's only safe to look through sign extends when Index is signed.
12176 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12177 ISD::isIndexTypeSigned(IndexType) &&
12178 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12179 Index = Index.getOperand(0);
12180 return true;
12181 }
12182
12183 return false;
12184}
12185
12186SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12187 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12188 SDValue Mask = MSC->getMask();
12189 SDValue Chain = MSC->getChain();
12190 SDValue Index = MSC->getIndex();
12191 SDValue Scale = MSC->getScale();
12192 SDValue StoreVal = MSC->getValue();
12193 SDValue BasePtr = MSC->getBasePtr();
12194 SDValue VL = MSC->getVectorLength();
12195 ISD::MemIndexType IndexType = MSC->getIndexType();
12196 SDLoc DL(N);
12197
12198 // Zap scatters with a zero mask.
12199 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12200 return Chain;
12201
12202 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12203 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12204 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12205 DL, Ops, MSC->getMemOperand(), IndexType);
12206 }
12207
12208 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12209 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12210 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12211 DL, Ops, MSC->getMemOperand(), IndexType);
12212 }
12213
12214 return SDValue();
12215}
12216
12217SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12218 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12219 SDValue Mask = MSC->getMask();
12220 SDValue Chain = MSC->getChain();
12221 SDValue Index = MSC->getIndex();
12222 SDValue Scale = MSC->getScale();
12223 SDValue StoreVal = MSC->getValue();
12224 SDValue BasePtr = MSC->getBasePtr();
12225 ISD::MemIndexType IndexType = MSC->getIndexType();
12226 SDLoc DL(N);
12227
12228 // Zap scatters with a zero mask.
12229 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12230 return Chain;
12231
12232 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12233 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12234 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12235 DL, Ops, MSC->getMemOperand(), IndexType,
12236 MSC->isTruncatingStore());
12237 }
12238
12239 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12240 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12241 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12242 DL, Ops, MSC->getMemOperand(), IndexType,
12243 MSC->isTruncatingStore());
12244 }
12245
12246 return SDValue();
12247}
12248
12249SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12250 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12251 SDValue Mask = MST->getMask();
12252 SDValue Chain = MST->getChain();
12253 SDValue Value = MST->getValue();
12254 SDValue Ptr = MST->getBasePtr();
12255 SDLoc DL(N);
12256
12257 // Zap masked stores with a zero mask.
12258 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12259 return Chain;
12260
12261 // Remove a masked store if base pointers and masks are equal.
12262 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12263 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12264 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12265 !MST->getBasePtr().isUndef() &&
12266 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12267 MST1->getMemoryVT().getStoreSize()) ||
12268 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12269 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12270 MST->getMemoryVT().getStoreSize())) {
12271 CombineTo(MST1, MST1->getChain());
12272 if (N->getOpcode() != ISD::DELETED_NODE)
12273 AddToWorklist(N);
12274 return SDValue(N, 0);
12275 }
12276 }
12277
12278 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12279 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12280 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12281 !MST->isCompressingStore() && !MST->isTruncatingStore())
12282 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12283 MST->getBasePtr(), MST->getPointerInfo(),
12284 MST->getOriginalAlign(),
12285 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12286
12287 // Try transforming N to an indexed store.
12288 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12289 return SDValue(N, 0);
12290
12291 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12292 Value.getValueType().isInteger() &&
12293 (!isa<ConstantSDNode>(Value) ||
12294 !cast<ConstantSDNode>(Value)->isOpaque())) {
12295 APInt TruncDemandedBits =
12296 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12297 MST->getMemoryVT().getScalarSizeInBits());
12298
12299 // See if we can simplify the operation with
12300 // SimplifyDemandedBits, which only works if the value has a single use.
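// e.g. a truncating store to i8 only demands the low 8 bits of Value, so
// something like (or X, 0xff00) can be simplified to X before being stored.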
12301 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12302 // Re-visit the store if anything changed and the store hasn't been merged
12303 // with another node (N is deleted). SimplifyDemandedBits will add Value's
12304 // node back to the worklist if necessary, but we also need to re-visit
12305 // the Store node itself.
12306 if (N->getOpcode() != ISD::DELETED_NODE)
12307 AddToWorklist(N);
12308 return SDValue(N, 0);
12309 }
12310 }
12311
12312 // If this is a TRUNC followed by a masked store, fold this into a masked
12313 // truncating store. We can do this even if this is already a masked
12314 // truncstore.
12315 // TODO: Try combining to a masked compress store if possible.
12316 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12317 MST->isUnindexed() && !MST->isCompressingStore() &&
12318 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12319 MST->getMemoryVT(), LegalOperations)) {
12320 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12321 Value.getOperand(0).getValueType());
12322 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12323 MST->getOffset(), Mask, MST->getMemoryVT(),
12324 MST->getMemOperand(), MST->getAddressingMode(),
12325 /*IsTruncating=*/true);
12326 }
12327
12328 return SDValue();
12329}
12330
12331SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12332 auto *SST = cast<VPStridedStoreSDNode>(N);
12333 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12334 // Combine strided stores with unit-stride to a regular VP store.
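// i.e. when the byte stride equals the element store size, consecutive lanes
// land at consecutive addresses, which is exactly what a plain VP store does.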
12335 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12336 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12337 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12338 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12339 SST->getVectorLength(), SST->getMemoryVT(),
12340 SST->getMemOperand(), SST->getAddressingMode(),
12341 SST->isTruncatingStore(), SST->isCompressingStore());
12342 }
12343 return SDValue();
12344}
12345
12346SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12347 SDLoc DL(N);
12348 SDValue Vec = N->getOperand(0);
12349 SDValue Mask = N->getOperand(1);
12350 SDValue Passthru = N->getOperand(2);
12351 EVT VecVT = Vec.getValueType();
12352
12353 bool HasPassthru = !Passthru.isUndef();
12354
12355 APInt SplatVal;
12356 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12357 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12358
12359 if (Vec.isUndef() || Mask.isUndef())
12360 return Passthru;
12361
12362 // No need for potentially expensive compress if the mask is constant.
12363 if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12364 SmallVector<SDValue> Ops;
12365 EVT ScalarVT = VecVT.getVectorElementType();
12366 unsigned NumSelected = 0;
12367 unsigned NumElmts = VecVT.getVectorNumElements();
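// Emit the compressed vector directly: lanes whose mask bit is known true are
// packed to the front, and the remaining lanes are taken from the passthru
// vector (or left undef when there is no passthru).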
12368 for (unsigned I = 0; I < NumElmts; ++I) {
12369 SDValue MaskI = Mask.getOperand(I);
12370 // We treat undef mask entries as "false".
12371 if (MaskI.isUndef())
12372 continue;
12373
12374 if (TLI.isConstTrueVal(MaskI)) {
12375 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12376 DAG.getVectorIdxConstant(I, DL));
12377 Ops.push_back(VecI);
12378 NumSelected++;
12379 }
12380 }
12381 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12382 SDValue Val =
12383 HasPassthru
12384 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12385 DAG.getVectorIdxConstant(Rest, DL))
12386 : DAG.getUNDEF(ScalarVT);
12387 Ops.push_back(Val);
12388 }
12389 return DAG.getBuildVector(VecVT, DL, Ops);
12390 }
12391
12392 return SDValue();
12393}
12394
12395SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12396 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12397 SDValue Mask = MGT->getMask();
12398 SDValue Chain = MGT->getChain();
12399 SDValue Index = MGT->getIndex();
12400 SDValue Scale = MGT->getScale();
12401 SDValue BasePtr = MGT->getBasePtr();
12402 SDValue VL = MGT->getVectorLength();
12403 ISD::MemIndexType IndexType = MGT->getIndexType();
12404 SDLoc DL(N);
12405
12406 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12407 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12408 return DAG.getGatherVP(
12409 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12410 Ops, MGT->getMemOperand(), IndexType);
12411 }
12412
12413 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12414 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12415 return DAG.getGatherVP(
12416 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12417 Ops, MGT->getMemOperand(), IndexType);
12418 }
12419
12420 return SDValue();
12421}
12422
12423SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12424 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12425 SDValue Mask = MGT->getMask();
12426 SDValue Chain = MGT->getChain();
12427 SDValue Index = MGT->getIndex();
12428 SDValue Scale = MGT->getScale();
12429 SDValue PassThru = MGT->getPassThru();
12430 SDValue BasePtr = MGT->getBasePtr();
12431 ISD::MemIndexType IndexType = MGT->getIndexType();
12432 SDLoc DL(N);
12433
12434 // Zap gathers with a zero mask.
12435 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12436 return CombineTo(N, PassThru, MGT->getChain());
12437
12438 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12439 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12440 return DAG.getMaskedGather(
12441 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12442 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12443 }
12444
12445 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12446 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12447 return DAG.getMaskedGather(
12448 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12449 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12450 }
12451
12452 return SDValue();
12453}
12454
12455SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12456 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12457 SDValue Mask = MLD->getMask();
12458 SDLoc DL(N);
12459
12460 // Zap masked loads with a zero mask.
12461 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12462 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12463
12464 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12465 // FIXME: Can we do this for indexed, expanding, or extending loads?
12466 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12467 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12468 SDValue NewLd = DAG.getLoad(
12469 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12470 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12471 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12472 return CombineTo(N, NewLd, NewLd.getValue(1));
12473 }
12474
12475 // Try transforming N to an indexed load.
12476 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12477 return SDValue(N, 0);
12478
12479 return SDValue();
12480}
12481
12482SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12483 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12484 SDValue Chain = HG->getChain();
12485 SDValue Inc = HG->getInc();
12486 SDValue Mask = HG->getMask();
12487 SDValue BasePtr = HG->getBasePtr();
12488 SDValue Index = HG->getIndex();
12489 SDLoc DL(HG);
12490
12491 EVT MemVT = HG->getMemoryVT();
12492 MachineMemOperand *MMO = HG->getMemOperand();
12493 ISD::MemIndexType IndexType = HG->getIndexType();
12494
12495 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12496 return Chain;
12497
12498 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12499 HG->getScale(), HG->getIntID()};
12500 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12501 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12502 MMO, IndexType);
12503
12504 EVT DataVT = Index.getValueType();
12505 if (refineIndexType(Index, IndexType, DataVT, DAG))
12506 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12507 MMO, IndexType);
12508 return SDValue();
12509}
12510
12511SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12512 auto *SLD = cast<VPStridedLoadSDNode>(N);
12513 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12514 // Combine strided loads with unit-stride to a regular VP load.
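// Same idea as the strided-store case above: a stride equal to the element
// store size is just a contiguous (unit-stride) access.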
12515 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12516 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12517 SDValue NewLd = DAG.getLoadVP(
12518 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12519 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12520 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12521 SLD->getMemOperand(), SLD->isExpandingLoad());
12522 return CombineTo(N, NewLd, NewLd.getValue(1));
12523 }
12524 return SDValue();
12525}
12526
12527/// A vector select of 2 constant vectors can be simplified to math/logic to
12528/// avoid a variable select instruction and possibly avoid constant loads.
12529SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12530 SDValue Cond = N->getOperand(0);
12531 SDValue N1 = N->getOperand(1);
12532 SDValue N2 = N->getOperand(2);
12533 EVT VT = N->getValueType(0);
12534 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12535 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12536 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12537 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12538 return SDValue();
12539
12540 // Check if we can use the condition value to increment/decrement a single
12541 // constant value. This simplifies a select to an add and removes a constant
12542 // load/materialization from the general case.
12543 bool AllAddOne = true;
12544 bool AllSubOne = true;
12545 unsigned Elts = VT.getVectorNumElements();
12546 for (unsigned i = 0; i != Elts; ++i) {
12547 SDValue N1Elt = N1.getOperand(i);
12548 SDValue N2Elt = N2.getOperand(i);
12549 if (N1Elt.isUndef() || N2Elt.isUndef())
12550 continue;
12551 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12552 AllAddOne = false;
12553 AllSubOne = false;
12554 break;
12555 }
12556
12557 const APInt &C1 = N1Elt->getAsAPIntVal();
12558 const APInt &C2 = N2Elt->getAsAPIntVal();
12559 if (C1 != C2 + 1)
12560 AllAddOne = false;
12561 if (C1 != C2 - 1)
12562 AllSubOne = false;
12563 }
12564
12565 // Further simplifications for the extra-special cases where the constants are
12566 // all 0 or all -1 should be implemented as folds of these patterns.
12567 SDLoc DL(N);
12568 if (AllAddOne || AllSubOne) {
12569 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12570 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
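// e.g. vselect Cond, <4, 7>, <3, 6> --> add (zext Cond), <3, 6>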
12571 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12572 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12573 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12574 }
12575
12576 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
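// e.g. vselect Cond, <8, 8>, <0, 0> --> shl (zext Cond), <3, 3>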
12577 APInt Pow2C;
12578 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12579 isNullOrNullSplat(N2)) {
12580 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12581 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12582 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12583 }
12584
12585 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12586 return V;
12587
12588 // The general case for select-of-constants:
12589 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12590 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12591 // leave that to a machine-specific pass.
12592 return SDValue();
12593}
12594
12595SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12596 SDValue N0 = N->getOperand(0);
12597 SDValue N1 = N->getOperand(1);
12598 SDValue N2 = N->getOperand(2);
12599 SDLoc DL(N);
12600
12601 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12602 return V;
12603
12604 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
12605 return V;
12606
12607 return SDValue();
12608}
12609
12610SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12611 SDValue N0 = N->getOperand(0);
12612 SDValue N1 = N->getOperand(1);
12613 SDValue N2 = N->getOperand(2);
12614 EVT VT = N->getValueType(0);
12615 SDLoc DL(N);
12616
12617 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12618 return V;
12619
12620 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12621 return V;
12622
12623 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12624 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12625 return DAG.getSelect(DL, VT, F, N2, N1);
12626
12627 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12628 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12631 TLI.getBooleanContents(N0.getValueType()) ==
12632 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12633 return DAG.getNode(
12634 ISD::ADD, DL, N1.getValueType(), N2,
12635 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12636 }
12637
12638 // Canonicalize integer abs.
12639 // vselect (setg[te] X, 0), X, -X ->
12640 // vselect (setgt X, -1), X, -X ->
12641 // vselect (setl[te] X, 0), -X, X ->
12642 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
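// (For negative X, Y is all ones, so (X + Y) ^ Y == (X - 1) ^ -1 == -X;
// for non-negative X, Y is zero and the expression is just X.)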
12643 if (N0.getOpcode() == ISD::SETCC) {
12644 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12645 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12646 bool isAbs = false;
12647 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12648
12649 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12650 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12651 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12652 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12653 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12654 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12655 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12656
12657 if (isAbs) {
12658 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12659 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12660
12661 SDValue Shift = DAG.getNode(
12662 ISD::SRA, DL, VT, LHS,
12663 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12664 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12665 AddToWorklist(Shift.getNode());
12666 AddToWorklist(Add.getNode());
12667 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12668 }
12669
12670 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12671 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12672 //
12673 // This is OK if we don't care about what happens if either operand is a
12674 // NaN.
12675 //
12676 if (N0.hasOneUse() &&
12677 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12678 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12679 return FMinMax;
12680 }
12681
12682 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12683 return S;
12684 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12685 return S;
12686
12687 // If this select has a condition (setcc) with narrower operands than the
12688 // select, try to widen the compare to match the select width.
12689 // TODO: This should be extended to handle any constant.
12690 // TODO: This could be extended to handle non-loading patterns, but that
12691 // requires thorough testing to avoid regressions.
12692 if (isNullOrNullSplat(RHS)) {
12693 EVT NarrowVT = LHS.getValueType();
12694 EVT WideVT = N->getValueType(0).changeVectorElementTypeToInteger();
12695 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12696 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12697 unsigned WideWidth = WideVT.getScalarSizeInBits();
12698 bool IsSigned = isSignedIntSetCC(CC);
12699 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12700 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12701 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12702 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12703 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12704 // Both compare operands can be widened for free. The LHS can use an
12705 // extended load, and the RHS is a constant:
12706 // vselect (ext (setcc load(X), C)), N1, N2 -->
12707 // vselect (setcc extload(X), C'), N1, N2
12708 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12709 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12710 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12711 EVT WideSetCCVT = getSetCCResultType(WideVT);
12712 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12713 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12714 }
12715 }
12716
12717 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
12718 return ABD;
12719
12720 // Match VSELECTs into add with unsigned saturation.
12721 if (hasOperation(ISD::UADDSAT, VT)) {
12722 // Check if one of the arms of the VSELECT is a vector with all bits set.
12723 // If it's on the left side invert the predicate to simplify logic below.
12724 SDValue Other;
12725 ISD::CondCode SatCC = CC;
12726 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12727 Other = N2;
12728 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12729 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12730 Other = N1;
12731 }
12732
12733 if (Other && Other.getOpcode() == ISD::ADD) {
12734 SDValue CondLHS = LHS, CondRHS = RHS;
12735 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12736
12737 // Canonicalize condition operands.
12738 if (SatCC == ISD::SETUGE) {
12739 std::swap(CondLHS, CondRHS);
12740 SatCC = ISD::SETULE;
12741 }
12742
12743 // We can test against either of the addition operands.
12744 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12745 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12746 if (SatCC == ISD::SETULE && Other == CondRHS &&
12747 (OpLHS == CondLHS || OpRHS == CondLHS))
12748 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12749
12750 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12751 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12752 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12753 CondLHS == OpLHS) {
12754 // If the RHS is a constant we have to reverse the const
12755 // canonicalization.
12756 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12757 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12758 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12759 };
12760 if (SatCC == ISD::SETULE &&
12761 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12762 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12763 }
12764 }
12765 }
12766
12767 // Match VSELECTs into sub with unsigned saturation.
12768 if (hasOperation(ISD::USUBSAT, VT)) {
12769 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12770 // the left side invert the predicate to simplify logic below.
12771 SDValue Other;
12772 ISD::CondCode SatCC = CC;
12773 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12774 Other = N2;
12775 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12776 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12777 Other = N1;
12778 }
12779
12780 // zext(x) >= y ? trunc(zext(x) - y) : 0
12781 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12782 // zext(x) > y ? trunc(zext(x) - y) : 0
12783 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12784 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12785 Other.getOperand(0).getOpcode() == ISD::SUB &&
12786 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12787 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12788 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12789 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12790 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12791 DAG, DL))
12792 return R;
12793 }
12794
12795 if (Other && Other.getNumOperands() == 2) {
12796 SDValue CondRHS = RHS;
12797 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12798
12799 if (OpLHS == LHS) {
12800 // Look for a general sub with unsigned saturation first.
12801 // x >= y ? x-y : 0 --> usubsat x, y
12802 // x > y ? x-y : 0 --> usubsat x, y
12803 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12804 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12805 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12806
12807 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12808 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12809 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12810 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12811 // If the RHS is a constant we have to reverse the const
12812 // canonicalization.
12813 // x > C-1 ? x+-C : 0 --> usubsat x, C
12814 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12815 return (!Op && !Cond) ||
12816 (Op && Cond &&
12817 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12818 };
12819 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12820 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12821 /*AllowUndefs*/ true)) {
12822 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12823 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12824 }
12825
12826 // Another special case: If C was a sign bit, the sub has been
12827 // canonicalized into a xor.
12828 // FIXME: Would it be better to use computeKnownBits to
12829 // determine whether it's safe to decanonicalize the xor?
12830 // x s< 0 ? x^C : 0 --> usubsat x, C
12831 APInt SplatValue;
12832 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12833 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12835 SplatValue.isSignMask()) {
12836 // Note that we have to rebuild the RHS constant here to
12837 // ensure we don't rely on particular values of undef lanes.
12838 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12839 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12840 }
12841 }
12842 }
12843 }
12844 }
12845 }
12846 }
12847
12848 if (SimplifySelectOps(N, N1, N2))
12849 return SDValue(N, 0); // Don't revisit N.
12850
12851 // Fold (vselect all_ones, N1, N2) -> N1
12852 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12853 return N1;
12854 // Fold (vselect all_zeros, N1, N2) -> N2
12855 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12856 return N2;
12857
12858 // The ConvertSelectToConcatVector function assumes both of the above
12859 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12860 // and addressed.
12861 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12862 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12863 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12864 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12865 return CV;
12866 }
12867
12868 if (SDValue V = foldVSelectOfConstants(N))
12869 return V;
12870
12871 if (hasOperation(ISD::SRA, VT))
12872 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12873 return V;
12874
12875 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12876 return SDValue(N, 0);
12877
12878 return SDValue();
12879}
12880
12881SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12882 SDValue N0 = N->getOperand(0);
12883 SDValue N1 = N->getOperand(1);
12884 SDValue N2 = N->getOperand(2);
12885 SDValue N3 = N->getOperand(3);
12886 SDValue N4 = N->getOperand(4);
12887 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12888 SDLoc DL(N);
12889
12890 // fold select_cc lhs, rhs, x, x, cc -> x
12891 if (N2 == N3)
12892 return N2;
12893
12894 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12895 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12896 isNullConstant(N1))
12897 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
12898
12899 // Determine if the condition we're dealing with is constant
12900 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12901 CC, DL, false)) {
12902 AddToWorklist(SCC.getNode());
12903
12904 // cond always true -> true val
12905 // cond always false -> false val
12906 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12907 return SCCC->isZero() ? N3 : N2;
12908
12909 // When the condition is UNDEF, just return the first operand. This is
12910 // coherent with DAG creation: no setcc node is created in this case.
12911 if (SCC->isUndef())
12912 return N2;
12913
12914 // Fold to a simpler select_cc
12915 if (SCC.getOpcode() == ISD::SETCC) {
12916 SDValue SelectOp =
12917 DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
12918 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12919 SelectOp->setFlags(SCC->getFlags());
12920 return SelectOp;
12921 }
12922 }
12923
12924 // If we can fold this based on the true/false value, do so.
12925 if (SimplifySelectOps(N, N2, N3))
12926 return SDValue(N, 0); // Don't revisit N.
12927
12928 // fold select_cc into other things, such as min/max/abs
12929 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
12930}
12931
12932SDValue DAGCombiner::visitSETCC(SDNode *N) {
12933 // setcc is very commonly used as an argument to brcond. This pattern
12934 // also lends itself to numerous combines and, as a result, it is desirable
12935 // to keep the argument to a brcond as a setcc as much as possible.
12936 bool PreferSetCC =
12937 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
12938
12939 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12940 EVT VT = N->getValueType(0);
12941 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12942 SDLoc DL(N);
12943
12944 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
12945 // If we prefer to have a setcc, and we don't, we'll try our best to
12946 // recreate one using rebuildSetCC.
12947 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12948 SDValue NewSetCC = rebuildSetCC(Combined);
12949
12950 // We don't have anything interesting to combine to.
12951 if (NewSetCC.getNode() == N)
12952 return SDValue();
12953
12954 if (NewSetCC)
12955 return NewSetCC;
12956 }
12957 return Combined;
12958 }
12959
12960 // Optimize
12961 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12962 // or
12963 // 2) (icmp eq/ne X, (rotate X, C1))
12964 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12965 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`).
12966 // Then:
12967 // If C1 is a power of 2, then the rotate and shift+and versions are
12968 // equivalent, so we can interchange them depending on target preference.
12969 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12970 // which in turn affects the constant C0. We can use this to get better
12971 // constants again determined by target preference.
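// e.g. for i64 X: (X & 0xffffffff) == (X >> 32) compares the two halves of X,
// which is the same test as X == rotr(X, 32).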
12972 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12973 auto IsAndWithShift = [](SDValue A, SDValue B) {
12974 return A.getOpcode() == ISD::AND &&
12975 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12976 A.getOperand(0) == B.getOperand(0);
12977 };
12978 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12979 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12980 B.getOperand(0) == A;
12981 };
12982 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12983 bool IsRotate = false;
12984
12985 // Find either shift+and or rotate pattern.
12986 if (IsAndWithShift(N0, N1)) {
12987 AndOrOp = N0;
12988 ShiftOrRotate = N1;
12989 } else if (IsAndWithShift(N1, N0)) {
12990 AndOrOp = N1;
12991 ShiftOrRotate = N0;
12992 } else if (IsRotateWithOp(N0, N1)) {
12993 IsRotate = true;
12994 AndOrOp = N0;
12995 ShiftOrRotate = N1;
12996 } else if (IsRotateWithOp(N1, N0)) {
12997 IsRotate = true;
12998 AndOrOp = N1;
12999 ShiftOrRotate = N0;
13000 }
13001
13002 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13003 (IsRotate || AndOrOp.hasOneUse())) {
13004 EVT OpVT = N0.getValueType();
13005 // Get constant shift/rotate amount and possibly mask (if its shift+and
13006 // variant).
13007 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13008 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13009 /*AllowTrunc*/ false);
13010 if (CNode == nullptr)
13011 return std::nullopt;
13012 return CNode->getAPIntValue();
13013 };
13014 std::optional<APInt> AndCMask =
13015 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13016 std::optional<APInt> ShiftCAmt =
13017 GetAPIntValue(ShiftOrRotate.getOperand(1));
13018 unsigned NumBits = OpVT.getScalarSizeInBits();
13019
13020 // We found constants.
13021 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13022 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13023 // Check that the constants meet the constraints.
13024 bool CanTransform = IsRotate;
13025 if (!CanTransform) {
13026 // Check that mask and shift complement each other.
13027 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13028 // Check that we are comparing all bits
13029 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13030 // Check that the and mask is correct for the shift
13031 CanTransform &=
13032 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13033 }
13034
13035 // See if target prefers another shift/rotate opcode.
13036 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13037 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13038 // Transform is valid and we have a new preference.
13039 if (CanTransform && NewShiftOpc != ShiftOpc) {
13040 SDValue NewShiftOrRotate =
13041 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13042 ShiftOrRotate.getOperand(1));
13043 SDValue NewAndOrOp = SDValue();
13044
13045 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13046 APInt NewMask =
13047 NewShiftOpc == ISD::SHL
13048 ? APInt::getHighBitsSet(NumBits,
13049 NumBits - ShiftCAmt->getZExtValue())
13050 : APInt::getLowBitsSet(NumBits,
13051 NumBits - ShiftCAmt->getZExtValue());
13052 NewAndOrOp =
13053 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13054 DAG.getConstant(NewMask, DL, OpVT));
13055 } else {
13056 NewAndOrOp = ShiftOrRotate.getOperand(0);
13057 }
13058
13059 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13060 }
13061 }
13062 }
13063 }
13064 return SDValue();
13065}
13066
13067SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13068 SDValue LHS = N->getOperand(0);
13069 SDValue RHS = N->getOperand(1);
13070 SDValue Carry = N->getOperand(2);
13071 SDValue Cond = N->getOperand(3);
13072
13073 // If Carry is false, fold to a regular SETCC.
13074 if (isNullConstant(Carry))
13075 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13076
13077 return SDValue();
13078}
13079
13080/// Check if N satisfies:
13081/// N is used once.
13082/// N is a Load.
13083 /// The load is compatible with ExtOpcode. It means:
13084 /// If the load has explicit zero/sign extension, ExtOpcode must have the same
13085 /// extension.
13086 /// Otherwise returns true.
13087static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13088 if (!N.hasOneUse())
13089 return false;
13090
13091 if (!isa<LoadSDNode>(N))
13092 return false;
13093
13094 LoadSDNode *Load = cast<LoadSDNode>(N);
13095 ISD::LoadExtType LoadExt = Load->getExtensionType();
13096 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13097 return true;
13098
13099 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13100 // extension.
13101 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13102 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13103 return false;
13104
13105 return true;
13106}
13107
13108/// Fold
13109/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13110/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13111/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13112/// This function is called by the DAGCombiner when visiting sext/zext/aext
13113 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13114 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13115 SelectionDAG &DAG, const SDLoc &DL,
13116 CombineLevel Level) {
13117 unsigned Opcode = N->getOpcode();
13118 SDValue N0 = N->getOperand(0);
13119 EVT VT = N->getValueType(0);
13120 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13121 Opcode == ISD::ANY_EXTEND) &&
13122 "Expected EXTEND dag node in input!");
13123
13124 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13125 !N0.hasOneUse())
13126 return SDValue();
13127
13128 SDValue Op1 = N0->getOperand(1);
13129 SDValue Op2 = N0->getOperand(2);
13130 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13131 return SDValue();
13132
13133 auto ExtLoadOpcode = ISD::EXTLOAD;
13134 if (Opcode == ISD::SIGN_EXTEND)
13135 ExtLoadOpcode = ISD::SEXTLOAD;
13136 else if (Opcode == ISD::ZERO_EXTEND)
13137 ExtLoadOpcode = ISD::ZEXTLOAD;
13138
13139 // An illegal VSELECT may fail instruction selection if it appears after
13140 // legalization (DAG Combine2), so conservatively check the OperationAction.
13141 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13142 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13143 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13144 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13145 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13146 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13147 return SDValue();
13148
13149 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13150 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13151 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13152}
13153
13154/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13155/// a build_vector of constants.
13156/// This function is called by the DAGCombiner when visiting sext/zext/aext
13157/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13158/// Vector extends are not folded if operations are legal; this is to
13159 /// avoid introducing illegal build_vector dag nodes.
13160 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13161 const TargetLowering &TLI,
13162 SelectionDAG &DAG, bool LegalTypes) {
13163 unsigned Opcode = N->getOpcode();
13164 SDValue N0 = N->getOperand(0);
13165 EVT VT = N->getValueType(0);
13166
13167 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13168 "Expected EXTEND dag node in input!");
13169
13170 // fold (sext c1) -> c1
13171 // fold (zext c1) -> c1
13172 // fold (aext c1) -> c1
13173 if (isa<ConstantSDNode>(N0))
13174 return DAG.getNode(Opcode, DL, VT, N0);
13175
13176 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13177 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13178 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13179 if (N0->getOpcode() == ISD::SELECT) {
13180 SDValue Op1 = N0->getOperand(1);
13181 SDValue Op2 = N0->getOperand(2);
13182 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13183 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13184 // For any_extend, choose sign extension of the constants to allow a
13185 // possible further transform to sign_extend_inreg, i.e.:
13186 //
13187 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13188 // t2: i64 = any_extend t1
13189 // -->
13190 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13191 // -->
13192 // t4: i64 = sign_extend_inreg t3
13193 unsigned FoldOpc = Opcode;
13194 if (FoldOpc == ISD::ANY_EXTEND)
13195 FoldOpc = ISD::SIGN_EXTEND;
13196 return DAG.getSelect(DL, VT, N0->getOperand(0),
13197 DAG.getNode(FoldOpc, DL, VT, Op1),
13198 DAG.getNode(FoldOpc, DL, VT, Op2));
13199 }
13200 }
13201
13202 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13203 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13204 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13205 EVT SVT = VT.getScalarType();
13206 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13207 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13208 return SDValue();
13209
13210 // We can fold this node into a build_vector.
13211 unsigned VTBits = SVT.getSizeInBits();
13212 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13213 SmallVector<SDValue, 8> Elts;
13214 unsigned NumElts = VT.getVectorNumElements();
13215
13216 for (unsigned i = 0; i != NumElts; ++i) {
13217 SDValue Op = N0.getOperand(i);
13218 if (Op.isUndef()) {
13219 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13220 Elts.push_back(DAG.getUNDEF(SVT));
13221 else
13222 Elts.push_back(DAG.getConstant(0, DL, SVT));
13223 continue;
13224 }
13225
13226 SDLoc DL(Op);
13227 // Get the constant value and if needed trunc it to the size of the type.
13228 // Nodes like build_vector might have constants wider than the scalar type.
13229 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13230 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13231 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13232 else
13233 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13234 }
13235
13236 return DAG.getBuildVector(VT, DL, Elts);
13237}
13238
13239// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13240// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13241 // transformation. Returns true if extensions are possible and the above
13242 // mentioned transformation is profitable.
13243 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13244 unsigned ExtOpc,
13245 SmallVectorImpl<SDNode *> &ExtendNodes,
13246 const TargetLowering &TLI) {
13247 bool HasCopyToRegUses = false;
13248 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13249 for (SDUse &Use : N0->uses()) {
13250 SDNode *User = Use.getUser();
13251 if (User == N)
13252 continue;
13253 if (Use.getResNo() != N0.getResNo())
13254 continue;
13255 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13256 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13257 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13258 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13259 // Sign bits will be lost after a zext.
13260 return false;
13261 bool Add = false;
13262 for (unsigned i = 0; i != 2; ++i) {
13263 SDValue UseOp = User->getOperand(i);
13264 if (UseOp == N0)
13265 continue;
13266 if (!isa<ConstantSDNode>(UseOp))
13267 return false;
13268 Add = true;
13269 }
13270 if (Add)
13271 ExtendNodes.push_back(User);
13272 continue;
13273 }
13274 // If truncates aren't free and there are users we can't
13275 // extend, it isn't worthwhile.
13276 if (!isTruncFree)
13277 return false;
13278 // Remember if this value is live-out.
13279 if (User->getOpcode() == ISD::CopyToReg)
13280 HasCopyToRegUses = true;
13281 }
13282
13283 if (HasCopyToRegUses) {
13284 bool BothLiveOut = false;
13285 for (SDUse &Use : N->uses()) {
13286 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13287 BothLiveOut = true;
13288 break;
13289 }
13290 }
13291 if (BothLiveOut)
13292 // Both unextended and extended values are live out. There had better be
13293 // a good reason for the transformation.
13294 return !ExtendNodes.empty();
13295 }
13296 return true;
13297}
13298
13299void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13300 SDValue OrigLoad, SDValue ExtLoad,
13301 ISD::NodeType ExtType) {
13302 // Extend SetCC uses if necessary.
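// Each recorded setcc compared the original (narrow) load result; rebuild it
// on the extended value, extending any constant operands to match.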
13303 SDLoc DL(ExtLoad);
13304 for (SDNode *SetCC : SetCCs) {
13305 SmallVector<SDValue, 4> Ops;
13306
13307 for (unsigned j = 0; j != 2; ++j) {
13308 SDValue SOp = SetCC->getOperand(j);
13309 if (SOp == OrigLoad)
13310 Ops.push_back(ExtLoad);
13311 else
13312 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13313 }
13314
13315 Ops.push_back(SetCC->getOperand(2));
13316 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13317 }
13318}
13319
13320// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13321SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13322 SDValue N0 = N->getOperand(0);
13323 EVT DstVT = N->getValueType(0);
13324 EVT SrcVT = N0.getValueType();
13325
13326 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13327 N->getOpcode() == ISD::ZERO_EXTEND) &&
13328 "Unexpected node type (not an extend)!");
13329
13330 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13331 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13332 // (v8i32 (sext (v8i16 (load x))))
13333 // into:
13334 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13335 // (v4i32 (sextload (x + 16)))))
13336 // Where uses of the original load, i.e.:
13337 // (v8i16 (load x))
13338 // are replaced with:
13339 // (v8i16 (truncate
13340 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13341 // (v4i32 (sextload (x + 16)))))))
13342 //
13343 // This combine is only applicable to illegal, but splittable, vectors.
13344 // All legal types, and illegal non-vector types, are handled elsewhere.
13345 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13346 //
13347 if (N0->getOpcode() != ISD::LOAD)
13348 return SDValue();
13349
13350 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13351
13352 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13353 !N0.hasOneUse() || !LN0->isSimple() ||
13354 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13355 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13356 return SDValue();
13357
13359 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13360 return SDValue();
13361
13362 ISD::LoadExtType ExtType =
13363 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13364
13365 // Try to split the vector types to get down to legal types.
13366 EVT SplitSrcVT = SrcVT;
13367 EVT SplitDstVT = DstVT;
13368 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13369 SplitSrcVT.getVectorNumElements() > 1) {
13370 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13371 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13372 }
13373
13374 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13375 return SDValue();
13376
13377 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13378
13379 SDLoc DL(N);
13380 const unsigned NumSplits =
13381 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13382 const unsigned Stride = SplitSrcVT.getStoreSize();
13383 SmallVector<SDValue, 4> Loads;
13384 SmallVector<SDValue, 4> Chains;
13385
13386 SDValue BasePtr = LN0->getBasePtr();
13387 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13388 const unsigned Offset = Idx * Stride;
13389
13390 SDValue SplitLoad =
13391 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13392 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13393 SplitSrcVT, LN0->getOriginalAlign(),
13394 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13395
13396 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13397
13398 Loads.push_back(SplitLoad.getValue(0));
13399 Chains.push_back(SplitLoad.getValue(1));
13400 }
13401
13402 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13403 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13404
13405 // Simplify TF.
13406 AddToWorklist(NewChain.getNode());
13407
13408 CombineTo(N, NewValue);
13409
13410 // Replace uses of the original load (before extension)
13411 // with a truncate of the concatenated sextloaded vectors.
13412 SDValue Trunc =
13413 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13414 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13415 CombineTo(N0.getNode(), Trunc, NewChain);
13416 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13417}
13418
13419// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13420// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13421SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13422 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13423 EVT VT = N->getValueType(0);
13424 EVT OrigVT = N->getOperand(0).getValueType();
13425 if (TLI.isZExtFree(OrigVT, VT))
13426 return SDValue();
13427
13428 // and/or/xor
13429 SDValue N0 = N->getOperand(0);
13430 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13431 N0.getOperand(1).getOpcode() != ISD::Constant ||
13432 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13433 return SDValue();
13434
13435 // shl/shr
13436 SDValue N1 = N0->getOperand(0);
13437 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13438 N1.getOperand(1).getOpcode() != ISD::Constant ||
13439 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13440 return SDValue();
13441
13442 // load
13443 if (!isa<LoadSDNode>(N1.getOperand(0)))
13444 return SDValue();
13445 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13446 EVT MemVT = Load->getMemoryVT();
13447 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13448 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13449 return SDValue();
13450
13451
13452 // If the shift op is SHL, the logic op must be AND, otherwise the result
13453 // will be wrong.
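// (A wide SHL keeps bits that the narrow SHL would have shifted out; ANDing
// with the zero-extended narrow constant masks them back off, but OR/XOR would
// let them leak into the result.)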
13454 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13455 return SDValue();
13456
13457 if (!N0.hasOneUse() || !N1.hasOneUse())
13458 return SDValue();
13459
13461 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13462 ISD::ZERO_EXTEND, SetCCs, TLI))
13463 return SDValue();
13464
13465 // Actually do the transformation.
13466 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13467 Load->getChain(), Load->getBasePtr(),
13468 Load->getMemoryVT(), Load->getMemOperand());
13469
13470 SDLoc DL1(N1);
13471 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13472 N1.getOperand(1));
13473
13474 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13475 SDLoc DL0(N0);
13476 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13477 DAG.getConstant(Mask, DL0, VT));
13478
13479 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13480 CombineTo(N, And);
13481 if (SDValue(Load, 0).hasOneUse()) {
13482 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13483 } else {
13484 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13485 Load->getValueType(0), ExtLoad);
13486 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13487 }
13488
13489 // N0 is dead at this point.
13490 recursivelyDeleteUnusedNodes(N0.getNode());
13491
13492 return SDValue(N,0); // Return N so it doesn't get rechecked!
13493}
13494
13495/// If we're narrowing or widening the result of a vector select and the final
13496/// size is the same size as a setcc (compare) feeding the select, then try to
13497/// apply the cast operation to the select's operands because matching vector
13498/// sizes for a select condition and other operands should be more efficient.
13499SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13500 unsigned CastOpcode = Cast->getOpcode();
13501 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13502 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13503 CastOpcode == ISD::FP_ROUND) &&
13504 "Unexpected opcode for vector select narrowing/widening");
13505
13506 // We only do this transform before legal ops because the pattern may be
13507 // obfuscated by target-specific operations after legalization. Do not create
13508 // an illegal select op, however, because that may be difficult to lower.
13509 EVT VT = Cast->getValueType(0);
13510 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13511 return SDValue();
13512
13513 SDValue VSel = Cast->getOperand(0);
13514 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13515 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13516 return SDValue();
13517
13518 // Does the setcc have the same vector size as the casted select?
13519 SDValue SetCC = VSel.getOperand(0);
13520 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13521 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13522 return SDValue();
13523
13524 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13525 SDValue A = VSel.getOperand(1);
13526 SDValue B = VSel.getOperand(2);
13527 SDValue CastA, CastB;
13528 SDLoc DL(Cast);
13529 if (CastOpcode == ISD::FP_ROUND) {
13530 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13531 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13532 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13533 } else {
13534 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13535 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13536 }
13537 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13538}
13539
13540// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13541 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13542 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13543 const TargetLowering &TLI, EVT VT,
13544 bool LegalOperations, SDNode *N,
13545 SDValue N0, ISD::LoadExtType ExtLoadType) {
13546 SDNode *N0Node = N0.getNode();
13547 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13548 : ISD::isZEXTLoad(N0Node);
13549 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13550 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13551 return SDValue();
13552
13553 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13554 EVT MemVT = LN0->getMemoryVT();
13555 if ((LegalOperations || !LN0->isSimple() ||
13556 VT.isVector()) &&
13557 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13558 return SDValue();
13559
13560 SDValue ExtLoad =
13561 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13562 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13563 Combiner.CombineTo(N, ExtLoad);
13564 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13565 if (LN0->use_empty())
13566 Combiner.recursivelyDeleteUnusedNodes(LN0);
13567 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13568}
13569
13570// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13571// Only generate vector extloads when 1) they're legal, and 2) they are
13572// deemed desirable by the target. NonNegZExt can be set to true if a zero
13573 // extend has the nonneg flag to allow use of sextload if profitable.
13574 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13575 const TargetLowering &TLI, EVT VT,
13576 bool LegalOperations, SDNode *N, SDValue N0,
13577 ISD::LoadExtType ExtLoadType,
13578 ISD::NodeType ExtOpc,
13579 bool NonNegZExt = false) {
13580 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13581 return {};
13582
13583 // If this is zext nneg, see if it would make sense to treat it as a sext.
13584 if (NonNegZExt) {
13585 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13586 "Unexpected load type or opcode");
13587 for (SDNode *User : N0->users()) {
13588 if (User->getOpcode() == ISD::SETCC) {
13589 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13590 if (ISD::isSignedIntSetCC(CC)) {
13591 ExtLoadType = ISD::SEXTLOAD;
13592 ExtOpc = ISD::SIGN_EXTEND;
13593 break;
13594 }
13595 }
13596 }
13597 }
13598
13599 // TODO: isFixedLengthVector() should be removed and any negative effects on
13600 // code generation should be treated as the result of that target's
13601 // implementation of isVectorLoadExtDesirable().
13602 if ((LegalOperations || VT.isFixedLengthVector() ||
13603 !cast<LoadSDNode>(N0)->isSimple()) &&
13604 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13605 return {};
13606
13607 bool DoXform = true;
13608 SmallVector<SDNode *, 4> SetCCs;
13609 if (!N0.hasOneUse())
13610 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13611 if (VT.isVector())
13612 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13613 if (!DoXform)
13614 return {};
13615
13616 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13617 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13618 LN0->getBasePtr(), N0.getValueType(),
13619 LN0->getMemOperand());
13620 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13621 // If the load value is used only by N, replace it via CombineTo N.
13622 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13623 Combiner.CombineTo(N, ExtLoad);
13624 if (NoReplaceTrunc) {
13625 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13626 Combiner.recursivelyDeleteUnusedNodes(LN0);
13627 } else {
13628 SDValue Trunc =
13629 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13630 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13631 }
13632 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13633}
13634
13635 static SDValue
13636 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13637 bool LegalOperations, SDNode *N, SDValue N0,
13638 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13639 if (!N0.hasOneUse())
13640 return SDValue();
13641
13642 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13643 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13644 return SDValue();
13645
13646 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13647 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13648 return SDValue();
13649
13650 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13651 return SDValue();
13652
13653 SDLoc dl(Ld);
13654 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13655 SDValue NewLoad = DAG.getMaskedLoad(
13656 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13657 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13658 ExtLoadType, Ld->isExpandingLoad());
13659 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13660 return NewLoad;
13661}
13662
13663 // fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13664 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13665 const TargetLowering &TLI, EVT VT,
13666 SDValue N0,
13667 ISD::LoadExtType ExtLoadType) {
13668 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13669 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13670 return {};
13671 EVT MemoryVT = ALoad->getMemoryVT();
13672 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13673 return {};
13674 // Can't fold into ALoad if it is already extending differently.
13675 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13676 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13677 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13678 return {};
13679
13680 EVT OrigVT = ALoad->getValueType(0);
13681 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13682 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13683 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13684 ALoad->getBasePtr(), ALoad->getMemOperand()));
13685 NewALoad->setExtensionType(ExtLoadType);
13686 DAG.ReplaceAllUsesOfValueWith(
13687 SDValue(ALoad, 0),
13688 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13689 // Update the chain uses.
13690 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13691 return SDValue(NewALoad, 0);
13692}
13693
13694 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13695 bool LegalOperations) {
13696 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13697 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13698
13699 SDValue SetCC = N->getOperand(0);
13700 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13701 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13702 return SDValue();
13703
13704 SDValue X = SetCC.getOperand(0);
13705 SDValue Ones = SetCC.getOperand(1);
13706 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13707 EVT VT = N->getValueType(0);
13708 EVT XVT = X.getValueType();
13709 // setge X, C is canonicalized to setgt, so we do not need to match that
13710 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13711 // not require the 'not' op.
13712 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13713 // Invert and smear/shift the sign bit:
13714 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13715 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
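// (setgt X, -1 tests "X is non-negative", i.e. the inverse of X's sign bit;
// shifting (not X) right by N-1 smears or isolates exactly that bit.)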
13716 SDLoc DL(N);
13717 unsigned ShCt = VT.getSizeInBits() - 1;
13718 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13719 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13720 SDValue NotX = DAG.getNOT(DL, X, VT);
13721 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13722 auto ShiftOpcode =
13723 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13724 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13725 }
13726 }
13727 return SDValue();
13728}
13729
13730SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13731 SDValue N0 = N->getOperand(0);
13732 if (N0.getOpcode() != ISD::SETCC)
13733 return SDValue();
13734
13735 SDValue N00 = N0.getOperand(0);
13736 SDValue N01 = N0.getOperand(1);
13737 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13738 EVT VT = N->getValueType(0);
13739 EVT N00VT = N00.getValueType();
13740 SDLoc DL(N);
13741
13742 // Propagate fast-math-flags.
13743 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13744
13745 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13746 // the same size as the compared operands. Try to optimize sext(setcc())
13747 // if this is the case.
13748 if (VT.isVector() && !LegalOperations &&
13749 TLI.getBooleanContents(N00VT) ==
13750 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13751 EVT SVT = getSetCCResultType(N00VT);
13752
13753 // If we already have the desired type, don't change it.
13754 if (SVT != N0.getValueType()) {
13755 // We know that the # elements of the results is the same as the
13756 // # elements of the compare (and the # elements of the compare result
13757 // for that matter). Check to see that they are the same size. If so,
13758 // we know that the element size of the sext'd result matches the
13759 // element size of the compare operands.
13760 if (VT.getSizeInBits() == SVT.getSizeInBits())
13761 return DAG.getSetCC(DL, VT, N00, N01, CC);
13762
13763 // If the desired elements are smaller or larger than the source
13764 // elements, we can use a matching integer vector type and then
13765 // truncate/sign extend.
13766 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13767 if (SVT == MatchingVecType) {
13768 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13769 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13770 }
13771 }
13772
13773 // Try to eliminate the sext of a setcc by zexting the compare operands.
13774 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13775 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13776 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13777 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13778 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13779
13780 // We have an unsupported narrow vector compare op that would be legal
13781 // if extended to the destination type. See if the compare operands
13782 // can be freely extended to the destination type.
13783 auto IsFreeToExtend = [&](SDValue V) {
13784 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13785 return true;
13786 // Match a simple, non-extended load that can be converted to a
13787 // legal {z/s}ext-load.
13788 // TODO: Allow widening of an existing {z/s}ext-load?
13789 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13790 ISD::isUNINDEXEDLoad(V.getNode()) &&
13791 cast<LoadSDNode>(V)->isSimple() &&
13792 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13793 return false;
13794
13795 // Non-chain users of this value must either be the setcc in this
13796 // sequence or extends that can be folded into the new {z/s}ext-load.
13797 for (SDUse &Use : V->uses()) {
13798 // Skip uses of the chain and the setcc.
13799 SDNode *User = Use.getUser();
13800 if (Use.getResNo() != 0 || User == N0.getNode())
13801 continue;
13802 // Extra users must have exactly the same cast we are about to create.
13803 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13804 // is enhanced similarly.
13805 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13806 return false;
13807 }
13808 return true;
13809 };
13810
13811 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13812 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13813 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13814 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13815 }
13816 }
13817 }
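// Example of the operand-extension path above: for
// (sext (setcc <8 x i8> a, b, ult) to <8 x i16>) on a target that cannot do
// the narrow compare but can do the wide one, and where a/b are constants or
// simple loads, the compare operands are zero-extended (ult is unsigned) and a
// single <8 x i16> setcc is emitted.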
13818
13819 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13820 // Here, T can be 1 or -1, depending on the type of the setcc and
13821 // getBooleanContents().
13822 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13823
13824 // To determine the "true" side of the select, we need to know the high bit
13825 // of the value returned by the setcc if it evaluates to true.
13826 // If the type of the setcc is i1, then the true case of the select is just
13827 // sext(i1 1), that is, -1.
13828 // If the type of the setcc is larger (say, i8) then the value of the high
13829 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13830 // of the appropriate width.
13831 SDValue ExtTrueVal = (SetCCWidth == 1)
13832 ? DAG.getAllOnesConstant(DL, VT)
13833 : DAG.getBoolConstant(true, DL, VT, N00VT);
13834 SDValue Zero = DAG.getConstant(0, DL, VT);
13835 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13836 return SCC;
13837
13838 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13839 EVT SetCCVT = getSetCCResultType(N00VT);
13840 // Don't do this transform for i1 because there's a select transform
13841 // that would reverse it.
13842 // TODO: We should not do this transform at all without a target hook
13843 // because a sext is likely cheaper than a select?
13844 if (SetCCVT.getScalarSizeInBits() != 1 &&
13845 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13846 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13847 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13848 }
13849 }
13850
13851 return SDValue();
13852}
13853
13854SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13855 SDValue N0 = N->getOperand(0);
13856 EVT VT = N->getValueType(0);
13857 SDLoc DL(N);
13858
13859 if (VT.isVector())
13860 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13861 return FoldedVOp;
13862
13863 // sext(undef) = 0 because the top bits will all be the same.
13864 if (N0.isUndef())
13865 return DAG.getConstant(0, DL, VT);
13866
13867 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13868 return Res;
13869
13870 // fold (sext (sext x)) -> (sext x)
13871 // fold (sext (aext x)) -> (sext x)
13872 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13873 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13874
13875 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13876 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13877 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13878 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13879 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13880 N0.getOperand(0));
13881
13882 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13883 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13884 SDValue N00 = N0.getOperand(0);
13885 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13886 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13887 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13888 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13889 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13890 }
13891 }
13892
13893 if (N0.getOpcode() == ISD::TRUNCATE) {
13894 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13895 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13896 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13897 SDNode *oye = N0.getOperand(0).getNode();
13898 if (NarrowLoad.getNode() != N0.getNode()) {
13899 CombineTo(N0.getNode(), NarrowLoad);
13900 // CombineTo deleted the truncate, if needed, but not what's under it.
13901 AddToWorklist(oye);
13902 }
13903 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13904 }
13905
13906 // See if the value being truncated is already sign extended. If so, just
13907 // eliminate the trunc/sext pair.
13908 SDValue Op = N0.getOperand(0);
13909 unsigned OpBits = Op.getScalarValueSizeInBits();
13910 unsigned MidBits = N0.getScalarValueSizeInBits();
13911 unsigned DestBits = VT.getScalarSizeInBits();
13912
13913 if (N0->getFlags().hasNoSignedWrap() ||
13914 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13915 if (OpBits == DestBits) {
13916 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13917 // bits, it is already ready.
13918 return Op;
13919 }
13920
13921 if (OpBits < DestBits) {
13922 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13923 // bits, just sext from i32.
13924 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13925 }
13926
13927 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13928 // bits, just truncate to i32.
13929 SDNodeFlags Flags;
13930 Flags.setNoSignedWrap(true);
13931 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13932 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
13933 }
13934
13935 // fold (sext (truncate x)) -> (sextinreg x).
13936 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13937 N0.getValueType())) {
13938 if (OpBits < DestBits)
13939 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13940 else if (OpBits > DestBits)
13941 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13942 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13943 DAG.getValueType(N0.getValueType()));
13944 }
13945 }
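// Example: (sext (trunc i32 X to i8) to i32). If X is known to have more than
// 24 sign bits, the trunc/sext pair folds away to X; otherwise, when
// sign_extend_inreg is available, it becomes (sext_inreg X, i8).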
13946
13947 // Try to simplify (sext (load x)).
13948 if (SDValue foldedExt =
13949 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13950 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13951 return foldedExt;
13952
13953 if (SDValue foldedExt =
13954 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13955 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13956 return foldedExt;
13957
13958 // fold (sext (load x)) to multiple smaller sextloads.
13959 // Only on illegal but splittable vectors.
13960 if (SDValue ExtLoad = CombineExtLoad(N))
13961 return ExtLoad;
13962
13963 // Try to simplify (sext (sextload x)).
13964 if (SDValue foldedExt = tryToFoldExtOfExtload(
13965 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13966 return foldedExt;
13967
13968 // Try to simplify (sext (atomic_load x)).
13969 if (SDValue foldedExt =
13970 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13971 return foldedExt;
13972
13973 // fold (sext (and/or/xor (load x), cst)) ->
13974 // (and/or/xor (sextload x), (sext cst))
13975 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13976 isa<LoadSDNode>(N0.getOperand(0)) &&
13977 N0.getOperand(1).getOpcode() == ISD::Constant &&
13978 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13979 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13980 EVT MemVT = LN00->getMemoryVT();
13981 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13982 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13983 SmallVector<SDNode *, 4> SetCCs;
13984 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13985 ISD::SIGN_EXTEND, SetCCs, TLI);
13986 if (DoXform) {
13987 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13988 LN00->getChain(), LN00->getBasePtr(),
13989 LN00->getMemoryVT(),
13990 LN00->getMemOperand());
13991 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13992 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13993 ExtLoad, DAG.getConstant(Mask, DL, VT));
13994 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13995 bool NoReplaceTruncAnd = !N0.hasOneUse();
13996 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13997 CombineTo(N, And);
13998 // If N0 has multiple uses, change other uses as well.
13999 if (NoReplaceTruncAnd) {
14000 SDValue TruncAnd =
14001 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14002 CombineTo(N0.getNode(), TruncAnd);
14003 }
14004 if (NoReplaceTrunc) {
14005 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14006 } else {
14007 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14008 LN00->getValueType(0), ExtLoad);
14009 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14010 }
14011 return SDValue(N,0); // Return N so it doesn't get rechecked!
14012 }
14013 }
14014 }
14015
14016 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14017 return V;
14018
14019 if (SDValue V = foldSextSetcc(N))
14020 return V;
14021
14022 // fold (sext x) -> (zext x) if the sign bit is known zero.
14023 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14024 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14025 DAG.SignBitIsZero(N0))
14026 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14027
14028 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14029 return NewVSel;
14030
14031 // Eliminate this sign extend by doing a negation in the destination type:
14032 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14033 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14037 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14038 return DAG.getNegative(Zext, DL, VT);
14039 }
14040 // Eliminate this sign extend by doing a decrement in the destination type:
14041 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14042 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14046 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14047 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14048 }
14049
14050 // fold sext (not i1 X) -> add (zext i1 X), -1
14051 // TODO: This could be extended to handle bool vectors.
14052 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14053 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14054 TLI.isOperationLegal(ISD::ADD, VT)))) {
14055 // If we can eliminate the 'not', the sext form should be better
14056 if (SDValue NewXor = visitXOR(N0.getNode())) {
14057 // Returning N0 is a form of in-visit replacement that may have
14058 // invalidated N0.
14059 if (NewXor.getNode() == N0.getNode()) {
14060 // Return SDValue here as the xor should have already been replaced in
14061 // this sext.
14062 return SDValue();
14063 }
14064
14065 // Return a new sext with the new xor.
14066 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14067 }
14068
14069 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14070 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14071 }
14072
14073 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14074 return Res;
14075
14076 return SDValue();
14077}
14078
14079/// Given an extending node with a pop-count operand, if the target does not
14080/// support a pop-count in the narrow source type but does support it in the
14081/// destination type, widen the pop-count to the destination type.
14082static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14083 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14084 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14085 "Expected extend op");
14086
14087 SDValue CtPop = Extend->getOperand(0);
14088 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14089 return SDValue();
14090
14091 EVT VT = Extend->getValueType(0);
14092 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14093 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14094 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14095 return SDValue();
14096
14097 // zext (ctpop X) --> ctpop (zext X)
14098 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14099 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14100}
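// Example: (zext (ctpop i16 X) to i32) on a target with an i32 ctpop but no
// i16 ctpop becomes (ctpop (zext X to i32)); zero-extending X does not change
// its population count.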
14101
14102// If we have (zext (abs X)) where X is a type that will be promoted by type
14103// legalization, convert to (abs (sext X)). But don't extend past a legal type.
14104static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14105 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14106
14107 EVT VT = Extend->getValueType(0);
14108 if (VT.isVector())
14109 return SDValue();
14110
14111 SDValue Abs = Extend->getOperand(0);
14112 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14113 return SDValue();
14114
14115 EVT AbsVT = Abs.getValueType();
14116 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14117 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14118 TargetLowering::TypePromoteInteger)
14119 return SDValue();
14120
14121 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14122
14123 SDValue SExt =
14124 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14125 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14126 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14127}
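// Example: (zext (abs i16 X) to i64) on a target that promotes i16 to i32
// becomes (zext (abs (sext X to i32)) to i64); sign-extending first preserves
// the value, so the wider abs computes the same result.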
14128
14129SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14130 SDValue N0 = N->getOperand(0);
14131 EVT VT = N->getValueType(0);
14132 SDLoc DL(N);
14133
14134 if (VT.isVector())
14135 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14136 return FoldedVOp;
14137
14138 // zext(undef) = 0
14139 if (N0.isUndef())
14140 return DAG.getConstant(0, DL, VT);
14141
14142 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14143 return Res;
14144
14145 // fold (zext (zext x)) -> (zext x)
14146 // fold (zext (aext x)) -> (zext x)
14147 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14148 SDNodeFlags Flags;
14149 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14150 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14151 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14152 }
14153
14154 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14155 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14156 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14157 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14158 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14159
14160 // fold (zext (truncate x)) -> (zext x) or
14161 // (zext (truncate x)) -> (truncate x)
14162 // This is valid when the truncated bits of x are already zero.
14163 SDValue Op;
14164 KnownBits Known;
14165 if (isTruncateOf(DAG, N0, Op, Known)) {
14166 APInt TruncatedBits =
14167 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14168 APInt(Op.getScalarValueSizeInBits(), 0) :
14169 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14170 N0.getScalarValueSizeInBits(),
14171 std::min(Op.getScalarValueSizeInBits(),
14172 VT.getScalarSizeInBits()));
14173 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14174 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14175 DAG.salvageDebugInfo(*N0.getNode());
14176
14177 return ZExtOrTrunc;
14178 }
14179 }
14180
14181 // fold (zext (truncate x)) -> (and x, mask)
14182 if (N0.getOpcode() == ISD::TRUNCATE) {
14183 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14184 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14185 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14186 SDNode *oye = N0.getOperand(0).getNode();
14187 if (NarrowLoad.getNode() != N0.getNode()) {
14188 CombineTo(N0.getNode(), NarrowLoad);
14189 // CombineTo deleted the truncate, if needed, but not what's under it.
14190 AddToWorklist(oye);
14191 }
14192 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14193 }
14194
14195 EVT SrcVT = N0.getOperand(0).getValueType();
14196 EVT MinVT = N0.getValueType();
14197
14198 if (N->getFlags().hasNonNeg()) {
14199 SDValue Op = N0.getOperand(0);
14200 unsigned OpBits = SrcVT.getScalarSizeInBits();
14201 unsigned MidBits = MinVT.getScalarSizeInBits();
14202 unsigned DestBits = VT.getScalarSizeInBits();
14203
14204 if (N0->getFlags().hasNoSignedWrap() ||
14205 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14206 if (OpBits == DestBits) {
14207 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14208 // bits, it is already ready.
14209 return Op;
14210 }
14211
14212 if (OpBits < DestBits) {
14213 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14214 // bits, just sext from i32.
14215 // FIXME: This can probably be ZERO_EXTEND nneg?
14216 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14217 }
14218
14219 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14220 // bits, just truncate to i32.
14221 SDNodeFlags Flags;
14222 Flags.setNoSignedWrap(true);
14223 Flags.setNoUnsignedWrap(true);
14224 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14225 }
14226 }
14227
14228 // Try to mask before the extension to avoid having to generate a larger mask,
14229 // possibly over several sub-vectors.
14230 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14231 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14233 SDValue Op = N0.getOperand(0);
14234 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14235 AddToWorklist(Op.getNode());
14236 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14237 // Transfer the debug info; the new node is equivalent to N0.
14238 DAG.transferDbgValues(N0, ZExtOrTrunc);
14239 return ZExtOrTrunc;
14240 }
14241 }
14242
14243 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14244 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14245 AddToWorklist(Op.getNode());
14246 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14247 // We may safely transfer the debug info describing the truncate node over
14248 // to the equivalent and operation.
14249 DAG.transferDbgValues(N0, And);
14250 return And;
14251 }
14252 }
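// Example of the masking fold above: (zext (trunc i32 X to i8) to i32)
// typically becomes (and X, 255), since only the low 8 bits of X matter.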
14253
14254 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14255 // if either of the casts is not free.
14256 if (N0.getOpcode() == ISD::AND &&
14257 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14258 N0.getOperand(1).getOpcode() == ISD::Constant &&
14259 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14260 !TLI.isZExtFree(N0.getValueType(), VT))) {
14261 SDValue X = N0.getOperand(0).getOperand(0);
14262 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14263 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14264 return DAG.getNode(ISD::AND, DL, VT,
14265 X, DAG.getConstant(Mask, DL, VT));
14266 }
14267
14268 // Try to simplify (zext (load x)).
14269 if (SDValue foldedExt = tryToFoldExtOfLoad(
14270 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14271 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14272 return foldedExt;
14273
14274 if (SDValue foldedExt =
14275 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14276 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14277 return foldedExt;
14278
14279 // fold (zext (load x)) to multiple smaller zextloads.
14280 // Only on illegal but splittable vectors.
14281 if (SDValue ExtLoad = CombineExtLoad(N))
14282 return ExtLoad;
14283
14284 // Try to simplify (zext (atomic_load x)).
14285 if (SDValue foldedExt =
14286 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14287 return foldedExt;
14288
14289 // fold (zext (and/or/xor (load x), cst)) ->
14290 // (and/or/xor (zextload x), (zext cst))
14291 // Unless (and (load x) cst) will match as a zextload already and has
14292 // additional users, or the zext is already free.
14293 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14294 isa<LoadSDNode>(N0.getOperand(0)) &&
14295 N0.getOperand(1).getOpcode() == ISD::Constant &&
14296 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14297 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14298 EVT MemVT = LN00->getMemoryVT();
14299 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14300 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14301 bool DoXform = true;
14302 SmallVector<SDNode *, 4> SetCCs;
14303 if (!N0.hasOneUse()) {
14304 if (N0.getOpcode() == ISD::AND) {
14305 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14306 EVT LoadResultTy = AndC->getValueType(0);
14307 EVT ExtVT;
14308 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14309 DoXform = false;
14310 }
14311 }
14312 if (DoXform)
14313 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14314 ISD::ZERO_EXTEND, SetCCs, TLI);
14315 if (DoXform) {
14316 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14317 LN00->getChain(), LN00->getBasePtr(),
14318 LN00->getMemoryVT(),
14319 LN00->getMemOperand());
14320 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14321 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14322 ExtLoad, DAG.getConstant(Mask, DL, VT));
14323 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14324 bool NoReplaceTruncAnd = !N0.hasOneUse();
14325 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14326 CombineTo(N, And);
14327 // If N0 has multiple uses, change other uses as well.
14328 if (NoReplaceTruncAnd) {
14329 SDValue TruncAnd =
14330 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14331 CombineTo(N0.getNode(), TruncAnd);
14332 }
14333 if (NoReplaceTrunc) {
14334 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14335 } else {
14336 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14337 LN00->getValueType(0), ExtLoad);
14338 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14339 }
14340 return SDValue(N,0); // Return N so it doesn't get rechecked!
14341 }
14342 }
14343 }
14344
14345 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14346 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14347 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14348 return ZExtLoad;
14349
14350 // Try to simplify (zext (zextload x)).
14351 if (SDValue foldedExt = tryToFoldExtOfExtload(
14352 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14353 return foldedExt;
14354
14355 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14356 return V;
14357
14358 if (N0.getOpcode() == ISD::SETCC) {
14359 // Propagate fast-math-flags.
14360 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14361
14362 // Only do this before legalize for now.
14363 if (!LegalOperations && VT.isVector() &&
14364 N0.getValueType().getVectorElementType() == MVT::i1) {
14365 EVT N00VT = N0.getOperand(0).getValueType();
14366 if (getSetCCResultType(N00VT) == N0.getValueType())
14367 return SDValue();
14368
14369 // We know that the # elements of the results is the same as the #
14370 // elements of the compare (and the # elements of the compare result for
14371 // that matter). Check to see that they are the same size. If so, we know
14372 // that the element size of the sext'd result matches the element size of
14373 // the compare operands.
14374 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14375 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14376 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14377 N0.getOperand(1), N0.getOperand(2));
14378 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14379 }
14380
14381 // If the desired elements are smaller or larger than the source
14382 // elements we can use a matching integer vector type and then
14383 // truncate/any extend followed by zext_in_reg.
14384 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14385 SDValue VsetCC =
14386 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14387 N0.getOperand(1), N0.getOperand(2));
14388 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14389 N0.getValueType());
14390 }
14391
14392 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14393 EVT N0VT = N0.getValueType();
14394 EVT N00VT = N0.getOperand(0).getValueType();
14395 if (SDValue SCC = SimplifySelectCC(
14396 DL, N0.getOperand(0), N0.getOperand(1),
14397 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14398 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14399 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14400 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14401 }
14402
14403 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14404 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14405 !TLI.isZExtFree(N0, VT)) {
14406 SDValue ShVal = N0.getOperand(0);
14407 SDValue ShAmt = N0.getOperand(1);
14408 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14409 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14410 if (N0.getOpcode() == ISD::SHL) {
14411 // If the original shl may be shifting out bits, do not perform this
14412 // transformation.
14413 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14414 ShVal.getOperand(0).getValueSizeInBits();
14415 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14416 // If the shift is too large, then see if we can deduce that the
14417 // shift is safe anyway.
14418
14419 // Check if the bits being shifted out are known to be zero.
14420 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
14421 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
14422 return SDValue();
14423 }
14424 }
14425
14426 // Ensure that the shift amount is wide enough for the shifted value.
14427 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14428 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14429
14430 return DAG.getNode(N0.getOpcode(), DL, VT,
14431 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14432 }
14433 }
14434 }
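// Example: (zext (shl (zext i8 X to i32), C) to i64) becomes
// (shl (zext ... to i64), C) as long as the shift cannot drop set bits; with
// only 8 possibly-nonzero low bits, any C <= 24 is safe without consulting
// known bits.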
14435
14436 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14437 return NewVSel;
14438
14439 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14440 return NewCtPop;
14441
14442 if (SDValue V = widenAbs(N, DAG))
14443 return V;
14444
14445 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14446 return Res;
14447
14448 // CSE zext nneg with sext if the zext is not free.
14449 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14450 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14451 if (CSENode)
14452 return SDValue(CSENode, 0);
14453 }
14454
14455 return SDValue();
14456}
14457
14458SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14459 SDValue N0 = N->getOperand(0);
14460 EVT VT = N->getValueType(0);
14461 SDLoc DL(N);
14462
14463 // aext(undef) = undef
14464 if (N0.isUndef())
14465 return DAG.getUNDEF(VT);
14466
14467 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14468 return Res;
14469
14470 // fold (aext (aext x)) -> (aext x)
14471 // fold (aext (zext x)) -> (zext x)
14472 // fold (aext (sext x)) -> (sext x)
14473 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14474 N0.getOpcode() == ISD::SIGN_EXTEND) {
14475 SDNodeFlags Flags;
14476 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14477 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14478 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14479 }
14480
14481 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14482 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14483 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14484 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14485 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14486 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14487 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14488
14489 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14490 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14491 if (N0.getOpcode() == ISD::TRUNCATE) {
14492 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14493 SDNode *oye = N0.getOperand(0).getNode();
14494 if (NarrowLoad.getNode() != N0.getNode()) {
14495 CombineTo(N0.getNode(), NarrowLoad);
14496 // CombineTo deleted the truncate, if needed, but not what's under it.
14497 AddToWorklist(oye);
14498 }
14499 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14500 }
14501 }
14502
14503 // fold (aext (truncate x))
14504 if (N0.getOpcode() == ISD::TRUNCATE)
14505 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14506
14507 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14508 // if the trunc is not free.
14509 if (N0.getOpcode() == ISD::AND &&
14510 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14511 N0.getOperand(1).getOpcode() == ISD::Constant &&
14512 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14513 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14514 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14515 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14516 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14517 }
14518
14519 // fold (aext (load x)) -> (aext (truncate (extload x)))
14520 // None of the supported targets knows how to perform load and any_ext
14521 // on vectors in one instruction, so attempt to fold to zext instead.
14522 if (VT.isVector()) {
14523 // Try to simplify (zext (load x)).
14524 if (SDValue foldedExt =
14525 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14526 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14527 return foldedExt;
14528 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14529 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14530 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14531 bool DoXform = true;
14532 SmallVector<SDNode *, 4> SetCCs;
14533 if (!N0.hasOneUse())
14534 DoXform =
14535 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14536 if (DoXform) {
14537 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14538 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14539 LN0->getBasePtr(), N0.getValueType(),
14540 LN0->getMemOperand());
14541 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14542 // If the load value is used only by N, replace it via CombineTo N.
14543 bool NoReplaceTrunc = N0.hasOneUse();
14544 CombineTo(N, ExtLoad);
14545 if (NoReplaceTrunc) {
14546 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14547 recursivelyDeleteUnusedNodes(LN0);
14548 } else {
14549 SDValue Trunc =
14550 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14551 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14552 }
14553 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14554 }
14555 }
14556
14557 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14558 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14559 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14560 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14561 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14562 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14563 ISD::LoadExtType ExtType = LN0->getExtensionType();
14564 EVT MemVT = LN0->getMemoryVT();
14565 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14566 SDValue ExtLoad =
14567 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14568 MemVT, LN0->getMemOperand());
14569 CombineTo(N, ExtLoad);
14570 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14571 recursivelyDeleteUnusedNodes(LN0);
14572 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14573 }
14574 }
14575
14576 if (N0.getOpcode() == ISD::SETCC) {
14577 // Propagate fast-math-flags.
14578 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14579
14580 // For vectors:
14581 // aext(setcc) -> vsetcc
14582 // aext(setcc) -> truncate(vsetcc)
14583 // aext(setcc) -> aext(vsetcc)
14584 // Only do this before legalize for now.
14585 if (VT.isVector() && !LegalOperations) {
14586 EVT N00VT = N0.getOperand(0).getValueType();
14587 if (getSetCCResultType(N00VT) == N0.getValueType())
14588 return SDValue();
14589
14590 // We know that the # elements of the results is the same as the
14591 // # elements of the compare (and the # elements of the compare result
14592 // for that matter). Check to see that they are the same size. If so,
14593 // we know that the element size of the sext'd result matches the
14594 // element size of the compare operands.
14595 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14596 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14597 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14598
14599 // If the desired elements are smaller or larger than the source
14600 // elements we can use a matching integer vector type and then
14601 // truncate/any extend
14602 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14603 SDValue VsetCC = DAG.getSetCC(
14604 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14605 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14606 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14607 }
14608
14609 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14610 if (SDValue SCC = SimplifySelectCC(
14611 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14612 DAG.getConstant(0, DL, VT),
14613 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14614 return SCC;
14615 }
14616
14617 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14618 return NewCtPop;
14619
14620 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14621 return Res;
14622
14623 return SDValue();
14624}
14625
14626SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14627 unsigned Opcode = N->getOpcode();
14628 SDValue N0 = N->getOperand(0);
14629 SDValue N1 = N->getOperand(1);
14630 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14631
14632 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14633 if (N0.getOpcode() == Opcode &&
14634 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14635 return N0;
14636
14637 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14638 N0.getOperand(0).getOpcode() == Opcode) {
14639 // We have an assert, truncate, assert sandwich. Make one stronger assert
14640 // by asserting on the smallest asserted type to the larger source type.
14641 // This eliminates the later assert:
14642 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14643 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14644 SDLoc DL(N);
14645 SDValue BigA = N0.getOperand(0);
14646 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14647 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14648 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14649 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14650 BigA.getOperand(0), MinAssertVTVal);
14651 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14652 }
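// Example: (AssertZext (trunc (AssertZext X, i16) to i32), i8) keeps only the
// stronger i8 assertion and becomes (trunc (AssertZext X, i8) to i32).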
14653
14654 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14655 // than X. Just move the AssertZext in front of the truncate and drop the
14656 // AssertSExt.
14657 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14658 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14659 Opcode == ISD::AssertZext) {
14660 SDValue BigA = N0.getOperand(0);
14661 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14662 if (AssertVT.bitsLT(BigA_AssertVT)) {
14663 SDLoc DL(N);
14664 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14665 BigA.getOperand(0), N1);
14666 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14667 }
14668 }
14669
14670 return SDValue();
14671}
14672
14673SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14674 SDLoc DL(N);
14675
14676 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14677 SDValue N0 = N->getOperand(0);
14678
14679 // Fold (assertalign (assertalign x, AL0), AL1) ->
14680 // (assertalign x, max(AL0, AL1))
14681 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14682 return DAG.getAssertAlign(DL, N0.getOperand(0),
14683 std::max(AL, AAN->getAlign()));
14684
14685 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14686 // this assert down to source operands so that those arithmetic ops could be
14687 // exposed to the DAG combining.
14688 switch (N0.getOpcode()) {
14689 default:
14690 break;
14691 case ISD::ADD:
14692 case ISD::SUB: {
14693 unsigned AlignShift = Log2(AL);
14694 SDValue LHS = N0.getOperand(0);
14695 SDValue RHS = N0.getOperand(1);
14696 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14697 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14698 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14699 if (LHSAlignShift < AlignShift)
14700 LHS = DAG.getAssertAlign(DL, LHS, AL);
14701 if (RHSAlignShift < AlignShift)
14702 RHS = DAG.getAssertAlign(DL, RHS, AL);
14703 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14704 }
14705 break;
14706 }
14707 }
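// Example of the sink above: (assertalign (add X, 16), align 8). The constant
// 16 already has at least three known trailing zero bits, so the alignment
// fact is pushed onto the other operand: (add (assertalign X, align 8), 16).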
14708
14709 return SDValue();
14710}
14711
14712/// If the result of a load is shifted/masked/truncated to an effectively
14713/// narrower type, try to transform the load to a narrower type and/or
14714/// use an extending load.
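/// A typical little-endian example: (i32 (trunc (srl (i64 load [p]), 32)))
/// becomes an i32 load from p+4, with the chain of the original load
/// redirected to the narrowed load.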
14715SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14716 unsigned Opc = N->getOpcode();
14717
14718 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14719 SDValue N0 = N->getOperand(0);
14720 EVT VT = N->getValueType(0);
14721 EVT ExtVT = VT;
14722
14723 // This transformation isn't valid for vector loads.
14724 if (VT.isVector())
14725 return SDValue();
14726
14727 // The ShAmt variable is used to indicate that we've consumed a right
14728 // shift. I.e. we want to narrow the width of the load by skipping to load the
14729 // ShAmt least significant bits.
14730 unsigned ShAmt = 0;
14731 // A special case is when the least significant bits from the load are masked
14732 // away, but using an AND rather than a right shift. ShiftedOffset is used to
14733 // indicate that the narrowed load should be left-shifted ShiftedOffset bits to
14734 // get the result.
14735 unsigned ShiftedOffset = 0;
14736 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14737 // extended to VT.
14738 if (Opc == ISD::SIGN_EXTEND_INREG) {
14739 ExtType = ISD::SEXTLOAD;
14740 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14741 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14742 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14743 // value, or it may be shifting a higher subword, half or byte into the
14744 // lowest bits.
14745
14746 // Only handle shift with constant shift amount, and the shiftee must be a
14747 // load.
14748 auto *LN = dyn_cast<LoadSDNode>(N0);
14749 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14750 if (!N1C || !LN)
14751 return SDValue();
14752 // If the shift amount is larger than the memory type then we're not
14753 // accessing any of the loaded bytes.
14754 ShAmt = N1C->getZExtValue();
14755 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14756 if (MemoryWidth <= ShAmt)
14757 return SDValue();
14758 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14759 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14760 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14761 // If original load is a SEXTLOAD then we can't simply replace it by a
14762 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14763 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14764 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14765 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14766 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14767 LN->getExtensionType() != ExtType)
14768 return SDValue();
14769 } else if (Opc == ISD::AND) {
14770 // An AND with a constant mask is the same as a truncate + zero-extend.
14771 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14772 if (!AndC)
14773 return SDValue();
14774
14775 const APInt &Mask = AndC->getAPIntValue();
14776 unsigned ActiveBits = 0;
14777 if (Mask.isMask()) {
14778 ActiveBits = Mask.countr_one();
14779 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14780 ShiftedOffset = ShAmt;
14781 } else {
14782 return SDValue();
14783 }
14784
14785 ExtType = ISD::ZEXTLOAD;
14786 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14787 }
14788
14789 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14790 // a right shift. Here we redo some of those checks, to possibly adjust the
14791 // ExtVT even further based on "a masking AND". We could also end up here for
14792 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14793 // need to be done here as well.
14794 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14795 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14796 // Bail out when the SRL has more than one use. This is done for historical
14797 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14798 // check below? And maybe it could be non-profitable to do the transform in
14799 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14800 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14801 if (!SRL.hasOneUse())
14802 return SDValue();
14803
14804 // Only handle shift with constant shift amount, and the shiftee must be a
14805 // load.
14806 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14807 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14808 if (!SRL1C || !LN)
14809 return SDValue();
14810
14811 // If the shift amount is larger than the input type then we're not
14812 // accessing any of the loaded bytes. If the load was a zextload/extload
14813 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14814 ShAmt = SRL1C->getZExtValue();
14815 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14816 if (ShAmt >= MemoryWidth)
14817 return SDValue();
14818
14819 // Because a SRL must be assumed to *need* to zero-extend the high bits
14820 // (as opposed to anyext the high bits), we can't combine the zextload
14821 // lowering of SRL and an sextload.
14822 if (LN->getExtensionType() == ISD::SEXTLOAD)
14823 return SDValue();
14824
14825 // Avoid reading outside the memory accessed by the original load (could
14826 // happen if we only adjust the load base pointer by ShAmt). Instead we
14827 // try to narrow the load even further. The typical scenario here is:
14828 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14829 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14830 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14831 // Don't replace sextload by zextload.
14832 if (ExtType == ISD::SEXTLOAD)
14833 return SDValue();
14834 // Narrow the load.
14835 ExtType = ISD::ZEXTLOAD;
14836 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14837 }
14838
14839 // If the SRL is only used by a masking AND, we may be able to adjust
14840 // the ExtVT to make the AND redundant.
14841 SDNode *Mask = *(SRL->user_begin());
14842 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14843 isa<ConstantSDNode>(Mask->getOperand(1))) {
14844 unsigned Offset, ActiveBits;
14845 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14846 if (ShiftMask.isMask()) {
14847 EVT MaskedVT =
14848 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14849 // If the mask is smaller, recompute the type.
14850 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14851 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14852 ExtVT = MaskedVT;
14853 } else if (ExtType == ISD::ZEXTLOAD &&
14854 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14855 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14856 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14857 // If the mask is shifted we can use a narrower load and a shl to insert
14858 // the trailing zeros.
14859 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14860 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14861 ExtVT = MaskedVT;
14862 ShAmt = Offset + ShAmt;
14863 ShiftedOffset = Offset;
14864 }
14865 }
14866 }
14867
14868 N0 = SRL.getOperand(0);
14869 }
14870
14871 // If the load is shifted left (and the result isn't shifted back right), we
14872 // can fold a truncate through the shift. The typical scenario is that N
14873 // points at a TRUNCATE here so the attempted fold is:
14874 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14875 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14876 unsigned ShLeftAmt = 0;
14877 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14878 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
14879 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14880 ShLeftAmt = N01->getZExtValue();
14881 N0 = N0.getOperand(0);
14882 }
14883 }
14884
14885 // If we haven't found a load, we can't narrow it.
14886 if (!isa<LoadSDNode>(N0))
14887 return SDValue();
14888
14889 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14890 // Reducing the width of a volatile load is illegal. For atomics, we may be
14891 // able to reduce the width provided we never widen again. (see D66309)
14892 if (!LN0->isSimple() ||
14893 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14894 return SDValue();
14895
14896 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14897 unsigned LVTStoreBits =
14898 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14899 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14900 return LVTStoreBits - EVTStoreBits - ShAmt;
14901 };
14902
14903 // We need to adjust the pointer to the load by ShAmt bits in order to load
14904 // the correct bytes.
14905 unsigned PtrAdjustmentInBits =
14906 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14907
14908 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14909 SDLoc DL(LN0);
14910 // The original load itself didn't wrap, so an offset within it doesn't.
14911 SDValue NewPtr =
14912 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
14913 DL, SDNodeFlags::NoUnsignedWrap);
14914 AddToWorklist(NewPtr.getNode());
14915
14916 SDValue Load;
14917 if (ExtType == ISD::NON_EXTLOAD)
14918 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14919 LN0->getPointerInfo().getWithOffset(PtrOff),
14920 LN0->getOriginalAlign(),
14921 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14922 else
14923 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14924 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14925 LN0->getOriginalAlign(),
14926 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14927
14928 // Replace the old load's chain with the new load's chain.
14929 WorklistRemover DeadNodes(*this);
14930 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14931
14932 // Shift the result left, if we've swallowed a left shift.
14933 SDValue Result = Load;
14934 if (ShLeftAmt != 0) {
14935 // If the shift amount is as large as the result size (but, presumably,
14936 // no larger than the source) then the useful bits of the result are
14937 // zero; we can't simply return the shortened shift, because the result
14938 // of that operation is undefined.
14939 if (ShLeftAmt >= VT.getScalarSizeInBits())
14940 Result = DAG.getConstant(0, DL, VT);
14941 else
14942 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
14943 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
14944 }
14945
14946 if (ShiftedOffset != 0) {
14947 // We're using a shifted mask, so the load now has an offset. This means
14948 // the data has been loaded into lower bits than it otherwise would have
14949 // been, so we need to shl the loaded data into the correct position in
14950 // the register.
14951 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14952 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14953 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14954 }
14955
14956 // Return the new loaded value.
14957 return Result;
14958}
14959
14960SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14961 SDValue N0 = N->getOperand(0);
14962 SDValue N1 = N->getOperand(1);
14963 EVT VT = N->getValueType(0);
14964 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14965 unsigned VTBits = VT.getScalarSizeInBits();
14966 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14967 SDLoc DL(N);
14968
14969 // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
14970 if (N0.isUndef())
14971 return DAG.getConstant(0, DL, VT);
14972
14973 // fold (sext_in_reg c1) -> c1
14974 if (SDValue C =
14975 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
14976 return C;
14977
14978 // If the input is already sign extended, just drop the extension.
14979 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14980 return N0;
14981
14982 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14983 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14984 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14985 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
14986
14987 // fold (sext_in_reg (sext x)) -> (sext x)
14988 // fold (sext_in_reg (aext x)) -> (sext x)
14989 // if x is small enough or if we know that x has more than 1 sign bit and the
14990 // sign_extend_inreg is extending from one of them.
14991 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14992 SDValue N00 = N0.getOperand(0);
14993 unsigned N00Bits = N00.getScalarValueSizeInBits();
14994 if ((N00Bits <= ExtVTBits ||
14995 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14996 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14997 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
14998 }
14999
15000 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15001 // if x is small enough or if we know that x has more than 1 sign bit and the
15002 // sign_extend_inreg is extending from one of them.
15003 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15004 SDValue N00 = N0.getOperand(0);
15005 unsigned N00Bits = N00.getScalarValueSizeInBits();
15006 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
15007 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
15008 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15009 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
15010 if ((N00Bits == ExtVTBits ||
15011 (!IsZext && (N00Bits < ExtVTBits ||
15012 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15013 (!LegalOperations ||
15014 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15015 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15016 }
15017
15018 // fold (sext_in_reg (zext x)) -> (sext x)
15019 // iff we are extending the source sign bit.
15020 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15021 SDValue N00 = N0.getOperand(0);
15022 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15023 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15024 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15025 }
15026
15027 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15028 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15029 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15030
15031 // fold operands of sext_in_reg based on knowledge that the top bits are not
15032 // demanded.
15033 if (SimplifyDemandedBits(SDValue(N, 0)))
15034 return SDValue(N, 0);
15035
15036 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15037 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15038 if (SDValue NarrowLoad = reduceLoadWidth(N))
15039 return NarrowLoad;
15040
15041 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15042 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15043 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15044 if (N0.getOpcode() == ISD::SRL) {
15045 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15046 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15047 // We can turn this into an SRA iff the input to the SRL is already sign
15048 // extended enough.
15049 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15050 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15051 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15052 N0.getOperand(1));
15053 }
15054 }
15055
15056 // fold (sext_inreg (extload x)) -> (sextload x)
15057 // If sextload is not supported by target, we can only do the combine when
15058 // load has one use. Doing otherwise can block folding the extload with other
15059 // extends that the target does support.
15060 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15061 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15062 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15063 N0.hasOneUse()) ||
15064 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15065 auto *LN0 = cast<LoadSDNode>(N0);
15066 SDValue ExtLoad =
15067 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15068 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15069 CombineTo(N, ExtLoad);
15070 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15071 AddToWorklist(ExtLoad.getNode());
15072 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15073 }
15074
15075 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15076 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15077 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15078 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15079 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15080 auto *LN0 = cast<LoadSDNode>(N0);
15081 SDValue ExtLoad =
15082 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15083 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15084 CombineTo(N, ExtLoad);
15085 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15086 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15087 }
15088
15089 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15090 // ignore it if the masked load is already sign extended
15091 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15092 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15093 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15094 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15095 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15096 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15097 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15098 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15099 CombineTo(N, ExtMaskedLoad);
15100 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15101 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15102 }
15103 }
15104
15105 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15106 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15107 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15109 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15110 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15111
15112 SDValue ExtLoad = DAG.getMaskedGather(
15113 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15114 GN0->getIndexType(), ISD::SEXTLOAD);
15115
15116 CombineTo(N, ExtLoad);
15117 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15118 AddToWorklist(ExtLoad.getNode());
15119 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15120 }
15121 }
15122
15123 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15124 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15125 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15126 N0.getOperand(1), false))
15127 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15128 }
15129
15130 // Fold (iM_signext_inreg
15131 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15132 // from iN)
15133 // -> (extract_subvector (signext iN_v to iM))
15134 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15135 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15136 SDValue InnerExt = N0.getOperand(0);
15137 EVT InnerExtVT = InnerExt->getValueType(0);
15138 SDValue Extendee = InnerExt->getOperand(0);
15139
15140 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15141 (!LegalOperations ||
15142 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15143 SDValue SignExtExtendee =
15144 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15145 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15146 N0.getOperand(1));
15147 }
15148 }
15149
15150 return SDValue();
15151}
15152
15153 static SDValue foldExtendVectorInregToExtendOfSubvector(
15154 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15155 bool LegalOperations) {
15156 unsigned InregOpcode = N->getOpcode();
15157 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15158
15159 SDValue Src = N->getOperand(0);
15160 EVT VT = N->getValueType(0);
15161 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15162 Src.getValueType().getVectorElementType(),
15163 VT.getVectorElementCount());
15164
15165 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15166 "Expected EXTEND_VECTOR_INREG dag node in input!");
15167
15168 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15169 // FIXME: one-use check may be overly restrictive
15170 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15171 return SDValue();
15172
15173 // Profitability check: we must be extending exactly one of its operands.
15174 // FIXME: this is probably overly restrictive.
15175 Src = Src.getOperand(0);
15176 if (Src.getValueType() != SrcVT)
15177 return SDValue();
15178
15179 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15180 return SDValue();
15181
15182 return DAG.getNode(Opcode, DL, VT, Src);
15183}
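// Editor's note (illustrative example, not part of the upstream source): for
// the helper above with Opcode = SIGN_EXTEND,
//   t1: v8i16 = concat_vectors t0:v4i16, undef:v4i16
//   t2: v4i32 = sign_extend_vector_inreg t1
// only the low operand t0 feeds the result, so it is rewritten as
//   t2: v4i32 = sign_extend t0
// provided t1 is only used here and SIGN_EXTEND of v4i32 is legal (or we are
// still before operation legalization).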
15184
15185SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15186 SDValue N0 = N->getOperand(0);
15187 EVT VT = N->getValueType(0);
15188 SDLoc DL(N);
15189
15190 if (N0.isUndef()) {
15191 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15192 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15193 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15194 ? DAG.getUNDEF(VT)
15195 : DAG.getConstant(0, DL, VT);
15196 }
15197
15198 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15199 return Res;
15200
15201 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15202 return SDValue(N, 0);
15203
15204 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15205 LegalOperations))
15206 return R;
15207
15208 return SDValue();
15209}
15210
15211SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15212 EVT VT = N->getValueType(0);
15213 SDValue N0 = N->getOperand(0);
15214
15215 SDValue FPVal;
15216 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15217 DAG.getTargetLoweringInfo().shouldConvertFpToSat(
15218 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15219 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15220 DAG.getValueType(VT.getScalarType()));
15221
15222 return SDValue();
15223}
15224
15225/// Detect patterns of truncation with unsigned saturation:
15226///
15227/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15228/// Return the source value x to be truncated or SDValue() if the pattern was
15229/// not matched.
15230///
15231 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15232 unsigned NumDstBits = VT.getScalarSizeInBits();
15233 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15234 // Saturation with truncation. We truncate from InVT to VT.
15235 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15236
15237 SDValue Min;
15238 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15239 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15240 return Min;
15241
15242 return SDValue();
15243}
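// Editor's addition: a minimal scalar sketch (plain C++, not LLVM code) of the
// identity detectUSatUPattern relies on. Clamping with umin against the
// destination type's unsigned max makes the following truncation lossless,
// which is exactly what a single TRUNCATE_USAT_U node computes.
static inline uint8_t truncUSatUSketch(uint32_t X) {
  uint32_t Clamped = std::min<uint32_t>(X, UINT8_MAX); // umin(x, 255)
  return static_cast<uint8_t>(Clamped); // == TRUNCATE_USAT_U i32 X to i8
}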
15244
15245/// Detect patterns of truncation with signed saturation:
15246/// (truncate (smin (smax (x, signed_min_of_dest_type),
15247/// signed_max_of_dest_type)) to dest_type)
15248/// or:
15249/// (truncate (smax (smin (x, signed_max_of_dest_type),
15250/// signed_min_of_dest_type)) to dest_type).
15251///
15252/// Return the source value to be truncated or SDValue() if the pattern was not
15253/// matched.
15254 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15255 unsigned NumDstBits = VT.getScalarSizeInBits();
15256 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15257 // Saturation with truncation. We truncate from InVT to VT.
15258 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15259
15260 SDValue Val;
15261 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15262 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15263
15264 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15265 m_SpecificInt(SignedMax))))
15266 return Val;
15267
15268 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15269 m_SpecificInt(SignedMin))))
15270 return Val;
15271
15272 return SDValue();
15273}
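// Editor's addition: the corresponding scalar sketch (plain C++, not LLVM
// code) for the signed pattern above. Clamping into [INT8_MIN, INT8_MAX]
// before truncating is what TRUNCATE_SSAT_S computes; smin/smax may appear in
// either order, which is why both orders are matched.
static inline int8_t truncSSatSSketch(int32_t X) {
  int32_t Clamped = std::min(std::max(X, int32_t(INT8_MIN)), int32_t(INT8_MAX));
  return static_cast<int8_t>(Clamped); // == TRUNCATE_SSAT_S i32 X to i8
}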
15274
15275/// Detect patterns of truncation with unsigned saturation:
15276 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15277 const SDLoc &DL) {
15278 unsigned NumDstBits = VT.getScalarSizeInBits();
15279 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15280 // Saturation with truncation. We truncate from InVT to VT.
15281 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15282
15283 SDValue Val;
15284 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15285 // Min == 0, Max is unsigned max of destination type.
15286 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15287 m_Zero())))
15288 return Val;
15289
15290 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15291 m_SpecificInt(UnsignedMax))))
15292 return Val;
15293
15294 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15295 m_SpecificInt(UnsignedMax))))
15296 return Val;
15297
15298 return SDValue();
15299}
15300
15301static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
15302 SDLoc &DL, const TargetLowering &TLI,
15303 SelectionDAG &DAG) {
15304 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15305 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
15306 TLI.isTypeDesirableForOp(Opc, VT));
15307 };
15308
15309 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
15310 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
15311 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
15312 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
15313 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15314 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15315 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15316 } else if (Src.getOpcode() == ISD::UMIN) {
15317 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15318 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15319 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15320 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
15321 if (SDValue USatVal = detectUSatUPattern(Src, VT))
15322 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
15323 }
15324
15325 return SDValue();
15326}
15327
15328SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
15329 SDValue N0 = N->getOperand(0);
15330 EVT VT = N->getValueType(0);
15331 EVT SrcVT = N0.getValueType();
15332 bool isLE = DAG.getDataLayout().isLittleEndian();
15333 SDLoc DL(N);
15334
15335 // trunc(undef) = undef
15336 if (N0.isUndef())
15337 return DAG.getUNDEF(VT);
15338
15339 // fold (truncate (truncate x)) -> (truncate x)
15340 if (N0.getOpcode() == ISD::TRUNCATE)
15341 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15342
15343 // fold saturated truncate
15344 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
15345 return SaturatedTR;
15346
15347 // fold (truncate c1) -> c1
15348 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
15349 return C;
15350
15351 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
15352 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
15353 N0.getOpcode() == ISD::SIGN_EXTEND ||
15354 N0.getOpcode() == ISD::ANY_EXTEND) {
15355 // if the source is smaller than the dest, we still need an extend.
15356 if (N0.getOperand(0).getValueType().bitsLT(VT))
15357 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15358 // if the source is larger than the dest, then we just need the truncate.
15359 if (N0.getOperand(0).getValueType().bitsGT(VT))
15360 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15361 // if the source and dest are the same type, we can drop both the extend
15362 // and the truncate.
15363 return N0.getOperand(0);
15364 }
15365
15366 // Try to narrow a truncate-of-sext_in_reg to the destination type:
15367 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
15368 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15369 N0.hasOneUse()) {
15370 SDValue X = N0.getOperand(0);
15371 SDValue ExtVal = N0.getOperand(1);
15372 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15373 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
15374 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
15375 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
15376 }
15377 }
15378
15379 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
15380 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
15381 return SDValue();
15382
15383 // Fold extract-and-trunc into a narrow extract. For example:
15384 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
15385 // i32 y = TRUNCATE(i64 x)
15386 // -- becomes --
15387 // v16i8 b = BITCAST (v2i64 val)
15388 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
15389 //
15390 // Note: We only run this optimization after type legalization (which often
15391 // creates this pattern) and before operation legalization after which
15392 // we need to be more careful about the vector instructions that we generate.
15393 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
15394 N0->hasOneUse()) {
15395 EVT TrTy = N->getValueType(0);
15396 SDValue Src = N0;
15397
15398 // Check for cases where we shift down an upper element before truncation.
15399 int EltOffset = 0;
15400 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15401 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
15402 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
15403 Src = Src.getOperand(0);
15404 EltOffset = *ShAmt / TrTy.getSizeInBits();
15405 }
15406 }
15407 }
15408
15409 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15410 EVT VecTy = Src.getOperand(0).getValueType();
15411 EVT ExTy = Src.getValueType();
15412
15413 auto EltCnt = VecTy.getVectorElementCount();
15414 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
15415 auto NewEltCnt = EltCnt * SizeRatio;
15416
15417 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
15418 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
15419
15420 SDValue EltNo = Src->getOperand(1);
15421 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
15422 int Elt = EltNo->getAsZExtVal();
15423 int Index = isLE ? (Elt * SizeRatio + EltOffset)
15424 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15425 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
15426 DAG.getBitcast(NVT, Src.getOperand(0)),
15427 DAG.getVectorIdxConstant(Index, DL));
15428 }
15429 }
15430 }
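// Editor's note (illustrative example, not part of the upstream source): on a
// little-endian target,
//   t1: i64 = extract_vector_elt v2i64:val, 1
//   t2: i32 = truncate t1
// has SizeRatio = 64/32 = 2, so the bitcast view is v4i32 and the kept lane is
// Elt * SizeRatio + EltOffset = 1 * 2 + 0 = 2:
//   t3: v4i32 = bitcast val
//   t2: i32   = extract_vector_elt t3, 2
// On a big-endian target the low half of each i64 sits at the higher lane
// index, so the formula yields 1 * 2 + (2 - 1) - 0 = 3 instead.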
15431
15432 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15433 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
15434 TLI.isTruncateFree(SrcVT, VT)) {
15435 if (!LegalOperations ||
15436 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
15437 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
15438 SDLoc SL(N0);
15439 SDValue Cond = N0.getOperand(0);
15440 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15441 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15442 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15443 }
15444 }
15445
15446 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15447 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15448 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15449 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15450 SDValue Amt = N0.getOperand(1);
15451 KnownBits Known = DAG.computeKnownBits(Amt);
15452 unsigned Size = VT.getScalarSizeInBits();
15453 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15454 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15455 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15456 if (AmtVT != Amt.getValueType()) {
15457 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15458 AddToWorklist(Amt.getNode());
15459 }
15460 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15461 }
15462 }
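// Editor's note (illustrative example, not part of the upstream source): when
// truncating i64 -> i32,
//   t1: i64 = shl t0, t_amt      ; known bits prove t_amt <= 31
//   t2: i32 = truncate t1
// only the low 32 bits of t1 matter, so this becomes
//   t2: i32 = shl (truncate t0), t_amt
// The Known.countMaxActiveBits() <= Log2_32(32) = 5 check guarantees the
// amount fits the narrower shift (t_amt < 32), so no oversized shift appears.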
15463
15464 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15465 return V;
15466
15467 if (SDValue ABD = foldABSToABD(N, DL))
15468 return ABD;
15469
15470 // Attempt to pre-truncate BUILD_VECTOR sources.
15471 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15472 N0.hasOneUse() &&
15473 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15474 // Avoid creating illegal types if running after type legalizer.
15475 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15476 EVT SVT = VT.getScalarType();
15477 SmallVector<SDValue, 8> TruncOps;
15478 for (const SDValue &Op : N0->op_values()) {
15479 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15480 TruncOps.push_back(TruncOp);
15481 }
15482 return DAG.getBuildVector(VT, DL, TruncOps);
15483 }
15484
15485 // trunc (splat_vector x) -> splat_vector (trunc x)
15486 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15487 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15488 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15489 EVT SVT = VT.getScalarType();
15490 return DAG.getSplatVector(
15491 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15492 }
15493
15494 // Fold a series of buildvector, bitcast, and truncate if possible.
15495 // For example fold
15496 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15497 // (2xi32 (buildvector x, y)).
15498 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15499 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15500 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15501 N0.getOperand(0).hasOneUse()) {
15502 SDValue BuildVect = N0.getOperand(0);
15503 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15504 EVT TruncVecEltTy = VT.getVectorElementType();
15505
15506 // Check that the element types match.
15507 if (BuildVectEltTy == TruncVecEltTy) {
15508 // Now we only need to compute the offset of the truncated elements.
15509 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15510 unsigned TruncVecNumElts = VT.getVectorNumElements();
15511 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15512 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15513
15514 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15515 "Invalid number of elements");
15516
15517 SmallVector<SDValue, 8> Opnds;
15518 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
15519 i += TruncEltOffset)
15520 Opnds.push_back(BuildVect.getOperand(i));
15521
15522 return DAG.getBuildVector(VT, DL, Opnds);
15523 }
15524 }
15525
15526 // fold (truncate (load x)) -> (smaller load x)
15527 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15528 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15529 if (SDValue Reduced = reduceLoadWidth(N))
15530 return Reduced;
15531
15532 // Handle the case where the truncated result is at least as wide as the
15533 // loaded type.
15534 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15535 auto *LN0 = cast<LoadSDNode>(N0);
15536 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15537 SDValue NewLoad = DAG.getExtLoad(
15538 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15539 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15540 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15541 return NewLoad;
15542 }
15543 }
15544 }
15545
15546 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15547 // where ... are all 'undef'.
15548 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15549 SmallVector<EVT, 8> VTs;
15550 SDValue V;
15551 unsigned Idx = 0;
15552 unsigned NumDefs = 0;
15553
15554 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15555 SDValue X = N0.getOperand(i);
15556 if (!X.isUndef()) {
15557 V = X;
15558 Idx = i;
15559 NumDefs++;
15560 }
15561 // Stop if more than one member is non-undef.
15562 if (NumDefs > 1)
15563 break;
15564
15565 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15566 VT.getVectorElementType(),
15567 X.getValueType().getVectorElementCount()));
15568 }
15569
15570 if (NumDefs == 0)
15571 return DAG.getUNDEF(VT);
15572
15573 if (NumDefs == 1) {
15574 assert(V.getNode() && "The single defined operand is empty!");
15575 SmallVector<SDValue, 8> Opnds;
15576 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15577 if (i != Idx) {
15578 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15579 continue;
15580 }
15581 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15582 AddToWorklist(NV.getNode());
15583 Opnds.push_back(NV);
15584 }
15585 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15586 }
15587 }
15588
15589 // Fold truncate of a bitcast of a vector to an extract of the low vector
15590 // element.
15591 //
15592 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15593 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15594 SDValue VecSrc = N0.getOperand(0);
15595 EVT VecSrcVT = VecSrc.getValueType();
15596 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15597 (!LegalOperations ||
15598 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15599 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15600 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15601 DAG.getVectorIdxConstant(Idx, DL));
15602 }
15603 }
15604
15605 // Simplify the operands using demanded-bits information.
15606 if (SimplifyDemandedBits(SDValue(N, 0)))
15607 return SDValue(N, 0);
15608
15609 // fold (truncate (extract_subvector(ext x))) ->
15610 // (extract_subvector x)
15611 // TODO: This can be generalized to cover cases where the truncate and extract
15612 // do not fully cancel each other out.
15613 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15614 SDValue N00 = N0.getOperand(0);
15615 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15616 N00.getOpcode() == ISD::ZERO_EXTEND ||
15617 N00.getOpcode() == ISD::ANY_EXTEND) {
15618 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15619 VT.getVectorElementType())
15620 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15621 N00.getOperand(0), N0.getOperand(1));
15622 }
15623 }
15624
15625 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15626 return NewVSel;
15627
15628 // Narrow a suitable binary operation with a non-opaque constant operand by
15629 // moving it ahead of the truncate. This is limited to pre-legalization
15630 // because targets may prefer a wider type during later combines and invert
15631 // this transform.
15632 switch (N0.getOpcode()) {
15633 case ISD::ADD:
15634 case ISD::SUB:
15635 case ISD::MUL:
15636 case ISD::AND:
15637 case ISD::OR:
15638 case ISD::XOR:
15639 if (!LegalOperations && N0.hasOneUse() &&
15640 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15641 isConstantOrConstantVector(N0.getOperand(1), true))) {
15642 // TODO: We already restricted this to pre-legalization, but for vectors
15643 // we are extra cautious to not create an unsupported operation.
15644 // Target-specific changes are likely needed to avoid regressions here.
15645 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15646 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15647 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15648 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15649 }
15650 }
15651 break;
15652 case ISD::ADDE:
15653 case ISD::UADDO_CARRY:
15654 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15655 // (trunc uaddo_carry(X, Y, Carry)) ->
15656 // (uaddo_carry trunc(X), trunc(Y), Carry)
15657 // When the adde's carry is not used.
15658 // We only do this for uaddo_carry before operation legalization.
15659 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15660 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15661 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15662 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15663 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15664 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15665 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15666 }
15667 break;
15668 case ISD::USUBSAT:
15669 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15670 // enough to know that the upper bits are zero, we must ensure that we don't
15671 // introduce an extra truncate.
15672 if (!LegalOperations && N0.hasOneUse() &&
15675 VT.getScalarSizeInBits() &&
15676 hasOperation(N0.getOpcode(), VT)) {
15677 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15678 DAG, DL);
15679 }
15680 break;
15681 }
15682
15683 return SDValue();
15684}
15685
15686static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15687 SDValue Elt = N->getOperand(i);
15688 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15689 return Elt.getNode();
15690 return Elt.getOperand(Elt.getResNo()).getNode();
15691}
15692
15693/// build_pair (load, load) -> load
15694/// if load locations are consecutive.
15695SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15696 assert(N->getOpcode() == ISD::BUILD_PAIR);
15697
15698 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15699 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15700
15701 // A BUILD_PAIR always has the least significant part in elt 0 and the
15702 // most significant part in elt 1. So when combining into one large load, we
15703 // need to consider the endianness.
15704 if (DAG.getDataLayout().isBigEndian())
15705 std::swap(LD1, LD2);
15706
15707 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15708 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15709 LD1->getAddressSpace() != LD2->getAddressSpace())
15710 return SDValue();
15711
15712 unsigned LD1Fast = 0;
15713 EVT LD1VT = LD1->getValueType(0);
15714 unsigned LD1Bytes = LD1VT.getStoreSize();
15715 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15716 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15717 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15718 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15719 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15720 LD1->getPointerInfo(), LD1->getAlign());
15721
15722 return SDValue();
15723}
15724
15725static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15726 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15727 // and Lo parts; on big-endian machines it doesn't.
15728 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15729}
15730
15731SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15732 const TargetLowering &TLI) {
15733 // If this is not a bitcast to an FP type or if the target doesn't have
15734 // IEEE754-compliant FP logic, we're done.
15735 EVT VT = N->getValueType(0);
15736 SDValue N0 = N->getOperand(0);
15737 EVT SourceVT = N0.getValueType();
15738
15739 if (!VT.isFloatingPoint())
15740 return SDValue();
15741
15742 // TODO: Handle cases where the integer constant is a different scalar
15743 // bitwidth to the FP.
15744 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15745 return SDValue();
15746
15747 unsigned FPOpcode;
15748 APInt SignMask;
15749 switch (N0.getOpcode()) {
15750 case ISD::AND:
15751 FPOpcode = ISD::FABS;
15752 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15753 break;
15754 case ISD::XOR:
15755 FPOpcode = ISD::FNEG;
15756 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15757 break;
15758 case ISD::OR:
15759 FPOpcode = ISD::FABS;
15760 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15761 break;
15762 default:
15763 return SDValue();
15764 }
15765
15766 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15767 return SDValue();
15768
15769 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15770 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15771 // removing this would require more changes.
15772 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15773 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15774 return true;
15775
15776 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15777 };
15778
15779 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15780 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15781 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15782 // fneg (fabs X)
15783 SDValue LogicOp0 = N0.getOperand(0);
15784 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15785 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15786 IsBitCastOrFree(LogicOp0, VT)) {
15787 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15788 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15789 NumFPLogicOpsConv++;
15790 if (N0.getOpcode() == ISD::OR)
15791 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15792 return FPOp;
15793 }
15794
15795 return SDValue();
15796}
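// Editor's note (illustrative example, not part of the upstream source): for
// f32 the sign mask is 0x80000000, so
//   t1: i32 = bitcast f32:x
//   t2: i32 = xor t1, 0x80000000
//   t3: f32 = bitcast t2
// becomes (fneg x); the AND form with 0x7fffffff becomes (fabs x), and the OR
// form, which forces the sign bit on, becomes (fneg (fabs x)).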
15797
15798SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15799 SDValue N0 = N->getOperand(0);
15800 EVT VT = N->getValueType(0);
15801
15802 if (N0.isUndef())
15803 return DAG.getUNDEF(VT);
15804
15805 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15806 // Only do this before legalize types, unless both types are integer and the
15807 // scalar type is legal. Only do this before legalize ops, since the target
15808 // may depend on the bitcast.
15809 // First check to see if this is all constant.
15810 // TODO: Support FP bitcasts after legalize types.
15811 if (VT.isVector() &&
15812 (!LegalTypes ||
15813 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15814 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15815 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15816 cast<BuildVectorSDNode>(N0)->isConstant())
15817 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15818 VT.getVectorElementType());
15819
15820 // If the input is a constant, let getNode fold it.
15821 if (isIntOrFPConstant(N0)) {
15822 // If we can't allow illegal operations, we need to check that this is just
15823 // an fp -> int or int -> fp conversion and that the resulting operation will
15824 // be legal.
15825 if (!LegalOperations ||
15826 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15827 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15828 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15829 TLI.isOperationLegal(ISD::Constant, VT))) {
15830 SDValue C = DAG.getBitcast(VT, N0);
15831 if (C.getNode() != N)
15832 return C;
15833 }
15834 }
15835
15836 // (conv (conv x, t1), t2) -> (conv x, t2)
15837 if (N0.getOpcode() == ISD::BITCAST)
15838 return DAG.getBitcast(VT, N0.getOperand(0));
15839
15840 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15841 // iff the current bitwise logicop type isn't legal
15842 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15843 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15844 auto IsFreeBitcast = [VT](SDValue V) {
15845 return (V.getOpcode() == ISD::BITCAST &&
15846 V.getOperand(0).getValueType() == VT) ||
15847 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15848 V->hasOneUse());
15849 };
15850 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15851 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15852 DAG.getBitcast(VT, N0.getOperand(0)),
15853 DAG.getBitcast(VT, N0.getOperand(1)));
15854 }
15855
15856 // fold (conv (load x)) -> (load (conv*)x)
15857 // If the resultant load doesn't need a higher alignment than the original!
15858 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15859 // Do not remove the cast if the types differ in endian layout.
15860 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15861 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15862 // If the load is volatile, we only want to change the load type if the
15863 // resulting load is legal. Otherwise we might increase the number of
15864 // memory accesses. We don't care if the original type was legal or not
15865 // as we assume software couldn't rely on the number of accesses of an
15866 // illegal type.
15867 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15868 TLI.isOperationLegal(ISD::LOAD, VT))) {
15869 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15870
15871 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15872 *LN0->getMemOperand())) {
15873 SDValue Load =
15874 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15875 LN0->getMemOperand());
15876 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15877 return Load;
15878 }
15879 }
15880
15881 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15882 return V;
15883
15884 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15885 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15886 //
15887 // For ppc_fp128:
15888 // fold (bitcast (fneg x)) ->
15889 // flipbit = signbit
15890 // (xor (bitcast x) (build_pair flipbit, flipbit))
15891 //
15892 // fold (bitcast (fabs x)) ->
15893 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15894 // (xor (bitcast x) (build_pair flipbit, flipbit))
15895 // This often reduces constant pool loads.
15896 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15897 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15898 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15899 !N0.getValueType().isVector()) {
15900 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15901 AddToWorklist(NewConv.getNode());
15902
15903 SDLoc DL(N);
15904 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15905 assert(VT.getSizeInBits() == 128);
15906 SDValue SignBit = DAG.getConstant(
15907 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15908 SDValue FlipBit;
15909 if (N0.getOpcode() == ISD::FNEG) {
15910 FlipBit = SignBit;
15911 AddToWorklist(FlipBit.getNode());
15912 } else {
15913 assert(N0.getOpcode() == ISD::FABS);
15914 SDValue Hi =
15915 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15916 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15917 SDLoc(NewConv)));
15918 AddToWorklist(Hi.getNode());
15919 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15920 AddToWorklist(FlipBit.getNode());
15921 }
15922 SDValue FlipBits =
15923 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15924 AddToWorklist(FlipBits.getNode());
15925 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15926 }
15927 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15928 if (N0.getOpcode() == ISD::FNEG)
15929 return DAG.getNode(ISD::XOR, DL, VT,
15930 NewConv, DAG.getConstant(SignBit, DL, VT));
15931 assert(N0.getOpcode() == ISD::FABS);
15932 return DAG.getNode(ISD::AND, DL, VT,
15933 NewConv, DAG.getConstant(~SignBit, DL, VT));
15934 }
15935
15936 // fold (bitconvert (fcopysign cst, x)) ->
15937 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15938 // Note that we don't handle (copysign x, cst) because this can always be
15939 // folded to an fneg or fabs.
15940 //
15941 // For ppc_fp128:
15942 // fold (bitcast (fcopysign cst, x)) ->
15943 // flipbit = (and (extract_element
15944 // (xor (bitcast cst), (bitcast x)), 0),
15945 // signbit)
15946 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15947 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15948 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15949 !VT.isVector()) {
15950 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15951 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15952 if (isTypeLegal(IntXVT)) {
15953 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15954 AddToWorklist(X.getNode());
15955
15956 // If X has a different width than the result/lhs, sext it or truncate it.
15957 unsigned VTWidth = VT.getSizeInBits();
15958 if (OrigXWidth < VTWidth) {
15959 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15960 AddToWorklist(X.getNode());
15961 } else if (OrigXWidth > VTWidth) {
15962 // To get the sign bit in the right place, we have to shift it right
15963 // before truncating.
15964 SDLoc DL(X);
15965 X = DAG.getNode(ISD::SRL, DL,
15966 X.getValueType(), X,
15967 DAG.getConstant(OrigXWidth-VTWidth, DL,
15968 X.getValueType()));
15969 AddToWorklist(X.getNode());
15970 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15971 AddToWorklist(X.getNode());
15972 }
15973
15974 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15975 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15976 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15977 AddToWorklist(Cst.getNode());
15978 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15979 AddToWorklist(X.getNode());
15980 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15981 AddToWorklist(XorResult.getNode());
15982 SDValue XorResult64 = DAG.getNode(
15983 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15984 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15985 SDLoc(XorResult)));
15986 AddToWorklist(XorResult64.getNode());
15987 SDValue FlipBit =
15988 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15989 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15990 AddToWorklist(FlipBit.getNode());
15991 SDValue FlipBits =
15992 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15993 AddToWorklist(FlipBits.getNode());
15994 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15995 }
15996 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15997 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15998 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15999 AddToWorklist(X.getNode());
16000
16001 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16002 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16003 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16004 AddToWorklist(Cst.getNode());
16005
16006 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16007 }
16008 }
16009
16010 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16011 if (N0.getOpcode() == ISD::BUILD_PAIR)
16012 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16013 return CombineLD;
16014
16015 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16016 // => int_vt (any_extend elt_vt:x)
16017 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16018 SDValue SrcScalar = N0.getOperand(0);
16019 if (SrcScalar.getValueType().isScalarInteger())
16020 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16021 }
16022
16023 // Remove double bitcasts from shuffles - this is often a legacy of
16024 // XformToShuffleWithZero being used to combine bitmaskings (of
16025 // float vectors bitcast to integer vectors) into shuffles.
16026 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16027 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16028 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16029 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16030 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16031 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16032
16033 // If operands are a bitcast, peek through if it casts the original VT.
16034 // If operands are a constant, just bitcast back to original VT.
16035 auto PeekThroughBitcast = [&](SDValue Op) {
16036 if (Op.getOpcode() == ISD::BITCAST &&
16037 Op.getOperand(0).getValueType() == VT)
16038 return SDValue(Op.getOperand(0));
16039 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16040 return DAG.getBitcast(VT, Op);
16041 return SDValue();
16042 };
16043
16044 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16045 // the result type of this bitcast. This would eliminate at least one
16046 // bitcast. See the transform in InstCombine.
16047 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16048 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16049 if (!(SV0 && SV1))
16050 return SDValue();
16051
16052 int MaskScale =
16053 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16054 SmallVector<int, 8> NewMask;
16055 for (int M : SVN->getMask())
16056 for (int i = 0; i != MaskScale; ++i)
16057 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16058
16059 SDValue LegalShuffle =
16060 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16061 if (LegalShuffle)
16062 return LegalShuffle;
16063 }
16064
16065 return SDValue();
16066}
16067
16068SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16069 EVT VT = N->getValueType(0);
16070 return CombineConsecutiveLoads(N, VT);
16071}
16072
16073SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16074 SDValue N0 = N->getOperand(0);
16075
16076 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16077 return N0;
16078
16079 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16080 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16081 // example https://reviews.llvm.org/D136529#4120959.
16082 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16083 return SDValue();
16084
16085 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16086 // Try to push freeze through instructions that propagate but don't produce
16087 // poison as far as possible. If an operand of freeze satisfies three
16088 // conditions: 1) one-use, 2) does not produce poison, and 3) has all but one
16089 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16090 // the freeze through to the operands that are not guaranteed non-poison.
16091 // NOTE: we will strip poison-generating flags, so ignore them here.
16092 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16093 /*ConsiderFlags*/ false) ||
16094 N0->getNumValues() != 1 || !N0->hasOneUse())
16095 return SDValue();
16096
16097 bool AllowMultipleMaybePoisonOperands =
16098 N0.getOpcode() == ISD::SELECT_CC ||
16099 N0.getOpcode() == ISD::SETCC ||
16100 N0.getOpcode() == ISD::BUILD_VECTOR ||
16101 N0.getOpcode() == ISD::BUILD_PAIR ||
16104
16105 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16106 // ones" or "constant" into something that depends on FrozenUndef. We can
16107 // instead pick undef values to keep those properties, while at the same time
16108 // folding away the freeze.
16109 // If we implement a more general solution for folding away freeze(undef) in
16110 // the future, then this special handling can be removed.
16111 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16112 SDLoc DL(N0);
16113 EVT VT = N0.getValueType();
16114 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16115 return DAG.getAllOnesConstant(DL, VT);
16116 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16117 SmallVector<SDValue, 8> NewVecC;
16118 for (const SDValue &Op : N0->op_values())
16119 NewVecC.push_back(
16120 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16121 return DAG.getBuildVector(VT, DL, NewVecC);
16122 }
16123 }
16124
16125 SmallSet<SDValue, 8> MaybePoisonOperands;
16126 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16127 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16128 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
16129 /*Depth*/ 1))
16130 continue;
16131 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16132 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16133 if (IsNewMaybePoisonOperand)
16134 MaybePoisonOperandNumbers.push_back(OpNo);
16135 if (!HadMaybePoisonOperands)
16136 continue;
16137 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16138 // Multiple maybe-poison ops when not allowed - bail out.
16139 return SDValue();
16140 }
16141 }
16142 // NOTE: the whole op may not be guaranteed not to be undef or poison because
16143 // it could create undef or poison due to its poison-generating flags.
16144 // So not finding any maybe-poison operands is fine.
16145
16146 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16147 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16148 // operands via the operand numbers. The typical scenario is that we have
16149 // something like this
16150 // t262: i32 = freeze t181
16151 // t150: i32 = ctlz_zero_undef t262
16152 // t184: i32 = ctlz_zero_undef t181
16153 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16154 // When freezing the t181 operand we get t262 back, and then the
16155 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16156 // also recursively replace t184 by t150.
16157 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16158 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16159 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
16160 continue;
16161 // First, freeze each offending operand.
16162 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16163 // Then, change all other uses of unfrozen operand to use frozen operand.
16164 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16165 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16166 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16167 // But, that also updated the use in the freeze we just created, thus
16168 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16169 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16170 MaybePoisonOperand);
16171 }
16172 }
16173
16174 // This node has been merged with another.
16175 if (N->getOpcode() == ISD::DELETED_NODE)
16176 return SDValue(N, 0);
16177
16178 // The whole node may have been updated, so the value we were holding
16179 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16180 N0 = N->getOperand(0);
16181
16182 // Finally, recreate the node; its operands were updated to use
16183 // frozen operands, so we just need to use its "original" operands.
16184 SmallVector<SDValue> Ops(N0->ops());
16185 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
16186 for (SDValue &Op : Ops) {
16187 if (Op.getOpcode() == ISD::UNDEF)
16188 Op = DAG.getFreeze(Op);
16189 }
16190
16191 SDValue R;
16192 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
16193 // Special case handling for ShuffleVectorSDNode nodes.
16194 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
16195 SVN->getMask());
16196 } else {
16197 // NOTE: this strips poison generating flags.
16198 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16199 }
16200 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
16201 "Can't create node that may be undef/poison!");
16202 return R;
16203}
16204
16205/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
16206/// operands. DstEltVT indicates the destination element value type.
16207SDValue DAGCombiner::
16208ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
16209 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
16210
16211 // If this is already the right type, we're done.
16212 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
16213
16214 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
16215 unsigned DstBitSize = DstEltVT.getSizeInBits();
16216
16217 // If this is a conversion of N elements of one type to N elements of another
16218 // type, convert each element. This handles FP<->INT cases.
16219 if (SrcBitSize == DstBitSize) {
16220 SmallVector<SDValue, 8> Ops;
16221 for (SDValue Op : BV->op_values()) {
16222 // If the vector element type is not legal, the BUILD_VECTOR operands
16223 // are promoted and implicitly truncated. Make that explicit here.
16224 if (Op.getValueType() != SrcEltVT)
16225 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
16226 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
16227 AddToWorklist(Ops.back().getNode());
16228 }
16229 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
16230 BV->getValueType(0).getVectorNumElements());
16231 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
16232 }
16233
16234 // Otherwise, we're growing or shrinking the elements. To avoid having to
16235 // handle annoying details of growing/shrinking FP values, we convert them to
16236 // int first.
16237 if (SrcEltVT.isFloatingPoint()) {
16238 // Convert the input float vector to an int vector where the elements are
16239 // the same size.
16240 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
16241 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
16242 SrcEltVT = IntVT;
16243 }
16244
16245 // Now we know the input is an integer vector. If the output is an FP type,
16246 // convert to integer first, then to FP of the right size.
16247 if (DstEltVT.isFloatingPoint()) {
16248 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
16249 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
16250
16251 // Next, convert to FP elements of the same size.
16252 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
16253 }
16254
16255 // Okay, we know the src/dst types are both integer types of differing widths.
16256 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
16257
16258 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
16259 // BuildVectorSDNode?
16260 auto *BVN = cast<BuildVectorSDNode>(BV);
16261
16262 // Extract the constant raw bit data.
16263 BitVector UndefElements;
16264 SmallVector<APInt> RawBits;
16265 bool IsLE = DAG.getDataLayout().isLittleEndian();
16266 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16267 return SDValue();
16268
16269 SDLoc DL(BV);
16270 SmallVector<SDValue, 8> Ops;
16271 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
16272 if (UndefElements[I])
16273 Ops.push_back(DAG.getUNDEF(DstEltVT));
16274 else
16275 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
16276 }
16277
16278 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
16279 return DAG.getBuildVector(VT, DL, Ops);
16280}
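// Editor's note (illustrative example, not part of the upstream source):
// bitcasting the constant vector
//   v2i32 build_vector 0x11223344, 0x55667788
// to v1i64 concatenates the raw element bits. On a little-endian target
// element 0 supplies the low half, giving build_vector 0x5566778811223344; on
// a big-endian target element 0 supplies the high half, giving
// 0x1122334455667788.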
16281
16282// Returns true if floating point contraction is allowed on the FMUL-SDValue
16283// `N`
16284 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
16285 assert(N.getOpcode() == ISD::FMUL);
16286
16287 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
16288 N->getFlags().hasAllowContract();
16289}
16290
16291// Returns true if `N` can assume no infinities involved in its computation.
16292 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
16293 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16294}
16295
16296/// Try to perform FMA combining on a given FADD node.
16297template <class MatchContextClass>
16298SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
16299 SDValue N0 = N->getOperand(0);
16300 SDValue N1 = N->getOperand(1);
16301 EVT VT = N->getValueType(0);
16302 SDLoc SL(N);
16303 MatchContextClass matcher(DAG, TLI, N);
16304 const TargetOptions &Options = DAG.getTarget().Options;
16305
16306 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16307
16308 // Floating-point multiply-add with intermediate rounding.
16309 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16310 // FIXME: Add VP_FMAD opcode.
16311 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16312
16313 // Floating-point multiply-add without intermediate rounding.
16314 bool HasFMA =
16315 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16316 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16317
16318 // No valid opcode, do not combine.
16319 if (!HasFMAD && !HasFMA)
16320 return SDValue();
16321
16322 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16323 Options.UnsafeFPMath || HasFMAD);
16324 // If the addition is not contractable, do not combine.
16325 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16326 return SDValue();
16327
16328 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16329 // beneficial. It does not reduce latency. It increases register pressure. It
16330 // replaces an fadd with an fma which is a more complex instruction, so is
16331 // likely to have a larger encoding, use more functional units, etc.
16332 if (N0 == N1)
16333 return SDValue();
16334
16335 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16336 return SDValue();
16337
16338 // Always prefer FMAD to FMA for precision.
16339 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16340 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16341
16342 auto isFusedOp = [&](SDValue N) {
16343 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16344 };
16345
16346 // Is the node an FMUL and contractable either due to global flags or
16347 // SDNodeFlags.
16348 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16349 if (!matcher.match(N, ISD::FMUL))
16350 return false;
16351 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16352 };
16353 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
16354 // prefer to fold the multiply with fewer uses.
16355 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
16356 if (N0->use_size() > N1->use_size())
16357 std::swap(N0, N1);
16358 }
16359
16360 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
16361 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
16362 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
16363 N0.getOperand(1), N1);
16364 }
16365
16366 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
16367 // Note: Commutes FADD operands.
16368 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
16369 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
16370 N1.getOperand(1), N0);
16371 }
16372
16373 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
16374 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
16375 // This also works with nested fma instructions:
16376 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
16377 // fma A, B, (fma C, D, fma (E, F, G))
16378 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
16379 // fma A, B, (fma C, D, fma (E, F, G)).
16380 // This requires reassociation because it changes the order of operations.
16381 bool CanReassociate =
16382 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16383 if (CanReassociate) {
16384 SDValue FMA, E;
16385 if (isFusedOp(N0) && N0.hasOneUse()) {
16386 FMA = N0;
16387 E = N1;
16388 } else if (isFusedOp(N1) && N1.hasOneUse()) {
16389 FMA = N1;
16390 E = N0;
16391 }
16392
16393 SDValue TmpFMA = FMA;
16394 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
16395 SDValue FMul = TmpFMA->getOperand(2);
16396 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
16397 SDValue C = FMul.getOperand(0);
16398 SDValue D = FMul.getOperand(1);
16399 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
16400 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
16401 // Replacing the inner FMul could cause the outer FMA to be simplified
16402 // away.
16403 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
16404 }
16405
16406 TmpFMA = TmpFMA->getOperand(2);
16407 }
16408 }
16409
16410 // Look through FP_EXTEND nodes to do more combining.
16411
16412 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
16413 if (matcher.match(N0, ISD::FP_EXTEND)) {
16414 SDValue N00 = N0.getOperand(0);
16415 if (isContractableFMUL(N00) &&
16416 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16417 N00.getValueType())) {
16418 return matcher.getNode(
16419 PreferredFusedOpcode, SL, VT,
16420 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16421 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
16422 }
16423 }
16424
16425 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
16426 // Note: Commutes FADD operands.
16427 if (matcher.match(N1, ISD::FP_EXTEND)) {
16428 SDValue N10 = N1.getOperand(0);
16429 if (isContractableFMUL(N10) &&
16430 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16431 N10.getValueType())) {
16432 return matcher.getNode(
16433 PreferredFusedOpcode, SL, VT,
16434 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
16435 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16436 }
16437 }
16438
16439 // More folding opportunities when target permits.
16440 if (Aggressive) {
16441 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
16442 // -> (fma x, y, (fma (fpext u), (fpext v), z))
16443 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16444 SDValue Z) {
16445 return matcher.getNode(
16446 PreferredFusedOpcode, SL, VT, X, Y,
16447 matcher.getNode(PreferredFusedOpcode, SL, VT,
16448 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16449 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16450 };
16451 if (isFusedOp(N0)) {
16452 SDValue N02 = N0.getOperand(2);
16453 if (matcher.match(N02, ISD::FP_EXTEND)) {
16454 SDValue N020 = N02.getOperand(0);
16455 if (isContractableFMUL(N020) &&
16456 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16457 N020.getValueType())) {
16458 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
16459 N020.getOperand(0), N020.getOperand(1),
16460 N1);
16461 }
16462 }
16463 }
16464
16465 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16466 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16467 // FIXME: This turns two single-precision and one double-precision
16468 // operation into two double-precision operations, which might not be
16469 // interesting for all targets, especially GPUs.
16470 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16471 SDValue Z) {
16472 return matcher.getNode(
16473 PreferredFusedOpcode, SL, VT,
16474 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16475 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16476 matcher.getNode(PreferredFusedOpcode, SL, VT,
16477 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16478 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16479 };
16480 if (N0.getOpcode() == ISD::FP_EXTEND) {
16481 SDValue N00 = N0.getOperand(0);
16482 if (isFusedOp(N00)) {
16483 SDValue N002 = N00.getOperand(2);
16484 if (isContractableFMUL(N002) &&
16485 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16486 N00.getValueType())) {
16487 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16488 N002.getOperand(0), N002.getOperand(1),
16489 N1);
16490 }
16491 }
16492 }
16493
16494 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16495 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16496 if (isFusedOp(N1)) {
16497 SDValue N12 = N1.getOperand(2);
16498 if (N12.getOpcode() == ISD::FP_EXTEND) {
16499 SDValue N120 = N12.getOperand(0);
16500 if (isContractableFMUL(N120) &&
16501 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16502 N120.getValueType())) {
16503 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16504 N120.getOperand(0), N120.getOperand(1),
16505 N0);
16506 }
16507 }
16508 }
16509
16510 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16511 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16512 // FIXME: This turns two single-precision and one double-precision
16513 // operation into two double-precision operations, which might not be
16514 // interesting for all targets, especially GPUs.
16515 if (N1.getOpcode() == ISD::FP_EXTEND) {
16516 SDValue N10 = N1.getOperand(0);
16517 if (isFusedOp(N10)) {
16518 SDValue N102 = N10.getOperand(2);
16519 if (isContractableFMUL(N102) &&
16520 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16521 N10.getValueType())) {
16522 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16523 N102.getOperand(0), N102.getOperand(1),
16524 N0);
16525 }
16526 }
16527 }
16528 }
16529
16530 return SDValue();
16531}
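// Editor's note (illustrative example, not part of the upstream source): with
// contraction allowed (fp-contract=fast globally or the 'contract' flag on the
// nodes) and FMA profitable,
//   t2: f32 = fmul t0, t1
//   t3: f32 = fadd t2, t_acc
// is fused into
//   t3: f32 = fma t0, t1, t_acc
// removing the intermediate rounding of the multiply. FMAD is preferred when
// it is legal because it keeps that intermediate rounding and therefore stays
// bit-identical to the unfused sequence.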
16532
16533/// Try to perform FMA combining on a given FSUB node.
16534template <class MatchContextClass>
16535SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16536 SDValue N0 = N->getOperand(0);
16537 SDValue N1 = N->getOperand(1);
16538 EVT VT = N->getValueType(0);
16539 SDLoc SL(N);
16540 MatchContextClass matcher(DAG, TLI, N);
16541 const TargetOptions &Options = DAG.getTarget().Options;
16542
16543 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16544
16545 // Floating-point multiply-add with intermediate rounding.
16546 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16547 // FIXME: Add VP_FMAD opcode.
16548 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16549
16550 // Floating-point multiply-add without intermediate rounding.
16551 bool HasFMA =
16552 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16553      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16554
16555 // No valid opcode, do not combine.
16556 if (!HasFMAD && !HasFMA)
16557 return SDValue();
16558
16559 const SDNodeFlags Flags = N->getFlags();
16560 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16561 Options.UnsafeFPMath || HasFMAD);
16562
16563 // If the subtraction is not contractable, do not combine.
16564 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16565 return SDValue();
16566
16567 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16568 return SDValue();
16569
16570 // Always prefer FMAD to FMA for precision.
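  // (ISD::FMAD keeps the intermediate rounding of the separate fmul and fadd,
  // so it reproduces the unfused result exactly, unlike ISD::FMA.)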
16571 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16572   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16573   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16574
16575 // Is the node an FMUL and contractable either due to global flags or
16576 // SDNodeFlags.
16577 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16578 if (!matcher.match(N, ISD::FMUL))
16579 return false;
16580 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16581 };
16582
16583 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16584 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16585 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16586 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16587 XY.getOperand(1),
16588 matcher.getNode(ISD::FNEG, SL, VT, Z));
16589 }
16590 return SDValue();
16591 };
16592
16593 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16594 // Note: Commutes FSUB operands.
16595 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16596 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16597 return matcher.getNode(
16598 PreferredFusedOpcode, SL, VT,
16599 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16600 YZ.getOperand(1), X);
16601 }
16602 return SDValue();
16603 };
16604
16605 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16606 // prefer to fold the multiply with fewer uses.
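  // Fusing a multiply that has other users does not remove it, so fold the
  // multiply with fewer uses and keep the busier one as a plain FMUL operand.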
16607 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16608 (N0->use_size() > N1->use_size())) {
16609 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16610 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16611 return V;
16612 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16613 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16614 return V;
16615 } else {
16616 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16617 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16618 return V;
16619 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16620 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16621 return V;
16622 }
16623
16624   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
16625 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16626 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16627 SDValue N00 = N0.getOperand(0).getOperand(0);
16628 SDValue N01 = N0.getOperand(0).getOperand(1);
16629 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16630 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16631 matcher.getNode(ISD::FNEG, SL, VT, N1));
16632 }
16633
16634 // Look through FP_EXTEND nodes to do more combining.
16635
16636 // fold (fsub (fpext (fmul x, y)), z)
16637 // -> (fma (fpext x), (fpext y), (fneg z))
16638 if (matcher.match(N0, ISD::FP_EXTEND)) {
16639 SDValue N00 = N0.getOperand(0);
16640 if (isContractableFMUL(N00) &&
16641 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16642 N00.getValueType())) {
16643 return matcher.getNode(
16644 PreferredFusedOpcode, SL, VT,
16645 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16646 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16647 matcher.getNode(ISD::FNEG, SL, VT, N1));
16648 }
16649 }
16650
16651 // fold (fsub x, (fpext (fmul y, z)))
16652 // -> (fma (fneg (fpext y)), (fpext z), x)
16653 // Note: Commutes FSUB operands.
16654 if (matcher.match(N1, ISD::FP_EXTEND)) {
16655 SDValue N10 = N1.getOperand(0);
16656 if (isContractableFMUL(N10) &&
16657 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16658 N10.getValueType())) {
16659 return matcher.getNode(
16660 PreferredFusedOpcode, SL, VT,
16661 matcher.getNode(
16662 ISD::FNEG, SL, VT,
16663 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16664 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16665 }
16666 }
16667
16668   // fold (fsub (fpext (fneg (fmul x, y))), z)
16669   //   -> (fneg (fma (fpext x), (fpext y), z))
16670   // Note: This could be removed with appropriate canonicalization of the
16671   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16672   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16673   // us from implementing the canonicalization in visitFSUB.
16674 if (matcher.match(N0, ISD::FP_EXTEND)) {
16675 SDValue N00 = N0.getOperand(0);
16676 if (matcher.match(N00, ISD::FNEG)) {
16677 SDValue N000 = N00.getOperand(0);
16678 if (isContractableFMUL(N000) &&
16679 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16680 N00.getValueType())) {
16681 return matcher.getNode(
16682 ISD::FNEG, SL, VT,
16683 matcher.getNode(
16684 PreferredFusedOpcode, SL, VT,
16685 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16686 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16687 N1));
16688 }
16689 }
16690 }
16691
16692   // fold (fsub (fneg (fpext (fmul x, y))), z)
16693   //   -> (fneg (fma (fpext x), (fpext y), z))
16694   // Note: This could be removed with appropriate canonicalization of the
16695   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16696   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16697   // us from implementing the canonicalization in visitFSUB.
16698 if (matcher.match(N0, ISD::FNEG)) {
16699 SDValue N00 = N0.getOperand(0);
16700 if (matcher.match(N00, ISD::FP_EXTEND)) {
16701 SDValue N000 = N00.getOperand(0);
16702 if (isContractableFMUL(N000) &&
16703 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16704 N000.getValueType())) {
16705 return matcher.getNode(
16706 ISD::FNEG, SL, VT,
16707 matcher.getNode(
16708 PreferredFusedOpcode, SL, VT,
16709 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16710 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16711 N1));
16712 }
16713 }
16714 }
16715
16716 auto isReassociable = [&Options](SDNode *N) {
16717 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16718 };
16719
16720 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16721 &isReassociable](SDValue N) {
16722 return isContractableFMUL(N) && isReassociable(N.getNode());
16723 };
16724
16725 auto isFusedOp = [&](SDValue N) {
16726 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16727 };
16728
16729 // More folding opportunities when target permits.
16730 if (Aggressive && isReassociable(N)) {
16731 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16732 // fold (fsub (fma x, y, (fmul u, v)), z)
16733     //   -> (fma x, y, (fma u, v, (fneg z)))
16734 if (CanFuse && isFusedOp(N0) &&
16735 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16736 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16737 return matcher.getNode(
16738 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16739 matcher.getNode(PreferredFusedOpcode, SL, VT,
16740 N0.getOperand(2).getOperand(0),
16741 N0.getOperand(2).getOperand(1),
16742 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16743 }
16744
16745 // fold (fsub x, (fma y, z, (fmul u, v)))
16746 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16747 if (CanFuse && isFusedOp(N1) &&
16748 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16749 N1->hasOneUse() && NoSignedZero) {
16750 SDValue N20 = N1.getOperand(2).getOperand(0);
16751 SDValue N21 = N1.getOperand(2).getOperand(1);
16752 return matcher.getNode(
16753 PreferredFusedOpcode, SL, VT,
16754 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16755 N1.getOperand(1),
16756 matcher.getNode(PreferredFusedOpcode, SL, VT,
16757 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16758 }
16759
16760 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16761     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16762 if (isFusedOp(N0) && N0->hasOneUse()) {
16763 SDValue N02 = N0.getOperand(2);
16764 if (matcher.match(N02, ISD::FP_EXTEND)) {
16765 SDValue N020 = N02.getOperand(0);
16766 if (isContractableAndReassociableFMUL(N020) &&
16767 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16768 N020.getValueType())) {
16769 return matcher.getNode(
16770 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16771 matcher.getNode(
16772 PreferredFusedOpcode, SL, VT,
16773 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16774 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16775 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16776 }
16777 }
16778 }
16779
16780 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16781 // -> (fma (fpext x), (fpext y),
16782 // (fma (fpext u), (fpext v), (fneg z)))
16783 // FIXME: This turns two single-precision and one double-precision
16784 // operation into two double-precision operations, which might not be
16785 // interesting for all targets, especially GPUs.
16786 if (matcher.match(N0, ISD::FP_EXTEND)) {
16787 SDValue N00 = N0.getOperand(0);
16788 if (isFusedOp(N00)) {
16789 SDValue N002 = N00.getOperand(2);
16790 if (isContractableAndReassociableFMUL(N002) &&
16791 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16792 N00.getValueType())) {
16793 return matcher.getNode(
16794 PreferredFusedOpcode, SL, VT,
16795 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16796 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16797 matcher.getNode(
16798 PreferredFusedOpcode, SL, VT,
16799 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16800 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16801 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16802 }
16803 }
16804 }
16805
16806 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16807 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16808 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16809 N1->hasOneUse()) {
16810 SDValue N120 = N1.getOperand(2).getOperand(0);
16811 if (isContractableAndReassociableFMUL(N120) &&
16812 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16813 N120.getValueType())) {
16814 SDValue N1200 = N120.getOperand(0);
16815 SDValue N1201 = N120.getOperand(1);
16816 return matcher.getNode(
16817 PreferredFusedOpcode, SL, VT,
16818 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16819 N1.getOperand(1),
16820 matcher.getNode(
16821 PreferredFusedOpcode, SL, VT,
16822 matcher.getNode(ISD::FNEG, SL, VT,
16823 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16824 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16825 }
16826 }
16827
16828 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16829 // -> (fma (fneg (fpext y)), (fpext z),
16830 // (fma (fneg (fpext u)), (fpext v), x))
16831 // FIXME: This turns two single-precision and one double-precision
16832 // operation into two double-precision operations, which might not be
16833 // interesting for all targets, especially GPUs.
16834 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16835 SDValue CvtSrc = N1.getOperand(0);
16836 SDValue N100 = CvtSrc.getOperand(0);
16837 SDValue N101 = CvtSrc.getOperand(1);
16838 SDValue N102 = CvtSrc.getOperand(2);
16839 if (isContractableAndReassociableFMUL(N102) &&
16840 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16841 CvtSrc.getValueType())) {
16842 SDValue N1020 = N102.getOperand(0);
16843 SDValue N1021 = N102.getOperand(1);
16844 return matcher.getNode(
16845 PreferredFusedOpcode, SL, VT,
16846 matcher.getNode(ISD::FNEG, SL, VT,
16847 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16848 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16849 matcher.getNode(
16850 PreferredFusedOpcode, SL, VT,
16851 matcher.getNode(ISD::FNEG, SL, VT,
16852 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16853 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16854 }
16855 }
16856 }
16857
16858 return SDValue();
16859}
16860
16861/// Try to perform FMA combining on a given FMUL node based on the distributive
16862/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16863/// subtraction instead of addition).
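/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y), since
/// (x + 1) * y == x * y + y.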
16864SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16865 SDValue N0 = N->getOperand(0);
16866 SDValue N1 = N->getOperand(1);
16867 EVT VT = N->getValueType(0);
16868 SDLoc SL(N);
16869
16870 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16871
16872 const TargetOptions &Options = DAG.getTarget().Options;
16873
16874 // The transforms below are incorrect when x == 0 and y == inf, because the
16875 // intermediate multiplication produces a nan.
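  // For example, with x == 0.0 and y == +inf, (fmul (fadd x, 1.0), y) is +inf,
  // while the fused (fma x, y, y) first forms 0.0 * +inf == NaN.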
16876 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16877 if (!hasNoInfs(Options, FAdd))
16878 return SDValue();
16879
16880 // Floating-point multiply-add without intermediate rounding.
16881   bool HasFMA =
16882       isContractableFMUL(Options, SDValue(N, 0)) &&
16883       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16884       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16885
16886 // Floating-point multiply-add with intermediate rounding. This can result
16887 // in a less precise result due to the changed rounding order.
16888 bool HasFMAD = Options.UnsafeFPMath &&
16889 (LegalOperations && TLI.isFMADLegal(DAG, N));
16890
16891 // No valid opcode, do not combine.
16892 if (!HasFMAD && !HasFMA)
16893 return SDValue();
16894
16895 // Always prefer FMAD to FMA for precision.
16896 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16897   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16898
16899 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16900 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16901 auto FuseFADD = [&](SDValue X, SDValue Y) {
16902 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16903 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16904 if (C->isExactlyValue(+1.0))
16905 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16906 Y);
16907 if (C->isExactlyValue(-1.0))
16908 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16909 DAG.getNode(ISD::FNEG, SL, VT, Y));
16910 }
16911 }
16912 return SDValue();
16913 };
16914
16915 if (SDValue FMA = FuseFADD(N0, N1))
16916 return FMA;
16917 if (SDValue FMA = FuseFADD(N1, N0))
16918 return FMA;
16919
16920 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16921 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16922 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16923 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16924 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16925 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16926 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16927 if (C0->isExactlyValue(+1.0))
16928 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16929 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16930 Y);
16931 if (C0->isExactlyValue(-1.0))
16932 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16933 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16934 DAG.getNode(ISD::FNEG, SL, VT, Y));
16935 }
16936 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16937 if (C1->isExactlyValue(+1.0))
16938 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16939 DAG.getNode(ISD::FNEG, SL, VT, Y));
16940 if (C1->isExactlyValue(-1.0))
16941 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16942 Y);
16943 }
16944 }
16945 return SDValue();
16946 };
16947
16948 if (SDValue FMA = FuseFSUB(N0, N1))
16949 return FMA;
16950 if (SDValue FMA = FuseFSUB(N1, N0))
16951 return FMA;
16952
16953 return SDValue();
16954}
16955
16956SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16957 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16958
16959 // FADD -> FMA combines:
16960 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16961 if (Fused.getOpcode() != ISD::DELETED_NODE)
16962 AddToWorklist(Fused.getNode());
16963 return Fused;
16964 }
16965 return SDValue();
16966}
16967
16968SDValue DAGCombiner::visitFADD(SDNode *N) {
16969 SDValue N0 = N->getOperand(0);
16970 SDValue N1 = N->getOperand(1);
16971 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16972 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16973 EVT VT = N->getValueType(0);
16974 SDLoc DL(N);
16975 const TargetOptions &Options = DAG.getTarget().Options;
16976 SDNodeFlags Flags = N->getFlags();
16977 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16978
16979 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16980 return R;
16981
16982 // fold (fadd c1, c2) -> c1 + c2
16983 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16984 return C;
16985
16986 // canonicalize constant to RHS
16987 if (N0CFP && !N1CFP)
16988 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16989
16990 // fold vector ops
16991 if (VT.isVector())
16992 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16993 return FoldedVOp;
16994
16995 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16996 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16997 if (N1C && N1C->isZero())
16998 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16999 return N0;
17000
17001 if (SDValue NewSel = foldBinOpIntoSelect(N))
17002 return NewSel;
17003
17004 // fold (fadd A, (fneg B)) -> (fsub A, B)
17005 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17006 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17007 N1, DAG, LegalOperations, ForCodeSize))
17008 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17009
17010 // fold (fadd (fneg A), B) -> (fsub B, A)
17011 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17012 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17013 N0, DAG, LegalOperations, ForCodeSize))
17014 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17015
17016 auto isFMulNegTwo = [](SDValue FMul) {
17017 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17018 return false;
17019 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17020 return C && C->isExactlyValue(-2.0);
17021 };
17022
17023 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17024 if (isFMulNegTwo(N0)) {
17025 SDValue B = N0.getOperand(0);
17026 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17027 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17028 }
17029 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17030 if (isFMulNegTwo(N1)) {
17031 SDValue B = N1.getOperand(0);
17032 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17033 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17034 }
17035
17036 // No FP constant should be created after legalization as Instruction
17037 // Selection pass has a hard time dealing with FP constants.
17038 bool AllowNewConst = (Level < AfterLegalizeDAG);
17039
17040 // If nnan is enabled, fold lots of things.
17041 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17042 // If allowed, fold (fadd (fneg x), x) -> 0.0
17043 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17044 return DAG.getConstantFP(0.0, DL, VT);
17045
17046 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17047 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17048 return DAG.getConstantFP(0.0, DL, VT);
17049 }
17050
17051 // If 'unsafe math' or reassoc and nsz, fold lots of things.
17052 // TODO: break out portions of the transformations below for which Unsafe is
17053 // considered and which do not require both nsz and reassoc
17054 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17055 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17056 AllowNewConst) {
17057 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17058       if (N1CFP && N0.getOpcode() == ISD::FADD &&
17059           DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17060 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17061 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17062 }
17063
17064 // We can fold chains of FADD's of the same value into multiplications.
17065 // This transform is not safe in general because we are reducing the number
17066 // of rounding steps.
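      // For example, (x * c) + x rounds twice at run time, while x * (c + 1.0)
      // rounds only once, so the results may differ in the last bit.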
17067 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17068 if (N0.getOpcode() == ISD::FMUL) {
17069 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17070 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17071
17072 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17073 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17074 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17075 DAG.getConstantFP(1.0, DL, VT));
17076 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17077 }
17078
17079 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17080 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17081 N1.getOperand(0) == N1.getOperand(1) &&
17082 N0.getOperand(0) == N1.getOperand(0)) {
17083 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17084 DAG.getConstantFP(2.0, DL, VT));
17085 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17086 }
17087 }
17088
17089 if (N1.getOpcode() == ISD::FMUL) {
17090 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17091 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17092
17093 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17094 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17095 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17096 DAG.getConstantFP(1.0, DL, VT));
17097 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17098 }
17099
17100 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17101 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17102 N0.getOperand(0) == N0.getOperand(1) &&
17103 N1.getOperand(0) == N0.getOperand(0)) {
17104 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17105 DAG.getConstantFP(2.0, DL, VT));
17106 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17107 }
17108 }
17109
17110 if (N0.getOpcode() == ISD::FADD) {
17111 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17112 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17113 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17114 (N0.getOperand(0) == N1)) {
17115 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17116 DAG.getConstantFP(3.0, DL, VT));
17117 }
17118 }
17119
17120 if (N1.getOpcode() == ISD::FADD) {
17121 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17122 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17123 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17124 N1.getOperand(0) == N0) {
17125 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17126 DAG.getConstantFP(3.0, DL, VT));
17127 }
17128 }
17129
17130 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17131 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17132 N0.getOperand(0) == N0.getOperand(1) &&
17133 N1.getOperand(0) == N1.getOperand(1) &&
17134 N0.getOperand(0) == N1.getOperand(0)) {
17135 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17136 DAG.getConstantFP(4.0, DL, VT));
17137 }
17138 }
17139
17140 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17141 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17142 VT, N0, N1, Flags))
17143 return SD;
17144 } // enable-unsafe-fp-math
17145
17146 // FADD -> FMA combines:
17147 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17148 if (Fused.getOpcode() != ISD::DELETED_NODE)
17149 AddToWorklist(Fused.getNode());
17150 return Fused;
17151 }
17152 return SDValue();
17153}
17154
17155SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17156 SDValue Chain = N->getOperand(0);
17157 SDValue N0 = N->getOperand(1);
17158 SDValue N1 = N->getOperand(2);
17159 EVT VT = N->getValueType(0);
17160 EVT ChainVT = N->getValueType(1);
17161 SDLoc DL(N);
17162 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17163
17164 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17165 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17166 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17167 N1, DAG, LegalOperations, ForCodeSize)) {
17168 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17169 {Chain, N0, NegN1});
17170 }
17171
17172 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17173 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17174 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17175 N0, DAG, LegalOperations, ForCodeSize)) {
17176 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17177 {Chain, N1, NegN0});
17178 }
17179 return SDValue();
17180}
17181
17182SDValue DAGCombiner::visitFSUB(SDNode *N) {
17183 SDValue N0 = N->getOperand(0);
17184 SDValue N1 = N->getOperand(1);
17185 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17186 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17187 EVT VT = N->getValueType(0);
17188 SDLoc DL(N);
17189 const TargetOptions &Options = DAG.getTarget().Options;
17190 const SDNodeFlags Flags = N->getFlags();
17191 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17192
17193 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17194 return R;
17195
17196 // fold (fsub c1, c2) -> c1-c2
17197 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17198 return C;
17199
17200 // fold vector ops
17201 if (VT.isVector())
17202 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17203 return FoldedVOp;
17204
17205 if (SDValue NewSel = foldBinOpIntoSelect(N))
17206 return NewSel;
17207
17208 // (fsub A, 0) -> A
17209 if (N1CFP && N1CFP->isZero()) {
17210 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17211 Flags.hasNoSignedZeros()) {
17212 return N0;
17213 }
17214 }
17215
17216 if (N0 == N1) {
17217 // (fsub x, x) -> 0.0
17218 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17219 return DAG.getConstantFP(0.0f, DL, VT);
17220 }
17221
17222 // (fsub -0.0, N1) -> -N1
17223 if (N0CFP && N0CFP->isZero()) {
17224 if (N0CFP->isNegative() ||
17225 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17226 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17227 // flushed to zero, unless all users treat denorms as zero (DAZ).
17228 // FIXME: This transform will change the sign of a NaN and the behavior
17229 // of a signaling NaN. It is only valid when a NoNaN flag is present.
17230 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17231 if (DenormMode == DenormalMode::getIEEE()) {
17232 if (SDValue NegN1 =
17233 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17234 return NegN1;
17235 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17236 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17237 }
17238 }
17239 }
17240
17241 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17242 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17243 N1.getOpcode() == ISD::FADD) {
17244 // X - (X + Y) -> -Y
17245 if (N0 == N1->getOperand(0))
17246 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17247 // X - (Y + X) -> -Y
17248 if (N0 == N1->getOperand(1))
17249 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17250 }
17251
17252 // fold (fsub A, (fneg B)) -> (fadd A, B)
17253 if (SDValue NegN1 =
17254 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17255 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17256
17257 // FSUB -> FMA combines:
17258 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17259 AddToWorklist(Fused.getNode());
17260 return Fused;
17261 }
17262
17263 return SDValue();
17264}
17265
17266// Transform IEEE Floats:
17267// (fmul C, (uitofp Pow2))
17268// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17269// (fdiv C, (uitofp Pow2))
17270// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17271//
17272 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
17273 // so there is no need for more than an add/sub.
17273// there is no need for more than an add/sub.
17274//
17275// This is valid under the following circumstances:
17276// 1) We are dealing with IEEE floats
17277// 2) C is normal
17278// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
17279 // TODO: Much of this could also be used for generating `ldexp` on targets
17280 // that prefer it.
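// Worked f64 example: C = 3.0 (bits 0x4008000000000000) and Pow2 = 4, so
// Log2(Pow2) = 2 and the mantissa width is 52; adding (2 << 52) to the bits
// yields 0x4028000000000000, which is exactly 12.0 == 3.0 * 4.0.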
17281SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
17282 EVT VT = N->getValueType(0);
17283 SDValue ConstOp, Pow2Op;
17284
17285 std::optional<int> Mantissa;
17286 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
17287 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17288 return false;
17289
17290 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17291 Pow2Op = N->getOperand(1 - ConstOpIdx);
17292 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
17293 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
17294 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
17295 return false;
17296
17297 Pow2Op = Pow2Op.getOperand(0);
17298
17299 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
17300 // TODO: We could use knownbits to make this bound more precise.
17301 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
17302
17303 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
17304 if (CFP == nullptr)
17305 return false;
17306
17307 const APFloat &APF = CFP->getValueAPF();
17308
17309 // Make sure we have normal/ieee constant.
17310 if (!APF.isNormal() || !APF.isIEEE())
17311 return false;
17312
17313 // Make sure the floats exponent is within the bounds that this transform
17314 // produces bitwise equals value.
17315 int CurExp = ilogb(APF);
17316 // FMul by pow2 will only increase exponent.
17317 int MinExp =
17318 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17319 // FDiv by pow2 will only decrease exponent.
17320 int MaxExp =
17321 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17322         if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
17323             MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
17324 return false;
17325
17326 // Finally make sure we actually know the mantissa for the float type.
17327 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17328 if (!Mantissa)
17329 Mantissa = ThisMantissa;
17330
17331 return *Mantissa == ThisMantissa && ThisMantissa > 0;
17332 };
17333
17334 // TODO: We may be able to include undefs.
17335 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
17336 };
17337
17338 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
17339 return SDValue();
17340
17341 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
17342 return SDValue();
17343
17344 // Get log2 after all other checks have taken place. This is because
17345 // BuildLogBase2 may create a new node.
17346 SDLoc DL(N);
17347 // Get Log2 type with same bitwidth as the float type (VT).
17348 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
17349 if (VT.isVector())
17350     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
17351                                 VT.getVectorElementCount());
17352
17353 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
17354 /*InexpensiveOnly*/ true, NewIntVT);
17355 if (!Log2)
17356 return SDValue();
17357
17358 // Perform actual transform.
17359 SDValue MantissaShiftCnt =
17360 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
17361 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
17362 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
17363   // cast. We could implement that here by also handling the casts.
17364 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
17365 SDValue ResAsInt =
17366 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
17367 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
17368 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
17369 return ResAsFP;
17370}
17371
17372SDValue DAGCombiner::visitFMUL(SDNode *N) {
17373 SDValue N0 = N->getOperand(0);
17374 SDValue N1 = N->getOperand(1);
17375 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17376 EVT VT = N->getValueType(0);
17377 SDLoc DL(N);
17378 const TargetOptions &Options = DAG.getTarget().Options;
17379 const SDNodeFlags Flags = N->getFlags();
17380 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17381
17382 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17383 return R;
17384
17385 // fold (fmul c1, c2) -> c1*c2
17386 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
17387 return C;
17388
17389 // canonicalize constant to RHS
17390   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17391       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17392     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
17393
17394 // fold vector ops
17395 if (VT.isVector())
17396 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17397 return FoldedVOp;
17398
17399 if (SDValue NewSel = foldBinOpIntoSelect(N))
17400 return NewSel;
17401
17402 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
17403 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
17404     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17405         N0.getOpcode() == ISD::FMUL) {
17406 SDValue N00 = N0.getOperand(0);
17407 SDValue N01 = N0.getOperand(1);
17408 // Avoid an infinite loop by making sure that N00 is not a constant
17409 // (the inner multiply has not been constant folded yet).
17410       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
17411           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
17412         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
17413 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
17414 }
17415 }
17416
17417 // Match a special-case: we convert X * 2.0 into fadd.
17418 // fmul (fadd X, X), C -> fmul X, 2.0 * C
17419 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
17420 N0.getOperand(0) == N0.getOperand(1)) {
17421 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
17422 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
17423 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
17424 }
17425
17426 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
17427 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
17428 VT, N0, N1, Flags))
17429 return SD;
17430 }
17431
17432 // fold (fmul X, 2.0) -> (fadd X, X)
17433 if (N1CFP && N1CFP->isExactlyValue(+2.0))
17434 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
17435
17436 // fold (fmul X, -1.0) -> (fsub -0.0, X)
17437 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
17438 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
17439 return DAG.getNode(ISD::FSUB, DL, VT,
17440 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
17441 }
17442 }
17443
17444 // -N0 * -N1 --> N0 * N1
17445   TargetLowering::NegatibleCost CostN0 =
17446       TargetLowering::NegatibleCost::Expensive;
17447   TargetLowering::NegatibleCost CostN1 =
17448       TargetLowering::NegatibleCost::Expensive;
17449   SDValue NegN0 =
17450 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17451 if (NegN0) {
17452 HandleSDNode NegN0Handle(NegN0);
17453 SDValue NegN1 =
17454 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17455     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17456                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17457 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
17458 }
17459
17460 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17461 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17462 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17463 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17464 TLI.isOperationLegal(ISD::FABS, VT)) {
17465 SDValue Select = N0, X = N1;
17466 if (Select.getOpcode() != ISD::SELECT)
17467 std::swap(Select, X);
17468
17469 SDValue Cond = Select.getOperand(0);
17470 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17471 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17472
17473 if (TrueOpnd && FalseOpnd &&
17474 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17475 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17476 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17477 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17478 switch (CC) {
17479 default: break;
17480 case ISD::SETOLT:
17481 case ISD::SETULT:
17482 case ISD::SETOLE:
17483 case ISD::SETULE:
17484 case ISD::SETLT:
17485 case ISD::SETLE:
17486 std::swap(TrueOpnd, FalseOpnd);
17487 [[fallthrough]];
17488 case ISD::SETOGT:
17489 case ISD::SETUGT:
17490 case ISD::SETOGE:
17491 case ISD::SETUGE:
17492 case ISD::SETGT:
17493 case ISD::SETGE:
17494 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17495 TLI.isOperationLegal(ISD::FNEG, VT))
17496 return DAG.getNode(ISD::FNEG, DL, VT,
17497 DAG.getNode(ISD::FABS, DL, VT, X));
17498 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17499 return DAG.getNode(ISD::FABS, DL, VT, X);
17500
17501 break;
17502 }
17503 }
17504 }
17505
17506 // FMUL -> FMA combines:
17507 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17508 AddToWorklist(Fused.getNode());
17509 return Fused;
17510 }
17511
17512 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17513 // able to run.
17514 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17515 return R;
17516
17517 return SDValue();
17518}
17519
17520template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17521 SDValue N0 = N->getOperand(0);
17522 SDValue N1 = N->getOperand(1);
17523 SDValue N2 = N->getOperand(2);
17524 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17525 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17526 EVT VT = N->getValueType(0);
17527 SDLoc DL(N);
17528 const TargetOptions &Options = DAG.getTarget().Options;
17529 // FMA nodes have flags that propagate to the created nodes.
17530 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17531 MatchContextClass matcher(DAG, TLI, N);
17532
17533 // Constant fold FMA.
17534 if (SDValue C =
17535 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
17536 return C;
17537
17538 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17539   TargetLowering::NegatibleCost CostN0 =
17540       TargetLowering::NegatibleCost::Expensive;
17541   TargetLowering::NegatibleCost CostN1 =
17542       TargetLowering::NegatibleCost::Expensive;
17543   SDValue NegN0 =
17544 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17545 if (NegN0) {
17546 HandleSDNode NegN0Handle(NegN0);
17547 SDValue NegN1 =
17548 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17549     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17550                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17551 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17552 }
17553
17554 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17555 if (Options.UnsafeFPMath) {
17556 if (N0CFP && N0CFP->isZero())
17557 return N2;
17558 if (N1CFP && N1CFP->isZero())
17559 return N2;
17560 }
17561
17562 // FIXME: Support splat of constant.
17563 if (N0CFP && N0CFP->isExactlyValue(1.0))
17564 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
17565 if (N1CFP && N1CFP->isExactlyValue(1.0))
17566 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17567
17568 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17569   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17570       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17571     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
17572
17573 bool CanReassociate =
17574 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17575 if (CanReassociate) {
17576 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17577     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17578         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17579         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17580 return matcher.getNode(
17581 ISD::FMUL, DL, VT, N0,
17582 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17583 }
17584
17585 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17586     if (matcher.match(N0, ISD::FMUL) &&
17587         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17588         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17589 return matcher.getNode(
17590 ISD::FMA, DL, VT, N0.getOperand(0),
17591 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17592 }
17593 }
17594
17595 // (fma x, -1, y) -> (fadd (fneg x), y)
17596 // FIXME: Support splat of constant.
17597 if (N1CFP) {
17598 if (N1CFP->isExactlyValue(1.0))
17599 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17600
17601 if (N1CFP->isExactlyValue(-1.0) &&
17602 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17603 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17604 AddToWorklist(RHSNeg.getNode());
17605 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17606 }
17607
17608 // fma (fneg x), K, y -> fma x -K, y
17609     if (matcher.match(N0, ISD::FNEG) &&
17610         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17611 (N1.hasOneUse() &&
17612 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17613 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17614 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17615 }
17616 }
17617
17618 // FIXME: Support splat of constant.
17619 if (CanReassociate) {
17620 // (fma x, c, x) -> (fmul x, (c+1))
17621 if (N1CFP && N0 == N2) {
17622 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17623 matcher.getNode(ISD::FADD, DL, VT, N1,
17624 DAG.getConstantFP(1.0, DL, VT)));
17625 }
17626
17627 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17628 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17629 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17630 matcher.getNode(ISD::FADD, DL, VT, N1,
17631 DAG.getConstantFP(-1.0, DL, VT)));
17632 }
17633 }
17634
17635 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17636 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17637 if (!TLI.isFNegFree(VT))
17638     if (SDValue Neg = TLI.getCheaperNegatedExpression(
17639             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17640 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17641 return SDValue();
17642}
17643
17644SDValue DAGCombiner::visitFMAD(SDNode *N) {
17645 SDValue N0 = N->getOperand(0);
17646 SDValue N1 = N->getOperand(1);
17647 SDValue N2 = N->getOperand(2);
17648 EVT VT = N->getValueType(0);
17649 SDLoc DL(N);
17650
17651 // Constant fold FMAD.
17652 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
17653 return C;
17654
17655 return SDValue();
17656}
17657
17658// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17659// reciprocal.
17660// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17661// Notice that this is not always beneficial. One reason is different targets
17662// may have different costs for FDIV and FMUL, so sometimes the cost of two
17663// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17664// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17665SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17666 // TODO: Limit this transform based on optsize/minsize - it always creates at
17667 // least 1 extra instruction. But the perf win may be substantial enough
17668 // that only minsize should restrict this.
17669 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17670 const SDNodeFlags Flags = N->getFlags();
17671 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17672 return SDValue();
17673
17674 // Skip if current node is a reciprocal/fneg-reciprocal.
17675 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17676 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17677 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17678 return SDValue();
17679
17680 // Exit early if the target does not want this transform or if there can't
17681 // possibly be enough uses of the divisor to make the transform worthwhile.
17682 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17683
17684 // For splat vectors, scale the number of uses by the splat factor. If we can
17685 // convert the division into a scalar op, that will likely be much faster.
17686 unsigned NumElts = 1;
17687 EVT VT = N->getValueType(0);
17688 if (VT.isVector() && DAG.isSplatValue(N1))
17689 NumElts = VT.getVectorMinNumElements();
17690
17691 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17692 return SDValue();
17693
17694 // Find all FDIV users of the same divisor.
17695 // Use a set because duplicates may be present in the user list.
17696   SetVector<SDNode *> Users;
17697   for (auto *U : N1->users()) {
17698 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17699 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17700 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17701 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17702 U->getFlags().hasAllowReassociation() &&
17703 U->getFlags().hasNoSignedZeros())
17704 continue;
17705
17706 // This division is eligible for optimization only if global unsafe math
17707 // is enabled or if this division allows reciprocal formation.
17708 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17709 Users.insert(U);
17710 }
17711 }
17712
17713 // Now that we have the actual number of divisor uses, make sure it meets
17714 // the minimum threshold specified by the target.
17715 if ((Users.size() * NumElts) < MinUses)
17716 return SDValue();
17717
17718 SDLoc DL(N);
17719 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17720 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17721
17722 // Dividend / Divisor -> Dividend * Reciprocal
17723 for (auto *U : Users) {
17724 SDValue Dividend = U->getOperand(0);
17725 if (Dividend != FPOne) {
17726 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17727 Reciprocal, Flags);
17728 CombineTo(U, NewNode);
17729 } else if (U != Reciprocal.getNode()) {
17730 // In the absence of fast-math-flags, this user node is always the
17731 // same node as Reciprocal, but with FMF they may be different nodes.
17732 CombineTo(U, Reciprocal);
17733 }
17734 }
17735 return SDValue(N, 0); // N was replaced.
17736}
17737
17738SDValue DAGCombiner::visitFDIV(SDNode *N) {
17739 SDValue N0 = N->getOperand(0);
17740 SDValue N1 = N->getOperand(1);
17741 EVT VT = N->getValueType(0);
17742 SDLoc DL(N);
17743 const TargetOptions &Options = DAG.getTarget().Options;
17744 SDNodeFlags Flags = N->getFlags();
17745 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17746
17747 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17748 return R;
17749
17750 // fold (fdiv c1, c2) -> c1/c2
17751 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17752 return C;
17753
17754 // fold vector ops
17755 if (VT.isVector())
17756 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17757 return FoldedVOp;
17758
17759 if (SDValue NewSel = foldBinOpIntoSelect(N))
17760 return NewSel;
17761
17762   if (SDValue V = combineRepeatedFPDivisors(N))
17763     return V;
17764
17765 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17766 // the loss is acceptable with AllowReciprocal.
17767 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17768 // Compute the reciprocal 1.0 / c2.
17769 const APFloat &N1APF = N1CFP->getValueAPF();
17770     APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17771     APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17772 // Only do the transform if the reciprocal is a legal fp immediate that
17773 // isn't too nasty (eg NaN, denormal, ...).
17774 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17775 (st == APFloat::opInexact &&
17776 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17777 (!LegalOperations ||
17778 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17779 // backend)... we should handle this gracefully after Legalize.
17780 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17781          TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17782          TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17783 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17784 DAG.getConstantFP(Recip, DL, VT));
17785 }
17786
17787 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17788 // If this FDIV is part of a reciprocal square root, it may be folded
17789 // into a target-specific square root estimate instruction.
17790 if (N1.getOpcode() == ISD::FSQRT) {
17791 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17792 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17793 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17794 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17795 if (SDValue RV =
17796 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17797 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17798 AddToWorklist(RV.getNode());
17799 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17800 }
17801 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17802 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17803 if (SDValue RV =
17804 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17805 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17806 AddToWorklist(RV.getNode());
17807 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17808 }
17809 } else if (N1.getOpcode() == ISD::FMUL) {
17810 // Look through an FMUL. Even though this won't remove the FDIV directly,
17811 // it's still worthwhile to get rid of the FSQRT if possible.
17812 SDValue Sqrt, Y;
17813 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17814 Sqrt = N1.getOperand(0);
17815 Y = N1.getOperand(1);
17816 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17817 Sqrt = N1.getOperand(1);
17818 Y = N1.getOperand(0);
17819 }
17820 if (Sqrt.getNode()) {
17821 // If the other multiply operand is known positive, pull it into the
17822 // sqrt. That will eliminate the division if we convert to an estimate.
17823 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17824 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17825 SDValue A;
17826 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17827 A = Y.getOperand(0);
17828 else if (Y == Sqrt.getOperand(0))
17829 A = Y;
17830 if (A) {
17831 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17832 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17833 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17834 SDValue AAZ =
17835 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17836 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17837 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17838
17839 // Estimate creation failed. Clean up speculatively created nodes.
17840 recursivelyDeleteUnusedNodes(AAZ.getNode());
17841 }
17842 }
17843
17844 // We found a FSQRT, so try to make this fold:
17845 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17846 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17847 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17848 AddToWorklist(Div.getNode());
17849 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17850 }
17851 }
17852 }
17853
17854 // Fold into a reciprocal estimate and multiply instead of a real divide.
17855 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17856 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17857 return RV;
17858 }
17859
17860 // Fold X/Sqrt(X) -> Sqrt(X)
17861 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17862 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17863 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17864 return N1;
17865
17866 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17867   TargetLowering::NegatibleCost CostN0 =
17868       TargetLowering::NegatibleCost::Expensive;
17869   TargetLowering::NegatibleCost CostN1 =
17870       TargetLowering::NegatibleCost::Expensive;
17871   SDValue NegN0 =
17872 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17873 if (NegN0) {
17874 HandleSDNode NegN0Handle(NegN0);
17875 SDValue NegN1 =
17876 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17877     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17878                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17879 return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
17880 }
17881
17882 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17883 return R;
17884
17885 return SDValue();
17886}
17887
17888SDValue DAGCombiner::visitFREM(SDNode *N) {
17889 SDValue N0 = N->getOperand(0);
17890 SDValue N1 = N->getOperand(1);
17891 EVT VT = N->getValueType(0);
17892 SDNodeFlags Flags = N->getFlags();
17893 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17894 SDLoc DL(N);
17895
17896 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17897 return R;
17898
17899 // fold (frem c1, c2) -> fmod(c1,c2)
17900 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17901 return C;
17902
17903 if (SDValue NewSel = foldBinOpIntoSelect(N))
17904 return NewSel;
17905
17906 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
17907 // power of 2.
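  // For example, frem(7.5, 2.0): trunc(7.5 / 2.0) = 3.0, and
  // 7.5 - 3.0 * 2.0 = 1.5, matching fmod(7.5, 2.0).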
17908 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17912 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17913 bool NeedsCopySign =
17914 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17915 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17916 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17917 SDValue MLA;
17918     if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17919       MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17920 N1, N0);
17921 } else {
17922 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17923 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17924 }
17925 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17926 }
17927
17928 return SDValue();
17929}
17930
17931SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17932 SDNodeFlags Flags = N->getFlags();
17933 const TargetOptions &Options = DAG.getTarget().Options;
17934
17935 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17936 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17937 if (!Flags.hasApproximateFuncs() ||
17938 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17939 return SDValue();
17940
17941 SDValue N0 = N->getOperand(0);
17942 if (TLI.isFsqrtCheap(N0, DAG))
17943 return SDValue();
17944
17945 // FSQRT nodes have flags that propagate to the created nodes.
17946 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17947 // transform the fdiv, we may produce a sub-optimal estimate sequence
17948 // because the reciprocal calculation may not have to filter out a
17949 // 0.0 input.
17950 return buildSqrtEstimate(N0, Flags);
17951}
17952
17953/// copysign(x, fp_extend(y)) -> copysign(x, y)
17954/// copysign(x, fp_round(y)) -> copysign(x, y)
17955/// Operands to the functions are the type of X and Y respectively.
17956static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17957 // Always fold no-op FP casts.
17958 if (XTy == YTy)
17959 return true;
17960
17961 // Do not optimize out type conversion of f128 type yet.
17962 // For some targets like x86_64, configuration is changed to keep one f128
17963 // value in one SSE register, but instruction selection cannot handle
17964 // FCOPYSIGN on SSE registers yet.
17965 if (YTy == MVT::f128)
17966 return false;
17967
17968   return true;
17969 }
17970
17971 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17972   SDValue N1 = N->getOperand(1);
17973 if (N1.getOpcode() != ISD::FP_EXTEND &&
17974 N1.getOpcode() != ISD::FP_ROUND)
17975 return false;
17976 EVT N1VT = N1->getValueType(0);
17977 EVT N1Op0VT = N1->getOperand(0).getValueType();
17978 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17979}
17980
17981SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17982 SDValue N0 = N->getOperand(0);
17983 SDValue N1 = N->getOperand(1);
17984 EVT VT = N->getValueType(0);
17985 SDLoc DL(N);
17986
17987 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17988 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
17989 return C;
17990
17991 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17992 const APFloat &V = N1C->getValueAPF();
17993 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17994 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17995 if (!V.isNegative()) {
17996 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17997 return DAG.getNode(ISD::FABS, DL, VT, N0);
17998 } else {
17999 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18000 return DAG.getNode(ISD::FNEG, DL, VT,
18001 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
18002 }
18003 }
18004
18005 // copysign(fabs(x), y) -> copysign(x, y)
18006 // copysign(fneg(x), y) -> copysign(x, y)
18007 // copysign(copysign(x,z), y) -> copysign(x, y)
18008 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
18009 N0.getOpcode() == ISD::FCOPYSIGN)
18010 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
18011
18012 // copysign(x, abs(y)) -> abs(x)
18013 if (N1.getOpcode() == ISD::FABS)
18014 return DAG.getNode(ISD::FABS, DL, VT, N0);
18015
18016 // copysign(x, copysign(y,z)) -> copysign(x, z)
18017 if (N1.getOpcode() == ISD::FCOPYSIGN)
18018 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
18019
18020 // copysign(x, fp_extend(y)) -> copysign(x, y)
18021 // copysign(x, fp_round(y)) -> copysign(x, y)
18022   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18023     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18024
18025 // We only take the sign bit from the sign operand.
18026 EVT SignVT = N1.getValueType();
18027   if (SimplifyDemandedBits(N1,
18028                            APInt::getSignMask(SignVT.getScalarSizeInBits())))
18029 return SDValue(N, 0);
18030
18031 // We only take the non-sign bits from the value operand
18032 if (SimplifyDemandedBits(N0,
18033 APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
18034 return SDValue(N, 0);
18035
18036 return SDValue();
18037}
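// Illustrative sketch of how the folds above compose (notation only): because
// FCOPYSIGN consumes only the magnitude bits of its first operand and only the
// sign bit of its second operand,
//   fcopysign (fneg X), (fp_extend Y)
// can first be rewritten to fcopysign X, (fp_extend Y) and then, assuming Y is
// not an f128 value (see CanCombineFCOPYSIGN_EXTEND_ROUND), to fcopysign X, Y.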
18038
18039SDValue DAGCombiner::visitFPOW(SDNode *N) {
18040 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18041 if (!ExponentC)
18042 return SDValue();
18043 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18044
18045 // Try to convert x ** (1/3) into cube root.
18046 // TODO: Handle the various flavors of long double.
18047 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18048 // Some range near 1/3 should be fine.
18049 EVT VT = N->getValueType(0);
18050 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18051 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18052 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18053 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18054 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
18055 // For regular numbers, rounding may cause the results to differ.
18056 // Therefore, we require { nsz ninf nnan afn } for this transform.
18057 // TODO: We could select out the special cases if we don't have nsz/ninf.
18058 SDNodeFlags Flags = N->getFlags();
18059 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18060 !Flags.hasApproximateFuncs())
18061 return SDValue();
18062
18063 // Do not create a cbrt() libcall if the target does not have it, and do not
18064 // turn a pow that has lowering support into a cbrt() libcall.
18065 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18066 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18067 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18068 return SDValue();
18069
18070 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18071 }
18072
18073 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18074 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18075 // TODO: This could be extended (using a target hook) to handle smaller
18076 // power-of-2 fractional exponents.
18077 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18078 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18079 if (ExponentIs025 || ExponentIs075) {
18080 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18081 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18082 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18083 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18084 // For regular numbers, rounding may cause the results to differ.
18085 // Therefore, we require { nsz ninf afn } for this transform.
18086 // TODO: We could select out the special cases if we don't have nsz/ninf.
18087 SDNodeFlags Flags = N->getFlags();
18088
18089 // We only need no signed zeros for the 0.25 case.
18090 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18091 !Flags.hasApproximateFuncs())
18092 return SDValue();
18093
18094 // Don't double the number of libcalls. We are trying to inline fast code.
18095 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18096 return SDValue();
18097
18098 // Assume that libcalls are the smallest code.
18099 // TODO: This restriction should probably be lifted for vectors.
18100 if (ForCodeSize)
18101 return SDValue();
18102
18103 // pow(X, 0.25) --> sqrt(sqrt(X))
18104 SDLoc DL(N);
18105 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18106 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18107 if (ExponentIs025)
18108 return SqrtSqrt;
18109 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18110 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18111 }
18112
18113 return SDValue();
18114}
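// A hedged illustration of the 0.75 case above, assuming the afn and ninf
// flags are present and a libcall would not be cheaper:
//   fpow X, 0.75  -->  fmul (fsqrt X), (fsqrt (fsqrt X))
// The 0.25 case additionally requires nsz, since sqrt(sqrt(-0.0)) would
// otherwise produce -0.0 where pow(-0.0, 0.25) is +0.0.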
18115
18116static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18117 const TargetLowering &TLI) {
18118 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18119 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18120 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18121 // conversions would return +0.0.
18122 // FIXME: We should be able to use node-level FMF here.
18123 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
18124 EVT VT = N->getValueType(0);
18125 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18126 !DAG.getTarget().Options.NoSignedZerosFPMath)
18127 return SDValue();
18128
18129 // fptosi/fptoui round towards zero, so converting from FP to integer and
18130 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18131 SDValue N0 = N->getOperand(0);
18132 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18133 N0.getOperand(0).getValueType() == VT)
18134 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18135
18136 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18137 N0.getOperand(0).getValueType() == VT)
18138 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18139
18140 return SDValue();
18141}
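// For example, when FTRUNC is legal for f32 and -0.0 may be ignored, the fold
// above turns
//   sint_to_fp (fp_to_sint f32 X)  -->  ftrunc f32 X
// since both conversions round toward zero; the unsigned pair folds the same way.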
18142
18143SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18144 SDValue N0 = N->getOperand(0);
18145 EVT VT = N->getValueType(0);
18146 EVT OpVT = N0.getValueType();
18147 SDLoc DL(N);
18148
18149 // [us]itofp(undef) = 0, because the result value is bounded.
18150 if (N0.isUndef())
18151 return DAG.getConstantFP(0.0, DL, VT);
18152
18153 // fold (sint_to_fp c1) -> c1fp
18154 // ...but only if the target supports immediate floating-point values
18155 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18156 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18157 return C;
18158
18159 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18160 // but UINT_TO_FP is legal on this target, try to convert.
18161 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18162 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18163 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18164 if (DAG.SignBitIsZero(N0))
18165 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18166 }
18167
18168 // The next optimizations are desirable only if SELECT_CC can be lowered.
18169 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18170 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18171 !VT.isVector() &&
18172 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18173 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18174 DAG.getConstantFP(0.0, DL, VT));
18175
18176 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18177 // (select (setcc x, y, cc), 1.0, 0.0)
18178 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18179 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18180 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18181 return DAG.getSelect(DL, VT, N0.getOperand(0),
18182 DAG.getConstantFP(1.0, DL, VT),
18183 DAG.getConstantFP(0.0, DL, VT));
18184
18185 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18186 return FTrunc;
18187
18188 return SDValue();
18189}
18190
18191SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18192 SDValue N0 = N->getOperand(0);
18193 EVT VT = N->getValueType(0);
18194 EVT OpVT = N0.getValueType();
18195 SDLoc DL(N);
18196
18197 // [us]itofp(undef) = 0, because the result value is bounded.
18198 if (N0.isUndef())
18199 return DAG.getConstantFP(0.0, DL, VT);
18200
18201 // fold (uint_to_fp c1) -> c1fp
18202 // ...but only if the target supports immediate floating-point values
18203 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18204 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18205 return C;
18206
18207 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18208 // but SINT_TO_FP is legal on this target, try to convert.
18209 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18210 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18211 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18212 if (DAG.SignBitIsZero(N0))
18213 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18214 }
18215
18216 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18217 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18218 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18219 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18220 DAG.getConstantFP(0.0, DL, VT));
18221
18222 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18223 return FTrunc;
18224
18225 return SDValue();
18226}
18227
18228// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18229static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
18230 SDValue N0 = N->getOperand(0);
18231 EVT VT = N->getValueType(0);
18232
18233 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18234 return SDValue();
18235
18236 SDValue Src = N0.getOperand(0);
18237 EVT SrcVT = Src.getValueType();
18238 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18239 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18240
18241 // We can safely assume the conversion won't overflow the output range,
18242 // because (for example) (uint8_t)18293.f is undefined behavior.
18243
18244 // Since we can assume the conversion won't overflow, our decision as to
18245 // whether the input will fit in the float should depend on the minimum
18246 // of the input range and output range.
18247
18248 // This means this is also safe for a signed input and unsigned output, since
18249 // a negative input would lead to undefined behavior.
18250 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18251 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18252 unsigned ActualSize = std::min(InputSize, OutputSize);
18253 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18254
18255 // We can only fold away the float conversion if the input range can be
18256 // represented exactly in the float range.
18257 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18258 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18259 unsigned ExtOp =
18260 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18261 return DAG.getNode(ExtOp, DL, VT, Src);
18262 }
18263 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18264 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18265 return DAG.getBitcast(VT, Src);
18266 }
18267 return SDValue();
18268}
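// Worked example of the precision check above: for
//   fp_to_uint i32 (uint_to_fp f32 (i16 X))
// InputSize = 16 and OutputSize = 32, so ActualSize = 16, and f32 carries 24
// bits of precision, so every i16 value round-trips exactly and the pair folds
// to (zero_extend i32 X). With an i8 result the same check instead yields
// (truncate i8 X), relying on out-of-range inputs being undefined behavior.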
18269
18270SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18271 SDValue N0 = N->getOperand(0);
18272 EVT VT = N->getValueType(0);
18273 SDLoc DL(N);
18274
18275 // fold (fp_to_sint undef) -> undef
18276 if (N0.isUndef())
18277 return DAG.getUNDEF(VT);
18278
18279 // fold (fp_to_sint c1fp) -> c1
18280 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18281 return C;
18282
18283 return FoldIntToFPToInt(N, DL, DAG);
18284}
18285
18286SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18287 SDValue N0 = N->getOperand(0);
18288 EVT VT = N->getValueType(0);
18289 SDLoc DL(N);
18290
18291 // fold (fp_to_uint undef) -> undef
18292 if (N0.isUndef())
18293 return DAG.getUNDEF(VT);
18294
18295 // fold (fp_to_uint c1fp) -> c1
18296 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
18297 return C;
18298
18299 return FoldIntToFPToInt(N, DL, DAG);
18300}
18301
18302SDValue DAGCombiner::visitXROUND(SDNode *N) {
18303 SDValue N0 = N->getOperand(0);
18304 EVT VT = N->getValueType(0);
18305
18306 // fold (lrint|llrint undef) -> undef
18307 // fold (lround|llround undef) -> undef
18308 if (N0.isUndef())
18309 return DAG.getUNDEF(VT);
18310
18311 // fold (lrint|llrint c1fp) -> c1
18312 // fold (lround|llround c1fp) -> c1
18313 if (SDValue C =
18314 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18315 return C;
18316
18317 return SDValue();
18318}
18319
18320SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
18321 SDValue N0 = N->getOperand(0);
18322 SDValue N1 = N->getOperand(1);
18323 EVT VT = N->getValueType(0);
18324 SDLoc DL(N);
18325
18326 // fold (fp_round c1fp) -> c1fp
18327 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
18328 return C;
18329
18330 // fold (fp_round (fp_extend x)) -> x
18331 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
18332 return N0.getOperand(0);
18333
18334 // fold (fp_round (fp_round x)) -> (fp_round x)
18335 if (N0.getOpcode() == ISD::FP_ROUND) {
18336 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18337 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
18338
18339 // Avoid folding legal fp_rounds into non-legal ones.
18340 if (!hasOperation(ISD::FP_ROUND, VT))
18341 return SDValue();
18342
18343 // Skip this folding if it results in an fp_round from f80 to f16.
18344 //
18345 // f80 to f16 always generates an expensive (and as yet, unimplemented)
18346 // libcall to __truncxfhf2 instead of selecting native f16 conversion
18347 // instructions from f32 or f64. Moreover, the first (value-preserving)
18348 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
18349 // x86.
18350 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
18351 return SDValue();
18352
18353 // If the first fp_round isn't a value preserving truncation, it might
18354 // introduce a tie in the second fp_round, that wouldn't occur in the
18355 // single-step fp_round we want to fold to.
18356 // In other words, double rounding isn't the same as rounding.
18357 // Also, this is a value preserving truncation iff both fp_round's are.
18358 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
18359 return DAG.getNode(
18360 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
18361 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
18362 }
18363
18364 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
18365 // Note: From a legality perspective, this is a two step transform. First,
18366 // we duplicate the fp_round to the arguments of the copysign, then we
18367 // eliminate the fp_round on Y. The second step requires an additional
18368 // predicate to match the implementation above.
18369 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
18370 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
18371 N0.getValueType())) {
18372 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
18373 N0.getOperand(0), N1);
18374 AddToWorklist(Tmp.getNode());
18375 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
18376 }
18377
18378 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18379 return NewVSel;
18380
18381 return SDValue();
18382}
18383
18384SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
18385 SDValue N0 = N->getOperand(0);
18386 EVT VT = N->getValueType(0);
18387 SDLoc DL(N);
18388
18389 if (VT.isVector())
18390 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
18391 return FoldedVOp;
18392
18393 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
18394 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
18395 return SDValue();
18396
18397 // fold (fp_extend c1fp) -> c1fp
18398 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
18399 return C;
18400
18401 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
18402 if (N0.getOpcode() == ISD::FP16_TO_FP &&
18403 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
18404 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
18405
18406 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
18407 // value of X.
18408 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
18409 SDValue In = N0.getOperand(0);
18410 if (In.getValueType() == VT) return In;
18411 if (VT.bitsLT(In.getValueType()))
18412 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
18413 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
18414 }
18415
18416 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
18417 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
18418 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
18419 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
18420 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
18421 LN0->getChain(),
18422 LN0->getBasePtr(), N0.getValueType(),
18423 LN0->getMemOperand());
18424 CombineTo(N, ExtLoad);
18425 CombineTo(
18426 N0.getNode(),
18427 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
18428 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
18429 ExtLoad.getValue(1));
18430 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18431 }
18432
18433 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18434 return NewVSel;
18435
18436 return SDValue();
18437}
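// As an example of the extending-load fold above, assuming the target allows
// an f32->f64 EXTLOAD:
//   f64 (fp_extend (f32 (load P)))  -->  f64 (extload P)
// with the original f32 value recreated for any other users of the load by an
// fp_round of the extending load's result.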
18438
18439SDValue DAGCombiner::visitFCEIL(SDNode *N) {
18440 SDValue N0 = N->getOperand(0);
18441 EVT VT = N->getValueType(0);
18442
18443 // fold (fceil c1) -> fceil(c1)
18444 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
18445 return C;
18446
18447 return SDValue();
18448}
18449
18450SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18451 SDValue N0 = N->getOperand(0);
18452 EVT VT = N->getValueType(0);
18453
18454 // fold (ftrunc c1) -> ftrunc(c1)
18455 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
18456 return C;
18457
18458 // fold ftrunc (known rounded int x) -> x
18459 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
18460 // likely to be generated to extract integer from a rounded floating value.
18461 switch (N0.getOpcode()) {
18462 default: break;
18463 case ISD::FRINT:
18464 case ISD::FTRUNC:
18465 case ISD::FNEARBYINT:
18466 case ISD::FROUNDEVEN:
18467 case ISD::FFLOOR:
18468 case ISD::FCEIL:
18469 return N0;
18470 }
18471
18472 return SDValue();
18473}
18474
18475SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18476 SDValue N0 = N->getOperand(0);
18477
18478 // fold (ffrexp c1) -> ffrexp(c1)
18479 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18480 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18481 return SDValue();
18482}
18483
18484SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18485 SDValue N0 = N->getOperand(0);
18486 EVT VT = N->getValueType(0);
18487
18488 // fold (ffloor c1) -> ffloor(c1)
18489 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
18490 return C;
18491
18492 return SDValue();
18493}
18494
18495SDValue DAGCombiner::visitFNEG(SDNode *N) {
18496 SDValue N0 = N->getOperand(0);
18497 EVT VT = N->getValueType(0);
18498 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18499
18500 // Constant fold FNEG.
18501 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
18502 return C;
18503
18504 if (SDValue NegN0 =
18505 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18506 return NegN0;
18507
18508 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18509 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18510 // know it was called from a context with a nsz flag if the input fsub does
18511 // not.
18512 if (N0.getOpcode() == ISD::FSUB &&
18513 (DAG.getTarget().Options.NoSignedZerosFPMath ||
18514 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18515 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18516 N0.getOperand(0));
18517 }
18518
18519 if (SDValue Cast = foldSignChangeInBitcast(N))
18520 return Cast;
18521
18522 return SDValue();
18523}
18524
18525SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18526 SDValue N0 = N->getOperand(0);
18527 SDValue N1 = N->getOperand(1);
18528 EVT VT = N->getValueType(0);
18529 const SDNodeFlags Flags = N->getFlags();
18530 unsigned Opc = N->getOpcode();
18531 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18532 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18533 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18534
18535 // Constant fold.
18536 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18537 return C;
18538
18539 // Canonicalize to constant on RHS.
18540 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18541 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18542 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18543
18544 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18545 const APFloat &AF = N1CFP->getValueAPF();
18546
18547 // minnum(X, nan) -> X
18548 // maxnum(X, nan) -> X
18549 // minimum(X, nan) -> nan
18550 // maximum(X, nan) -> nan
18551 if (AF.isNaN())
18552 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18553
18554 // In the following folds, inf can be replaced with the largest finite
18555 // float, if the ninf flag is set.
18556 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18557 // minnum(X, -inf) -> -inf
18558 // maxnum(X, +inf) -> +inf
18559 // minimum(X, -inf) -> -inf if nnan
18560 // maximum(X, +inf) -> +inf if nnan
18561 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18562 return N->getOperand(1);
18563
18564 // minnum(X, +inf) -> X if nnan
18565 // maxnum(X, -inf) -> X if nnan
18566 // minimum(X, +inf) -> X
18567 // maximum(X, -inf) -> X
18568 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18569 return N->getOperand(0);
18570 }
18571 }
18572
18573 if (SDValue SD = reassociateReduction(
18574 PropagatesNaN
18575 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18576 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18577 Opc, SDLoc(N), VT, N0, N1, Flags))
18578 return SD;
18579
18580 return SDValue();
18581}
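// Example of why the nnan requirement above matters for the non-propagating
// forms: minnum(NaN, +inf) is defined to return +inf rather than NaN, so
// minnum(X, +inf) --> X is only sound when X is known not to be a NaN; the
// propagating forms (minimum/maximum) already return X in that case.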
18582
18583SDValue DAGCombiner::visitFABS(SDNode *N) {
18584 SDValue N0 = N->getOperand(0);
18585 EVT VT = N->getValueType(0);
18586 SDLoc DL(N);
18587
18588 // fold (fabs c1) -> fabs(c1)
18589 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
18590 return C;
18591
18592 // fold (fabs (fabs x)) -> (fabs x)
18593 if (N0.getOpcode() == ISD::FABS)
18594 return N->getOperand(0);
18595
18596 // fold (fabs (fneg x)) -> (fabs x)
18597 // fold (fabs (fcopysign x, y)) -> (fabs x)
18598 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18599 return DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
18600
18601 if (SDValue Cast = foldSignChangeInBitcast(N))
18602 return Cast;
18603
18604 return SDValue();
18605}
18606
18607SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18608 SDValue Chain = N->getOperand(0);
18609 SDValue N1 = N->getOperand(1);
18610 SDValue N2 = N->getOperand(2);
18611
18612 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18613 // nondeterministic jumps).
18614 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18615 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18616 N1->getOperand(0), N2, N->getFlags());
18617 }
18618
18619 // Variant of the previous fold where there is a SETCC in between:
18620 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18621 // =>
18622 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18623 // =>
18624 // BRCOND(SETCC(X, CONST, Cond))
18625 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18626 // isn't equivalent to true or false.
18627 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18628 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18629 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18630 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18631 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18632 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18633 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18634 bool Updated = false;
18635
18636 // Is 'X Cond C' always true or false?
18637 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18638 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18639 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18640 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18641 (Cond == ISD::SETGT && C->isMaxSignedValue());
18642 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18643 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18644 (Cond == ISD::SETUGE && C->isZero()) ||
18645 (Cond == ISD::SETGE && C->isMinSignedValue());
18646 return True || False;
18647 };
18648
18649 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18650 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18651 S0 = S0->getOperand(0);
18652 Updated = true;
18653 }
18654 }
18655 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18656 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18657 S1 = S1->getOperand(0);
18658 Updated = true;
18659 }
18660 }
18661
18662 if (Updated)
18663 return DAG.getNode(
18664 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18665 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
18666 N->getFlags());
18667 }
18668
18669 // If N is a constant we could fold this into a fallthrough or unconditional
18670 // branch. However that doesn't happen very often in normal code, because
18671 // Instcombine/SimplifyCFG should have handled the available opportunities.
18672 // If we did this folding here, it would be necessary to update the
18673 // MachineBasicBlock CFG, which is awkward.
18674
18675 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18676 // on the target.
18677 if (N1.getOpcode() == ISD::SETCC &&
18678 TLI.isOperationLegalOrCustom(ISD::BR_CC,
18679 N1.getOperand(0).getValueType())) {
18680 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18681 Chain, N1.getOperand(2),
18682 N1.getOperand(0), N1.getOperand(1), N2);
18683 }
18684
18685 if (N1.hasOneUse()) {
18686 // rebuildSetCC calls visitXor which may change the Chain when there is a
18687 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18688 HandleSDNode ChainHandle(Chain);
18689 if (SDValue NewN1 = rebuildSetCC(N1))
18690 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18691 ChainHandle.getValue(), NewN1, N2, N->getFlags());
18692 }
18693
18694 return SDValue();
18695}
18696
18697SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18698 if (N.getOpcode() == ISD::SRL ||
18699 (N.getOpcode() == ISD::TRUNCATE &&
18700 (N.getOperand(0).hasOneUse() &&
18701 N.getOperand(0).getOpcode() == ISD::SRL))) {
18702 // Look past the truncate.
18703 if (N.getOpcode() == ISD::TRUNCATE)
18704 N = N.getOperand(0);
18705
18706 // Match this pattern so that we can generate simpler code:
18707 //
18708 // %a = ...
18709 // %b = and i32 %a, 2
18710 // %c = srl i32 %b, 1
18711 // brcond i32 %c ...
18712 //
18713 // into
18714 //
18715 // %a = ...
18716 // %b = and i32 %a, 2
18717 // %c = setcc eq %b, 0
18718 // brcond %c ...
18719 //
18720 // This applies only when the AND constant value has one bit set and the
18721 // SRL constant is equal to the log2 of the AND constant. The back-end is
18722 // smart enough to convert the result into a TEST/JMP sequence.
18723 SDValue Op0 = N.getOperand(0);
18724 SDValue Op1 = N.getOperand(1);
18725
18726 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18727 SDValue AndOp1 = Op0.getOperand(1);
18728
18729 if (AndOp1.getOpcode() == ISD::Constant) {
18730 const APInt &AndConst = AndOp1->getAsAPIntVal();
18731
18732 if (AndConst.isPowerOf2() &&
18733 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18734 SDLoc DL(N);
18735 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18736 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18737 ISD::SETNE);
18738 }
18739 }
18740 }
18741 }
18742
18743 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18744 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18745 if (N.getOpcode() == ISD::XOR) {
18746 // Because we may call this on a speculatively constructed
18747 // SimplifiedSetCC Node, we need to simplify this node first.
18748 // Ideally this should be folded into SimplifySetCC and not
18749 // here. For now, grab a handle to N so we don't lose it from
18750 // replacements internal to the visit.
18751 HandleSDNode XORHandle(N);
18752 while (N.getOpcode() == ISD::XOR) {
18753 SDValue Tmp = visitXOR(N.getNode());
18754 // No simplification done.
18755 if (!Tmp.getNode())
18756 break;
18757 // Returning N is a form of in-visit replacement that may invalidate
18758 // N. Grab the value from the handle.
18759 if (Tmp.getNode() == N.getNode())
18760 N = XORHandle.getValue();
18761 else // Node simplified. Try simplifying again.
18762 N = Tmp;
18763 }
18764
18765 if (N.getOpcode() != ISD::XOR)
18766 return N;
18767
18768 SDValue Op0 = N->getOperand(0);
18769 SDValue Op1 = N->getOperand(1);
18770
18771 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18772 bool Equal = false;
18773 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18774 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18775 Op0.getValueType() == MVT::i1) {
18776 N = Op0;
18777 Op0 = N->getOperand(0);
18778 Op1 = N->getOperand(1);
18779 Equal = true;
18780 }
18781
18782 EVT SetCCVT = N.getValueType();
18783 if (LegalTypes)
18784 SetCCVT = getSetCCResultType(SetCCVT);
18785 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
18786 // it would introduce illegal operations post-legalization as this can
18787 // result in infinite looping between converting xor->setcc here, and
18788 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
18789 ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
18790 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
18791 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
18792 }
18793 }
18794
18795 return SDValue();
18796}
18797
18798// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18799//
18800SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18801 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18802 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18803
18804 // If N is a constant we could fold this into a fallthrough or unconditional
18805 // branch. However that doesn't happen very often in normal code, because
18806 // Instcombine/SimplifyCFG should have handled the available opportunities.
18807 // If we did this folding here, it would be necessary to update the
18808 // MachineBasicBlock CFG, which is awkward.
18809
18810 // Use SimplifySetCC to simplify SETCC's.
18811 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18812 CondLHS, CondRHS, CC->get(), SDLoc(N),
18813 false);
18814 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18815
18816 // fold to a simpler setcc
18817 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18818 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18819 N->getOperand(0), Simp.getOperand(2),
18820 Simp.getOperand(0), Simp.getOperand(1),
18821 N->getOperand(4));
18822
18823 return SDValue();
18824}
18825
18826static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18827 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18828 const TargetLowering &TLI) {
18829 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18830 if (LD->isIndexed())
18831 return false;
18832 EVT VT = LD->getMemoryVT();
18833 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18834 return false;
18835 Ptr = LD->getBasePtr();
18836 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18837 if (ST->isIndexed())
18838 return false;
18839 EVT VT = ST->getMemoryVT();
18840 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18841 return false;
18842 Ptr = ST->getBasePtr();
18843 IsLoad = false;
18844 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18845 if (LD->isIndexed())
18846 return false;
18847 EVT VT = LD->getMemoryVT();
18848 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18849 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18850 return false;
18851 Ptr = LD->getBasePtr();
18852 IsMasked = true;
18853 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18854 if (ST->isIndexed())
18855 return false;
18856 EVT VT = ST->getMemoryVT();
18857 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18858 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18859 return false;
18860 Ptr = ST->getBasePtr();
18861 IsLoad = false;
18862 IsMasked = true;
18863 } else {
18864 return false;
18865 }
18866 return true;
18867}
18868
18869/// Try turning a load/store into a pre-indexed load/store when the base
18870/// pointer is an add or subtract and it has other uses besides the load/store.
18871/// After the transformation, the new indexed load/store has effectively folded
18872/// the add/subtract in and all of its other uses are redirected to the
18873/// new load/store.
18874bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18875 if (Level < AfterLegalizeDAG)
18876 return false;
18877
18878 bool IsLoad = true;
18879 bool IsMasked = false;
18880 SDValue Ptr;
18881 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18882 Ptr, TLI))
18883 return false;
18884
18885 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18886 // out. There is no reason to make this a preinc/predec.
18887 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18888 Ptr->hasOneUse())
18889 return false;
18890
18891 // Ask the target to do addressing mode selection.
18892 SDValue BasePtr;
18893 SDValue Offset;
18894 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18895 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18896 return false;
18897
18898 // Backends without true r+i pre-indexed forms may need to pass a
18899 // constant base with a variable offset so that constant coercion
18900 // will work with the patterns in canonical form.
18901 bool Swapped = false;
18902 if (isa<ConstantSDNode>(BasePtr)) {
18903 std::swap(BasePtr, Offset);
18904 Swapped = true;
18905 }
18906
18907 // Don't create an indexed load / store with zero offset.
18908 if (isNullConstant(Offset))
18909 return false;
18910
18911 // Try turning it into a pre-indexed load / store except when:
18912 // 1) The new base ptr is a frame index.
18913 // 2) If N is a store and the new base ptr is either the same as or is a
18914 // predecessor of the value being stored.
18915 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18916 // that would create a cycle.
18917 // 4) All uses are load / store ops that use it as old base ptr.
18918
18919 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18920 // (plus the implicit offset) to a register to preinc anyway.
18921 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18922 return false;
18923
18924 // Check #2.
18925 if (!IsLoad) {
18926 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18927 : cast<StoreSDNode>(N)->getValue();
18928
18929 // Would require a copy.
18930 if (Val == BasePtr)
18931 return false;
18932
18933 // Would create a cycle.
18934 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18935 return false;
18936 }
18937
18938 // Caches for hasPredecessorHelper.
18939 SmallPtrSet<const SDNode *, 32> Visited;
18940 SmallVector<const SDNode *, 16> Worklist;
18941 Worklist.push_back(N);
18942
18943 // If the offset is a constant, there may be other adds of constants that
18944 // can be folded with this one. We should do this to avoid having to keep
18945 // a copy of the original base pointer.
18946 SmallVector<SDNode *, 16> OtherUses;
18948 if (isa<ConstantSDNode>(Offset))
18949 for (SDUse &Use : BasePtr->uses()) {
18950 // Skip the use that is Ptr and uses of other results from BasePtr's
18951 // node (important for nodes that return multiple results).
18952 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18953 continue;
18954
18955 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18956 MaxSteps))
18957 continue;
18958
18959 if (Use.getUser()->getOpcode() != ISD::ADD &&
18960 Use.getUser()->getOpcode() != ISD::SUB) {
18961 OtherUses.clear();
18962 break;
18963 }
18964
18965 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
18966 if (!isa<ConstantSDNode>(Op1)) {
18967 OtherUses.clear();
18968 break;
18969 }
18970
18971 // FIXME: In some cases, we can be smarter about this.
18972 if (Op1.getValueType() != Offset.getValueType()) {
18973 OtherUses.clear();
18974 break;
18975 }
18976
18977 OtherUses.push_back(Use.getUser());
18978 }
18979
18980 if (Swapped)
18981 std::swap(BasePtr, Offset);
18982
18983 // Now check for #3 and #4.
18984 bool RealUse = false;
18985
18986 for (SDNode *User : Ptr->users()) {
18987 if (User == N)
18988 continue;
18989 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
18990 return false;
18991
18992 // If Ptr may be folded in addressing mode of other use, then it's
18993 // not profitable to do this transformation.
18994 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
18995 RealUse = true;
18996 }
18997
18998 if (!RealUse)
18999 return false;
19000
19001 SDValue Result;
19002 if (!IsMasked) {
19003 if (IsLoad)
19004 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19005 else
19006 Result =
19007 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19008 } else {
19009 if (IsLoad)
19010 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19011 Offset, AM);
19012 else
19013 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19014 Offset, AM);
19015 }
19016 ++PreIndexedNodes;
19017 ++NodesCombined;
19018 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19019 Result.dump(&DAG); dbgs() << '\n');
19020 WorklistRemover DeadNodes(*this);
19021 if (IsLoad) {
19022 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19023 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19024 } else {
19025 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19026 }
19027
19028 // Finally, since the node is now dead, remove it from the graph.
19029 deleteAndRecombine(N);
19030
19031 if (Swapped)
19032 std::swap(BasePtr, Offset);
19033
19034 // Replace other uses of BasePtr that can be updated to use Ptr
19035 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
19036 unsigned OffsetIdx = 1;
19037 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19038 OffsetIdx = 0;
19039 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
19040 BasePtr.getNode() && "Expected BasePtr operand");
19041
19042 // We need to replace ptr0 in the following expression:
19043 // x0 * offset0 + y0 * ptr0 = t0
19044 // knowing that
19045 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19046 //
19047 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19048 // indexed load/store and the expression that needs to be re-written.
19049 //
19050 // Therefore, we have:
19051 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
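 // Worked example (pre-decrement, not swapped, so x1 = -1 and y1 = 1): with
 // Offset1 = 4 the indexed access yields t1 = ptr0 - 4. Another use
 // t0 = ptr0 + 8 (x0 = 1, y0 = 1, Offset0 = 8) is rewritten below as
 // t0 = t1 + 12, since 8 - (-1)*1*1*4 = 12 and y0*y1 = 1 selects ADD.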
19052
19053 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19054 const APInt &Offset0 = CN->getAPIntValue();
19055 const APInt &Offset1 = Offset->getAsAPIntVal();
19056 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19057 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19058 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19059 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19060
19061 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19062
19063 APInt CNV = Offset0;
19064 if (X0 < 0) CNV = -CNV;
19065 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19066 else CNV = CNV - Offset1;
19067
19068 SDLoc DL(OtherUses[i]);
19069
19070 // We can now generate the new expression.
19071 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19072 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19073
19074 SDValue NewUse = DAG.getNode(Opcode,
19075 DL,
19076 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
19077 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
19078 deleteAndRecombine(OtherUses[i]);
19079 }
19080
19081 // Replace the uses of Ptr with uses of the updated base value.
19082 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19083 deleteAndRecombine(Ptr.getNode());
19084 AddToWorklist(Result.getNode());
19085
19086 return true;
19087}
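// Illustrative shape of the pre-indexed rewrite performed above (LOAD_PRE is
// only notation for whatever indexed form the target provides):
//   ptr = ADD base, 8
//   val = LOAD ptr          ;; ptr has further uses elsewhere
// becomes
//   val, ptr' = LOAD_PRE base, 8
// and the remaining users of the old ADD are redirected to ptr', the address
// result of the indexed load.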
19088
19089static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19090 SDValue &BasePtr, SDValue &Offset,
19091 ISD::MemIndexedMode &AM,
19092 SelectionDAG &DAG,
19093 const TargetLowering &TLI) {
19094 if (PtrUse == N ||
19095 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19096 return false;
19097
19098 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19099 return false;
19100
19101 // Don't create an indexed load / store with zero offset.
19102 if (isNullConstant(Offset))
19103 return false;
19104
19105 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19106 return false;
19107
19110 for (SDNode *User : BasePtr->users()) {
19111 if (User == Ptr.getNode())
19112 continue;
19113
19114 // Don't do this if there's a later user which could perform the index instead.
19115 if (isa<MemSDNode>(User)) {
19116 bool IsLoad = true;
19117 bool IsMasked = false;
19118 SDValue OtherPtr;
19119 if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19120 IsMasked, OtherPtr, TLI)) {
19121 SmallVector<const SDNode *, 2> Worklist;
19122 Worklist.push_back(User);
19123 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19124 return false;
19125 }
19126 }
19127
19128 // If all the uses are load / store addresses, then don't do the
19129 // transformation.
19130 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19131 for (SDNode *UserUser : User->users())
19132 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19133 return false;
19134 }
19135 }
19136 return true;
19137}
19138
19139static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19140 bool &IsMasked, SDValue &Ptr,
19141 SDValue &BasePtr, SDValue &Offset,
19142 ISD::MemIndexedMode &AM,
19143 SelectionDAG &DAG,
19144 const TargetLowering &TLI) {
19145 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19146 IsMasked, Ptr, TLI) ||
19147 Ptr->hasOneUse())
19148 return nullptr;
19149
19150 // Try turning it into a post-indexed load / store except when
19151 // 1) All uses are load / store ops that use it as base ptr (and
19152 // it may be folded as addressing mmode).
19153 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19154 // nor a successor of N. Otherwise, if Op is folded that would
19155 // create a cycle.
19157 for (SDNode *Op : Ptr->users()) {
19158 // Check for #1.
19159 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19160 continue;
19161
19162 // Check for #2.
19163 SmallPtrSet<const SDNode *, 32> Visited;
19164 SmallVector<const SDNode *, 8> Worklist;
19165 // Ptr is predecessor to both N and Op.
19166 Visited.insert(Ptr.getNode());
19167 Worklist.push_back(N);
19168 Worklist.push_back(Op);
19169 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19170 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19171 return Op;
19172 }
19173 return nullptr;
19174}
19175
19176/// Try to combine a load/store with an add/sub of the base pointer node into a
19177/// post-indexed load/store. The transformation effectively folds the add/subtract
19178/// into the new indexed load/store, and all of its other uses are redirected to
19179/// the new load/store.
19180bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19181 if (Level < AfterLegalizeDAG)
19182 return false;
19183
19184 bool IsLoad = true;
19185 bool IsMasked = false;
19186 SDValue Ptr;
19187 SDValue BasePtr;
19188 SDValue Offset;
19189 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19190 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19191 Offset, AM, DAG, TLI);
19192 if (!Op)
19193 return false;
19194
19196 if (!IsMasked)
19197 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19198 Offset, AM)
19199 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19200 BasePtr, Offset, AM);
19201 else
19202 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19203 BasePtr, Offset, AM)
19204 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19205 BasePtr, Offset, AM);
19206 ++PostIndexedNodes;
19207 ++NodesCombined;
19208 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19209 Result.dump(&DAG); dbgs() << '\n');
19210 WorklistRemover DeadNodes(*this);
19211 if (IsLoad) {
19212 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19213 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19214 } else {
19215 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19216 }
19217
19218 // Finally, since the node is now dead, remove it from the graph.
19219 deleteAndRecombine(N);
19220
19221 // Replace the uses of Op with uses of the updated base value.
19222 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
19223 Result.getValue(IsLoad ? 1 : 0));
19224 deleteAndRecombine(Op);
19225 return true;
19226}
19227
19228/// Return the base-pointer arithmetic from an indexed \p LD.
19229SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19230 ISD::MemIndexedMode AM = LD->getAddressingMode();
19231 assert(AM != ISD::UNINDEXED);
19232 SDValue BP = LD->getOperand(1);
19233 SDValue Inc = LD->getOperand(2);
19234
19235 // Some backends use TargetConstants for load offsets, but don't expect
19236 // TargetConstants in general ADD nodes. We can convert these constants into
19237 // regular Constants (if the constant is not opaque).
19238 assert((Inc.getOpcode() != ISD::TargetConstant ||
19239 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19240 "Cannot split out indexing using opaque target constants");
19241 if (Inc.getOpcode() == ISD::TargetConstant) {
19242 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19243 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19244 ConstInc->getValueType(0));
19245 }
19246
19247 unsigned Opc =
19248 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19249 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19250}
19251
19252static inline ElementCount numVectorEltsOrZero(EVT T) {
19253 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19254}
19255
19256bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19257 EVT STType = Val.getValueType();
19258 EVT STMemType = ST->getMemoryVT();
19259 if (STType == STMemType)
19260 return true;
19261 if (isTypeLegal(STMemType))
19262 return false; // fail.
19263 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
19264 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
19265 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
19266 return true;
19267 }
19268 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
19269 STType.isInteger() && STMemType.isInteger()) {
19270 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
19271 return true;
19272 }
19273 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
19274 Val = DAG.getBitcast(STMemType, Val);
19275 return true;
19276 }
19277 return false; // fail.
19278}
19279
19280bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
19281 EVT LDMemType = LD->getMemoryVT();
19282 EVT LDType = LD->getValueType(0);
19283 assert(Val.getValueType() == LDMemType &&
19284 "Attempting to extend value of non-matching type");
19285 if (LDType == LDMemType)
19286 return true;
19287 if (LDMemType.isInteger() && LDType.isInteger()) {
19288 switch (LD->getExtensionType()) {
19289 case ISD::NON_EXTLOAD:
19290 Val = DAG.getBitcast(LDType, Val);
19291 return true;
19292 case ISD::EXTLOAD:
19293 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
19294 return true;
19295 case ISD::SEXTLOAD:
19296 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
19297 return true;
19298 case ISD::ZEXTLOAD:
19299 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
19300 return true;
19301 }
19302 }
19303 return false;
19304}
19305
19306StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
19307 int64_t &Offset) {
19308 SDValue Chain = LD->getOperand(0);
19309
19310 // Look through CALLSEQ_START.
19311 if (Chain.getOpcode() == ISD::CALLSEQ_START)
19312 Chain = Chain->getOperand(0);
19313
19314 StoreSDNode *ST = nullptr;
19315 SmallVector<SDValue, 8> Aliases;
19316 if (Chain.getOpcode() == ISD::TokenFactor) {
19317 // Look for unique store within the TokenFactor.
19318 for (SDValue Op : Chain->ops()) {
19319 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
19320 if (!Store)
19321 continue;
19322 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19323 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19324 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19325 continue;
19326 // Make sure the store is not aliased with any nodes in TokenFactor.
19327 GatherAllAliases(Store, Chain, Aliases);
19328 if (Aliases.empty() ||
19329 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
19330 ST = Store;
19331 break;
19332 }
19333 } else {
19334 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
19335 if (Store) {
19336 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19337 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19338 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19339 ST = Store;
19340 }
19341 }
19342
19343 return ST;
19344}
19345
19346SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
19347 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
19348 return SDValue();
19349 SDValue Chain = LD->getOperand(0);
19350 int64_t Offset;
19351
19352 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
19353 // TODO: Relax this restriction for unordered atomics (see D66309)
19354 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
19355 return SDValue();
19356
19357 EVT LDType = LD->getValueType(0);
19358 EVT LDMemType = LD->getMemoryVT();
19359 EVT STMemType = ST->getMemoryVT();
19360 EVT STType = ST->getValue().getValueType();
19361
19362 // There are two cases to consider here:
19363 // 1. The store is fixed width and the load is scalable. In this case we
19364 // don't know at compile time if the store completely envelops the load
19365 // so we abandon the optimisation.
19366 // 2. The store is scalable and the load is fixed width. We could
19367 // potentially support a limited number of cases here, but there has been
19368 // no cost-benefit analysis to prove it's worth it.
19369 bool LdStScalable = LDMemType.isScalableVT();
19370 if (LdStScalable != STMemType.isScalableVT())
19371 return SDValue();
19372
19373 // If we are dealing with scalable vectors on a big endian platform the
19374 // calculation of offsets below becomes trickier, since we do not know at
19375 // compile time the absolute size of the vector. Until we've done more
19376 // analysis on big-endian platforms it seems better to bail out for now.
19377 if (LdStScalable && DAG.getDataLayout().isBigEndian())
19378 return SDValue();
19379
19380 // Normalize for Endianness. After this Offset=0 will denote that the least
19381 // significant bit in the loaded value maps to the least significant bit in
19382 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
19383 // n:th least significant byte of the stored value.
19384 int64_t OrigOffset = Offset;
19385 if (DAG.getDataLayout().isBigEndian())
19386 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
19387 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
19388 8 -
19389 Offset;
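 // For example, on a big-endian target where an 8-byte store feeds a 2-byte
 // load at the same address (OrigOffset == 0), the loaded bytes are the most
 // significant ones of the stored value, so Offset becomes (64 - 16) / 8 - 0,
 // i.e. 6: the load starts at the 6th least significant byte of the store.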
19390
19391 // Check that the stored value covers all bits that are loaded.
19392 bool STCoversLD;
19393
19394 TypeSize LdMemSize = LDMemType.getSizeInBits();
19395 TypeSize StMemSize = STMemType.getSizeInBits();
19396 if (LdStScalable)
19397 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
19398 else
19399 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
19400 StMemSize.getFixedValue());
19401
19402 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
19403 if (LD->isIndexed()) {
19404 // Cannot handle opaque target constants and we must respect the user's
19405 // request not to split indexes from loads.
19406 if (!canSplitIdx(LD))
19407 return SDValue();
19408 SDValue Idx = SplitIndexingFromLoad(LD);
19409 SDValue Ops[] = {Val, Idx, Chain};
19410 return CombineTo(LD, Ops, 3);
19411 }
19412 return CombineTo(LD, Val, Chain);
19413 };
19414
19415 if (!STCoversLD)
19416 return SDValue();
19417
19418 // Memory as copy space (potentially masked).
19419 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
19420 // Simple case: Direct non-truncating forwarding
19421 if (LDType.getSizeInBits() == LdMemSize)
19422 return ReplaceLd(LD, ST->getValue(), Chain);
19423 // Can we model the truncate and extension with an and mask?
19424 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
19425 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
19426 // Mask to size of LDMemType
19427 auto Mask =
19428 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
19429 StMemSize.getFixedValue()),
19430 SDLoc(ST), STType);
19431 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
19432 return ReplaceLd(LD, Val, Chain);
19433 }
19434 }
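 // For example, a truncating i16 store of an i32 value followed by a
 // zero-extending i16 load of the same address forwards as
 //   Val = AND i32 StoredVal, 0xFFFF
 // modelling the truncate-then-zext pair without going through memory.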
19435
19436 // Handle some cases for big-endian that would be Offset 0 and handled for
19437 // little-endian.
19438 SDValue Val = ST->getValue();
19439 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
19440 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
19441 !LDType.isVector() && isTypeLegal(STType) &&
19442 TLI.isOperationLegal(ISD::SRL, STType)) {
19443 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
19444 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
19445 Offset = 0;
19446 }
19447 }
19448
19449 // TODO: Deal with nonzero offset.
19450 if (LD->getBasePtr().isUndef() || Offset != 0)
19451 return SDValue();
19452 // Model necessary truncations / extensions.
19453 // Truncate Value To Stored Memory Size.
19454 do {
19455 if (!getTruncatedStoreValue(ST, Val))
19456 break;
19457 if (!isTypeLegal(LDMemType))
19458 break;
19459 if (STMemType != LDMemType) {
19460 // TODO: Support vectors? This requires extract_subvector/bitcast.
19461 if (!STMemType.isVector() && !LDMemType.isVector() &&
19462 STMemType.isInteger() && LDMemType.isInteger())
19463 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19464 else
19465 break;
19466 }
19467 if (!extendLoadedValueToExtension(LD, Val))
19468 break;
19469 return ReplaceLd(LD, Val, Chain);
19470 } while (false);
19471
19472 // On failure, cleanup dead nodes we may have created.
19473 if (Val->use_empty())
19474 deleteAndRecombine(Val.getNode());
19475 return SDValue();
19476}
19477
19478SDValue DAGCombiner::visitLOAD(SDNode *N) {
19479 LoadSDNode *LD = cast<LoadSDNode>(N);
19480 SDValue Chain = LD->getChain();
19481 SDValue Ptr = LD->getBasePtr();
19482
19483 // If load is not volatile and there are no uses of the loaded value (and
19484 // the updated indexed value in case of indexed loads), change uses of the
19485 // chain value into uses of the chain input (i.e. delete the dead load).
19486 // TODO: Allow this for unordered atomics (see D66309)
19487 if (LD->isSimple()) {
19488 if (N->getValueType(1) == MVT::Other) {
19489 // Unindexed loads.
19490 if (!N->hasAnyUseOfValue(0)) {
19491 // It's not safe to use the two value CombineTo variant here. e.g.
19492 // v1, chain2 = load chain1, loc
19493 // v2, chain3 = load chain2, loc
19494 // v3 = add v2, c
19495 // Now we replace use of chain2 with chain1. This makes the second load
19496 // isomorphic to the one we are deleting, and thus makes this load live.
19497 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19498 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19499 dbgs() << "\n");
19500 WorklistRemover DeadNodes(*this);
19501 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19502 AddUsersToWorklist(Chain.getNode());
19503 if (N->use_empty())
19504 deleteAndRecombine(N);
19505
19506 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19507 }
19508 } else {
19509 // Indexed loads.
19510 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19511
19512 // If this load has an opaque TargetConstant offset, then we cannot split
19513 // the indexing into an add/sub directly (that TargetConstant may not be
19514 // valid for a different type of node, and we cannot convert an opaque
19515 // target constant into a regular constant).
19516 bool CanSplitIdx = canSplitIdx(LD);
19517
19518 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19519 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19520 SDValue Index;
19521 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19522 Index = SplitIndexingFromLoad(LD);
19523 // Try to fold the base pointer arithmetic into subsequent loads and
19524 // stores.
19525 AddUsersToWorklist(N);
19526 } else
19527 Index = DAG.getUNDEF(N->getValueType(1));
19528 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19529 dbgs() << "\nWith: "; Undef.dump(&DAG);
19530 dbgs() << " and 2 other values\n");
19531 WorklistRemover DeadNodes(*this);
19532 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19533 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19534 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19535 deleteAndRecombine(N);
19536 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19537 }
19538 }
19539 }
19540
19541 // If this load is directly stored, replace the load value with the stored
19542 // value.
19543 if (auto V = ForwardStoreValueToDirectLoad(LD))
19544 return V;
19545
19546 // Try to infer better alignment information than the load already has.
19547 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19548 !LD->isAtomic()) {
19549 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19550 if (*Alignment > LD->getAlign() &&
19551 isAligned(*Alignment, LD->getSrcValueOffset())) {
19552 SDValue NewLoad = DAG.getExtLoad(
19553 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19554 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19555 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19556 // NewLoad will always be N as we are only refining the alignment
19557 assert(NewLoad.getNode() == N);
19558 (void)NewLoad;
19559 }
19560 }
19561 }
19562
19563 if (LD->isUnindexed()) {
19564 // Walk up chain skipping non-aliasing memory nodes.
19565 SDValue BetterChain = FindBetterChain(LD, Chain);
19566
19567 // If there is a better chain.
19568 if (Chain != BetterChain) {
19569 SDValue ReplLoad;
19570
19571 // Replace the chain to avoid dependency.
19572 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19573 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19574 BetterChain, Ptr, LD->getMemOperand());
19575 } else {
19576 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19577 LD->getValueType(0),
19578 BetterChain, Ptr, LD->getMemoryVT(),
19579 LD->getMemOperand());
19580 }
19581
19582 // Create token factor to keep old chain connected.
19583 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19584 MVT::Other, Chain, ReplLoad.getValue(1));
19585
19586 // Replace uses with load result and token factor
19587 return CombineTo(N, ReplLoad.getValue(0), Token);
19588 }
19589 }
19590
19591 // Try transforming N to an indexed load.
19592 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19593 return SDValue(N, 0);
19594
19595 // Try to slice up N to more direct loads if the slices are mapped to
19596 // different register banks or pairing can take place.
19597 if (SliceUpLoad(N))
19598 return SDValue(N, 0);
19599
19600 return SDValue();
19601}
19602
19603namespace {
19604
19605/// Helper structure used to slice a load in smaller loads.
19606/// Basically a slice is obtained from the following sequence:
19607/// Origin = load Ty1, Base
19608/// Shift = srl Ty1 Origin, CstTy Amount
19609/// Inst = trunc Shift to Ty2
19610///
19611/// Then, it will be rewritten into:
19612/// Slice = load SliceTy, Base + SliceOffset
19613/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19614///
19615/// SliceTy is deduced from the number of bits that are actually used to
19616/// build Inst.
19617struct LoadedSlice {
19618 /// Helper structure used to compute the cost of a slice.
19619 struct Cost {
19620 /// Are we optimizing for code size.
19621 bool ForCodeSize = false;
19622
19623 /// Various cost.
19624 unsigned Loads = 0;
19625 unsigned Truncates = 0;
19626 unsigned CrossRegisterBanksCopies = 0;
19627 unsigned ZExts = 0;
19628 unsigned Shift = 0;
19629
19630 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19631
19632 /// Get the cost of one isolated slice.
19633 Cost(const LoadedSlice &LS, bool ForCodeSize)
19634 : ForCodeSize(ForCodeSize), Loads(1) {
19635 EVT TruncType = LS.Inst->getValueType(0);
19636 EVT LoadedType = LS.getLoadedType();
19637 if (TruncType != LoadedType &&
19638 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19639 ZExts = 1;
19640 }
19641
19642 /// Account for slicing gain in the current cost.
19643 /// Slicing provides a few gains, like removing a shift or a
19644 /// truncate. This method allows growing the cost of the original
19645 /// load with the gain from this slice.
19646 void addSliceGain(const LoadedSlice &LS) {
19647 // Each slice saves a truncate.
19648 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19649 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19650 ++Truncates;
19651 // If there is a shift amount, this slice gets rid of it.
19652 if (LS.Shift)
19653 ++Shift;
19654 // If this slice can merge a cross register bank copy, account for it.
19655 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19656 ++CrossRegisterBanksCopies;
19657 }
19658
19659 Cost &operator+=(const Cost &RHS) {
19660 Loads += RHS.Loads;
19661 Truncates += RHS.Truncates;
19662 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19663 ZExts += RHS.ZExts;
19664 Shift += RHS.Shift;
19665 return *this;
19666 }
19667
19668 bool operator==(const Cost &RHS) const {
19669 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19670 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19671 ZExts == RHS.ZExts && Shift == RHS.Shift;
19672 }
19673
19674 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19675
19676 bool operator<(const Cost &RHS) const {
19677 // Assume cross register banks copies are as expensive as loads.
19678 // FIXME: Do we want some more target hooks?
19679 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19680 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19681 // Unless we are optimizing for code size, consider the
19682 // expensive operation first.
19683 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19684 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19685 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19686 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19687 }
19688
19689 bool operator>(const Cost &RHS) const { return RHS < *this; }
19690
19691 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19692
19693 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19694 };
19695
19696 // The last instruction that represents the slice. This should be a
19697 // truncate instruction.
19698 SDNode *Inst;
19699
19700 // The original load instruction.
19701 LoadSDNode *Origin;
19702
19703 // The right shift amount in bits from the original load.
19704 unsigned Shift;
19705
19706 // The DAG from which Origin comes.
19707 // This is used to get some contextual information about legal types, etc.
19708 SelectionDAG *DAG;
19709
19710 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19711 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19712 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19713
19714 /// Get the bits used in a chunk of bits \p BitWidth large.
19715 /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
19716 /// unused bits set to 0.
19717 APInt getUsedBits() const {
19718 // Reproduce the trunc(lshr) sequence:
19719 // - Start from the truncated value.
19720 // - Zero extend to the desired bit width.
19721 // - Shift left.
19722 assert(Origin && "No original load to compare against.");
19723 unsigned BitWidth = Origin->getValueSizeInBits(0);
19724 assert(Inst && "This slice is not bound to an instruction");
19725 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19726 "Extracted slice is bigger than the whole type!");
19727 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19728 UsedBits.setAllBits();
19729 UsedBits = UsedBits.zext(BitWidth);
19730 UsedBits <<= Shift;
19731 return UsedBits;
19732 }
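  // Illustrative example (values chosen for exposition, not from the original
  // source): for an i32 origin load feeding "trunc (srl Origin, 16) to i8",
  // Inst is i8 and Shift is 16, so getUsedBits() returns 0x00FF0000
  // (bits 16..23 set).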
19733
19734 /// Get the size of the slice to be loaded in bytes.
19735 unsigned getLoadedSize() const {
19736 unsigned SliceSize = getUsedBits().popcount();
19737 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19738 return SliceSize / 8;
19739 }
19740
19741 /// Get the type that will be loaded for this slice.
19742 /// Note: This may not be the final type for the slice.
19743 EVT getLoadedType() const {
19744 assert(DAG && "Missing context");
19745 LLVMContext &Ctxt = *DAG->getContext();
19746 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19747 }
19748
19749 /// Get the alignment of the load used for this slice.
19750 Align getAlign() const {
19751 Align Alignment = Origin->getAlign();
19752 uint64_t Offset = getOffsetFromBase();
19753 if (Offset != 0)
19754 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19755 return Alignment;
19756 }
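  // Illustrative example: with an origin alignment of 8 and a slice offset of
  // 2 bytes, commonAlignment(8, 8 + 2) yields an alignment of 2 for the
  // sliced load.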
19757
19758 /// Check if this slice can be rewritten with legal operations.
19759 bool isLegal() const {
19760 // An invalid slice is not legal.
19761 if (!Origin || !Inst || !DAG)
19762 return false;
19763
19764 // Offsets are for indexed loads only; we do not handle that.
19765 if (!Origin->getOffset().isUndef())
19766 return false;
19767
19768 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19769
19770 // Check that the type is legal.
19771 EVT SliceType = getLoadedType();
19772 if (!TLI.isTypeLegal(SliceType))
19773 return false;
19774
19775 // Check that the load is legal for this type.
19776 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19777 return false;
19778
19779 // Check that the offset can be computed.
19780 // 1. Check its type.
19781 EVT PtrType = Origin->getBasePtr().getValueType();
19782 if (PtrType == MVT::Untyped || PtrType.isExtended())
19783 return false;
19784
19785 // 2. Check that it fits in the immediate.
19786 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19787 return false;
19788
19789 // 3. Check that the computation is legal.
19790 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19791 return false;
19792
19793 // Check that the zext is legal if it needs one.
19794 EVT TruncateType = Inst->getValueType(0);
19795 if (TruncateType != SliceType &&
19796 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19797 return false;
19798
19799 return true;
19800 }
19801
19802 /// Get the offset in bytes of this slice in the original chunk of
19803 /// bits.
19804 /// \pre DAG != nullptr.
19805 uint64_t getOffsetFromBase() const {
19806 assert(DAG && "Missing context.");
19807 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19808 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19809 uint64_t Offset = Shift / 8;
19810 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19811 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19812 "The size of the original loaded type is not a multiple of a"
19813 " byte.");
19814 // If Offset is bigger than TySizeInBytes, it means we are loading all
19815 // zeros. This should have been optimized before in the process.
19816 assert(TySizeInBytes > Offset &&
19817 "Invalid shift amount for given loaded size");
19818 if (IsBigEndian)
19819 Offset = TySizeInBytes - Offset - getLoadedSize();
19820 return Offset;
19821 }
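  // Illustrative example: for an i32 origin load and an i8 slice with
  // Shift == 16, Offset is 2 on little-endian targets and 4 - 2 - 1 == 1 on
  // big-endian targets.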
19822
19823 /// Generate the sequence of instructions to load the slice
19824 /// represented by this object and redirect the uses of this slice to
19825 /// this new sequence of instructions.
19826 /// \pre this->Inst && this->Origin are valid Instructions and this
19827 /// object passed the legal check: LoadedSlice::isLegal returned true.
19828 /// \return The last instruction of the sequence used to load the slice.
19829 SDValue loadSlice() const {
19830 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19831 const SDValue &OldBaseAddr = Origin->getBasePtr();
19832 SDValue BaseAddr = OldBaseAddr;
19833 // Get the offset in that chunk of bytes w.r.t. the endianness.
19834 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19835 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19836 if (Offset) {
19837 // BaseAddr = BaseAddr + Offset.
19838 EVT ArithType = BaseAddr.getValueType();
19839 SDLoc DL(Origin);
19840 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19841 DAG->getConstant(Offset, DL, ArithType));
19842 }
19843
19844 // Create the type of the loaded slice according to its size.
19845 EVT SliceType = getLoadedType();
19846
19847 // Create the load for the slice.
19848 SDValue LastInst =
19849 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19850 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19851 Origin->getMemOperand()->getFlags());
19852 // If the final type is not the same as the loaded type, this means that
19853 // we have to pad with zero. Create a zero extend for that.
19854 EVT FinalType = Inst->getValueType(0);
19855 if (SliceType != FinalType)
19856 LastInst =
19857 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19858 return LastInst;
19859 }
19860
19861 /// Check if this slice can be merged with an expensive cross register
19862 /// bank copy. E.g.,
19863 /// i = load i32
19864 /// f = bitcast i32 i to float
19865 bool canMergeExpensiveCrossRegisterBankCopy() const {
19866 if (!Inst || !Inst->hasOneUse())
19867 return false;
19868 SDNode *User = *Inst->user_begin();
19869 if (User->getOpcode() != ISD::BITCAST)
19870 return false;
19871 assert(DAG && "Missing context");
19872 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19873 EVT ResVT = User->getValueType(0);
19874 const TargetRegisterClass *ResRC =
19875 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
19876 const TargetRegisterClass *ArgRC =
19877 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
19878 User->getOperand(0)->isDivergent());
19879 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19880 return false;
19881
19882 // At this point, we know that we perform a cross-register-bank copy.
19883 // Check if it is expensive.
19884 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19885 // Assume bitcasts are cheap, unless both register classes do not
19886 // explicitly share a common sub class.
19887 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19888 return false;
19889
19890 // Check if it will be merged with the load.
19891 // 1. Check the alignment / fast memory access constraint.
19892 unsigned IsFast = 0;
19893 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19894 Origin->getAddressSpace(), getAlign(),
19895 Origin->getMemOperand()->getFlags(), &IsFast) ||
19896 !IsFast)
19897 return false;
19898
19899 // 2. Check that the load is a legal operation for that type.
19900 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19901 return false;
19902
19903 // 3. Check that we do not have a zext in the way.
19904 if (Inst->getValueType(0) != getLoadedType())
19905 return false;
19906
19907 return true;
19908 }
19909};
19910
19911} // end anonymous namespace
19912
19913/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19914/// \p UsedBits looks like 0..0 1..1 0..0.
19915static bool areUsedBitsDense(const APInt &UsedBits) {
19916 // If all the bits are one, this is dense!
19917 if (UsedBits.isAllOnes())
19918 return true;
19919
19920 // Get rid of the unused bits on the right.
19921 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19922 // Get rid of the unused bits on the left.
19923 if (NarrowedUsedBits.countl_zero())
19924 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19925 // Check that the chunk of bits is completely used.
19926 return NarrowedUsedBits.isAllOnes();
19927}
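// Illustrative example: 0b0001111000 is dense (a single contiguous run of
// ones), while 0b0001101000 is not.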
19928
19929/// Check whether or not \p First and \p Second are next to each other
19930/// in memory. This means that there is no hole between the bits loaded
19931/// by \p First and the bits loaded by \p Second.
19932static bool areSlicesNextToEachOther(const LoadedSlice &First,
19933 const LoadedSlice &Second) {
19934 assert(First.Origin == Second.Origin && First.Origin &&
19935 "Unable to match different memory origins.");
19936 APInt UsedBits = First.getUsedBits();
19937 assert((UsedBits & Second.getUsedBits()) == 0 &&
19938 "Slices are not supposed to overlap.");
19939 UsedBits |= Second.getUsedBits();
19940 return areUsedBitsDense(UsedBits);
19941}
19942
19943/// Adjust the \p GlobalLSCost according to the target
19944/// pairing capabilities and the layout of the slices.
19945/// \pre \p GlobalLSCost should account for at least as many loads as
19946/// there are in the slices in \p LoadedSlices.
19947static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19948 LoadedSlice::Cost &GlobalLSCost) {
19949 unsigned NumberOfSlices = LoadedSlices.size();
19951 // If there are fewer than 2 elements, no pairing is possible.
19951 if (NumberOfSlices < 2)
19952 return;
19953
19954 // Sort the slices so that elements that are likely to be next to each
19955 // other in memory are next to each other in the list.
19956 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19957 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19958 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19959 });
19960 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19961 // First (resp. Second) is the first (resp. second) potential candidate
19962 // to be placed in a paired load.
19963 const LoadedSlice *First = nullptr;
19964 const LoadedSlice *Second = nullptr;
19965 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19966 // Set the beginning of the pair.
19967 First = Second) {
19968 Second = &LoadedSlices[CurrSlice];
19969
19970 // If First is NULL, it means we start a new pair.
19971 // Get to the next slice.
19972 if (!First)
19973 continue;
19974
19975 EVT LoadedType = First->getLoadedType();
19976
19977 // If the types of the slices are different, we cannot pair them.
19978 if (LoadedType != Second->getLoadedType())
19979 continue;
19980
19981 // Check if the target supplies paired loads for this type.
19982 Align RequiredAlignment;
19983 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19984 // move to the next pair, this type is hopeless.
19985 Second = nullptr;
19986 continue;
19987 }
19988 // Check if we meet the alignment requirement.
19989 if (First->getAlign() < RequiredAlignment)
19990 continue;
19991
19992 // Check that both loads are next to each other in memory.
19993 if (!areSlicesNextToEachOther(*First, *Second))
19994 continue;
19995
19996 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19997 --GlobalLSCost.Loads;
19998 // Move to the next pair.
19999 Second = nullptr;
20000 }
20001}
20002
20003/// Check the profitability of all involved LoadedSlice.
20004/// Currently, it is considered profitable if there are exactly two
20005/// involved slices (1) which are (2) next to each other in memory, and
20006/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20007///
20008/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20009/// the elements themselves.
20010///
20011/// FIXME: When the cost model is mature enough, we can relax
20012/// constraints (1) and (2).
20013static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20014 const APInt &UsedBits, bool ForCodeSize) {
20015 unsigned NumberOfSlices = LoadedSlices.size();
20016 if (StressLoadSlicing)
20017 return NumberOfSlices > 1;
20018
20019 // Check (1).
20020 if (NumberOfSlices != 2)
20021 return false;
20022
20023 // Check (2).
20024 if (!areUsedBitsDense(UsedBits))
20025 return false;
20026
20027 // Check (3).
20028 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20029 // The original code has one big load.
20030 OrigCost.Loads = 1;
20031 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20032 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20033 // Accumulate the cost of all the slices.
20034 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20035 GlobalSlicingCost += SliceCost;
20036
20037 // Account as cost in the original configuration the gain obtained
20038 // with the current slices.
20039 OrigCost.addSliceGain(LS);
20040 }
20041
20042 // If the target supports paired load, adjust the cost accordingly.
20043 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20044 return OrigCost > GlobalSlicingCost;
20045}
20046
20047/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20048/// operations, split it in the various pieces being extracted.
20049///
20050/// This sort of thing is introduced by SROA.
20051/// This slicing takes care not to insert overlapping loads.
20052/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20053bool DAGCombiner::SliceUpLoad(SDNode *N) {
20054 if (Level < AfterLegalizeDAG)
20055 return false;
20056
20057 LoadSDNode *LD = cast<LoadSDNode>(N);
20058 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20059 !LD->getValueType(0).isInteger())
20060 return false;
20061
20062 // The algorithm to split up a load of a scalable vector into individual
20063 // elements currently requires knowing the length of the loaded type,
20064 // so will need adjusting to work on scalable vectors.
20065 if (LD->getValueType(0).isScalableVector())
20066 return false;
20067
20068 // Keep track of already used bits to detect overlapping values.
20069 // In that case, we will just abort the transformation.
20070 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20071
20072 SmallVector<LoadedSlice, 4> LoadedSlices;
20073
20074 // Check if this load is used as several smaller chunks of bits.
20075 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20076 // of computation for each trunc.
20077 for (SDUse &U : LD->uses()) {
20078 // Skip the uses of the chain.
20079 if (U.getResNo() != 0)
20080 continue;
20081
20082 SDNode *User = U.getUser();
20083 unsigned Shift = 0;
20084
20085 // Check if this is a trunc(lshr).
20086 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20087 isa<ConstantSDNode>(User->getOperand(1))) {
20088 Shift = User->getConstantOperandVal(1);
20089 User = *User->user_begin();
20090 }
20091
20092 // At this point, User is a truncate iff we encountered a trunc or
20093 // trunc(lshr).
20094 if (User->getOpcode() != ISD::TRUNCATE)
20095 return false;
20096
20097 // The width of the type must be a power of 2 and at least 8 bits.
20098 // Otherwise the load cannot be represented in LLVM IR.
20099 // Moreover, if we shifted by a non-multiple of 8 bits, the slice
20100 // would straddle byte boundaries. We do not support that.
20101 unsigned Width = User->getValueSizeInBits(0);
20102 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20103 return false;
20104
20105 // Build the slice for this chain of computations.
20106 LoadedSlice LS(User, LD, Shift, &DAG);
20107 APInt CurrentUsedBits = LS.getUsedBits();
20108
20109 // Check if this slice overlaps with another.
20110 if ((CurrentUsedBits & UsedBits) != 0)
20111 return false;
20112 // Update the bits used globally.
20113 UsedBits |= CurrentUsedBits;
20114
20115 // Check if the new slice would be legal.
20116 if (!LS.isLegal())
20117 return false;
20118
20119 // Record the slice.
20120 LoadedSlices.push_back(LS);
20121 }
20122
20123 // Abort slicing if it does not seem to be profitable.
20124 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20125 return false;
20126
20127 ++SlicedLoads;
20128
20129 // Rewrite each chain to use an independent load.
20130 // By construction, each chain can be represented by a unique load.
20131
20132 // Prepare the argument for the new token factor for all the slices.
20133 SmallVector<SDValue, 8> ArgChains;
20134 for (const LoadedSlice &LS : LoadedSlices) {
20135 SDValue SliceInst = LS.loadSlice();
20136 CombineTo(LS.Inst, SliceInst, true);
20137 if (SliceInst.getOpcode() != ISD::LOAD)
20138 SliceInst = SliceInst.getOperand(0);
20139 assert(SliceInst->getOpcode() == ISD::LOAD &&
20140 "It takes more than a zext to get to the loaded slice!!");
20141 ArgChains.push_back(SliceInst.getValue(1));
20142 }
20143
20144 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20145 ArgChains);
20146 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20147 AddToWorklist(Chain.getNode());
20148 return true;
20149}
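// Illustrative example (when the cost model deems it profitable): the pattern
//   x = load i32 p
//   a = trunc x to i16
//   b = trunc (srl x, 16) to i16
// can be rewritten into two independent i16 loads from p and p+2 (on a
// little-endian target), tied together by a TokenFactor on the chain.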
20150
20151/// Check to see if V is (and load (ptr), imm), where the load has
20152/// specific bytes cleared out. If so, return the byte size being masked out
20153/// and the shift amount.
20154static std::pair<unsigned, unsigned>
20155CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
20156 std::pair<unsigned, unsigned> Result(0, 0);
20157
20158 // Check for the structure we're looking for.
20159 if (V->getOpcode() != ISD::AND ||
20160 !isa<ConstantSDNode>(V->getOperand(1)) ||
20161 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20162 return Result;
20163
20164 // Check the chain and pointer.
20165 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20166 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20167
20168 // This only handles simple types.
20169 if (V.getValueType() != MVT::i16 &&
20170 V.getValueType() != MVT::i32 &&
20171 V.getValueType() != MVT::i64)
20172 return Result;
20173
20174 // Check the constant mask. Invert it so that the bits being masked out are
20175 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
20176 // follow the sign bit for uniformity.
20177 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20178 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20179 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20180 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20181 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20182 if (NotMaskLZ == 64) return Result; // All zero mask.
20183
20184 // See if we have a continuous run of bits. If so, we have 0*1+0*
20185 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20186 return Result;
20187
20188 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20189 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20190 NotMaskLZ -= 64-V.getValueSizeInBits();
20191
20192 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20193 switch (MaskedBytes) {
20194 case 1:
20195 case 2:
20196 case 4: break;
20197 default: return Result; // All one mask, or 5-byte mask.
20198 }
20199
20200 // Verify that the first bit starts at a multiple of mask so that the access
20201 // is aligned the same as the access width.
20202 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20203
20204 // For narrowing to be valid, it must be the case that the load is the
20205 // memory operation immediately preceding the store.
20206 if (LD == Chain.getNode())
20207 ; // ok.
20208 else if (Chain->getOpcode() == ISD::TokenFactor &&
20209 SDValue(LD, 1).hasOneUse()) {
20210 // LD has only 1 chain use, so there are no indirect dependencies.
20211 if (!LD->isOperandOf(Chain.getNode()))
20212 return Result;
20213 } else
20214 return Result; // Fail.
20215
20216 Result.first = MaskedBytes;
20217 Result.second = NotMaskTZ/8;
20218 return Result;
20219}
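// Illustrative example: for V = (and (load i32 p), 0xFFFF00FF) the inverted
// mask is 0x0000FF00, so exactly one byte (byte 1) is being cleared and,
// assuming the load is the store's immediately preceding memory operation,
// the function returns {1 /*MaskedBytes*/, 1 /*ByteShift*/}.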
20220
20221/// Check to see if IVal is something that provides a value as specified by
20222/// MaskInfo. If so, replace the specified store with a narrower store of
20223/// truncated IVal.
20224static SDValue
20225ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20226 SDValue IVal, StoreSDNode *St,
20227 DAGCombiner *DC) {
20228 unsigned NumBytes = MaskInfo.first;
20229 unsigned ByteShift = MaskInfo.second;
20230 SelectionDAG &DAG = DC->getDAG();
20231
20232 // Check to see if IVal is all zeros in the part being masked in by the 'or'
20233 // that uses this. If not, this is not a replacement.
20234 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20235 ByteShift*8, (ByteShift+NumBytes)*8);
20236 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20237
20238 // Check that it is legal on the target to do this. It is legal if the new
20239 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20240 // legalization. If the source type is legal, but the store type isn't, see
20241 // if we can use a truncating store.
20242 MVT VT = MVT::getIntegerVT(NumBytes * 8);
20243 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20244 bool UseTruncStore;
20245 if (DC->isTypeLegal(VT))
20246 UseTruncStore = false;
20247 else if (TLI.isTypeLegal(IVal.getValueType()) &&
20248 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20249 UseTruncStore = true;
20250 else
20251 return SDValue();
20252
20253 // Can't do this for indexed stores.
20254 if (St->isIndexed())
20255 return SDValue();
20256
20257 // Check that the target doesn't think this is a bad idea.
20258 if (St->getMemOperand() &&
20259 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
20260 *St->getMemOperand()))
20261 return SDValue();
20262
20263 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
20264 // shifted by ByteShift and truncated down to NumBytes.
20265 if (ByteShift) {
20266 SDLoc DL(IVal);
20267 IVal = DAG.getNode(
20268 ISD::SRL, DL, IVal.getValueType(), IVal,
20269 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
20270 }
20271
20272 // Figure out the offset for the store and the alignment of the access.
20273 unsigned StOffset;
20274 if (DAG.getDataLayout().isLittleEndian())
20275 StOffset = ByteShift;
20276 else
20277 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20278
20279 SDValue Ptr = St->getBasePtr();
20280 if (StOffset) {
20281 SDLoc DL(IVal);
20282 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
20283 }
20284
20285 ++OpsNarrowed;
20286 if (UseTruncStore)
20287 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20288 St->getPointerInfo().getWithOffset(StOffset),
20289 VT, St->getOriginalAlign());
20290
20291 // Truncate down to the new size.
20292 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
20293
20294 return DAG
20295 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20296 St->getPointerInfo().getWithOffset(StOffset),
20297 St->getOriginalAlign());
20298}
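// Illustrative example: with MaskInfo == {1, 1} on a little-endian target, an
// i32 store of IVal becomes an i8 store of trunc(srl IVal, 8) at the original
// pointer plus one byte, provided IVal is known zero outside that byte range.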
20299
20300/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
20301/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
20302/// narrowing the load and store if it would end up being a win for performance
20303/// or code size.
20304SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
20305 StoreSDNode *ST = cast<StoreSDNode>(N);
20306 if (!ST->isSimple())
20307 return SDValue();
20308
20309 SDValue Chain = ST->getChain();
20310 SDValue Value = ST->getValue();
20311 SDValue Ptr = ST->getBasePtr();
20312 EVT VT = Value.getValueType();
20313
20314 if (ST->isTruncatingStore() || VT.isVector())
20315 return SDValue();
20316
20317 unsigned Opc = Value.getOpcode();
20318
20319 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
20320 !Value.hasOneUse())
20321 return SDValue();
20322
20323 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
20324 // is a byte mask indicating a consecutive number of bytes, check to see if
20325 // Y is known to provide just those bytes. If so, we try to replace the
20326 // load + replace + store sequence with a single (narrower) store, which makes
20327 // the load dead.
20329 std::pair<unsigned, unsigned> MaskedLoad;
20330 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
20331 if (MaskedLoad.first)
20332 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20333 Value.getOperand(1), ST,this))
20334 return NewST;
20335
20336 // Or is commutative, so try swapping X and Y.
20337 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
20338 if (MaskedLoad.first)
20339 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20340 Value.getOperand(0), ST,this))
20341 return NewST;
20342 }
20343
20345 return SDValue();
20346
20347 if (Value.getOperand(1).getOpcode() != ISD::Constant)
20348 return SDValue();
20349
20350 SDValue N0 = Value.getOperand(0);
20351 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
20352 Chain == SDValue(N0.getNode(), 1)) {
20353 LoadSDNode *LD = cast<LoadSDNode>(N0);
20354 if (LD->getBasePtr() != Ptr ||
20355 LD->getPointerInfo().getAddrSpace() !=
20356 ST->getPointerInfo().getAddrSpace())
20357 return SDValue();
20358
20359 // Find the type NewVT to narrow the load / op / store to.
20360 SDValue N1 = Value.getOperand(1);
20361 unsigned BitWidth = N1.getValueSizeInBits();
20362 APInt Imm = N1->getAsAPIntVal();
20363 if (Opc == ISD::AND)
20364 Imm.flipAllBits();
20365 if (Imm == 0 || Imm.isAllOnes())
20366 return SDValue();
20367 // Find the least/most significant bits that need to be part of the narrowed
20368 // operation. We assume the target will need to address/access full bytes, so
20369 // we make sure to align LSB and MSB at byte boundaries.
20370 unsigned BitsPerByteMask = 7u;
20371 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
20372 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
20373 unsigned NewBW = NextPowerOf2(MSB - LSB);
20374 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20375 // The narrowing should be profitable, the load/store operation should be
20376 // legal (or custom) and the store size should be equal to the NewVT width.
20377 while (NewBW < BitWidth &&
20378 (NewVT.getStoreSizeInBits() != NewBW ||
20379 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
20381 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
20382 NewBW = NextPowerOf2(NewBW);
20383 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20384 }
20385 if (NewBW >= BitWidth)
20386 return SDValue();
20387
20388 // If we come this far NewVT/NewBW reflect a power-of-2 sized type that is
20389 // large enough to cover all bits that should be modified. This type might
20390 // however be larger than really needed (such as i32 while we actually only
20391 // need to modify one byte). Now we need to find out how to align the memory
20392 // accesses to satisfy preferred alignments as well as avoid accessing
20393 // memory outside the store size of the original access.
20394
20395 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
20396
20397 // Let ShAmt denote the number of bits to skip, counted from the least
20398 // significant bits of Imm, and let PtrOff denote how much the pointer needs
20399 // to be offset (in bytes) for the new access.
20400 unsigned ShAmt = 0;
20401 uint64_t PtrOff = 0;
20402 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
20403 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
20404 if (ShAmt > LSB)
20405 return SDValue();
20406 if (ShAmt + NewBW < MSB)
20407 continue;
20408
20409 // Calculate PtrOff.
20410 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
20411 ? VTStoreSize - NewBW - ShAmt
20412 : ShAmt;
20413 PtrOff = PtrAdjustmentInBits / 8;
20414
20415 // Now check if narrow access is allowed and fast, considering alignments.
20416 unsigned IsFast = 0;
20417 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20418 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
20419 LD->getAddressSpace(), NewAlign,
20420 LD->getMemOperand()->getFlags(), &IsFast) &&
20421 IsFast)
20422 break;
20423 }
20424 // If the loop above did not find any accepted ShAmt, we need to exit here.
20425 if (ShAmt + NewBW > VTStoreSize)
20426 return SDValue();
20427
20428 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
20429 if (Opc == ISD::AND)
20430 NewImm.flipAllBits();
20431 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20432 SDValue NewPtr =
20433 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
20434 SDValue NewLD =
20435 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
20436 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20437 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20438 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
20439 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
20440 SDValue NewST =
20441 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
20442 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20443
20444 AddToWorklist(NewPtr.getNode());
20445 AddToWorklist(NewLD.getNode());
20446 AddToWorklist(NewVal.getNode());
20447 WorklistRemover DeadNodes(*this);
20448 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
20449 ++OpsNarrowed;
20450 return NewST;
20451 }
20452
20453 return SDValue();
20454}
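// Illustrative example (assuming the target allows the narrow access): for
//   store (or (load i32 p), 0x00FF0000), p
// only byte 2 is touched, so on a little-endian target the sequence can be
// narrowed to an i8 load/or/store at p+2 with the constant 0xFF.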
20455
20456/// For a given floating point load / store pair, if the load value isn't used
20457/// by any other operations, then consider transforming the pair to integer
20458/// load / store operations if the target deems the transformation profitable.
20459SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
20460 StoreSDNode *ST = cast<StoreSDNode>(N);
20461 SDValue Value = ST->getValue();
20462 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
20463 Value.hasOneUse()) {
20464 LoadSDNode *LD = cast<LoadSDNode>(Value);
20465 EVT VT = LD->getMemoryVT();
20466 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
20467 LD->isNonTemporal() || ST->isNonTemporal() ||
20468 LD->getPointerInfo().getAddrSpace() != 0 ||
20469 ST->getPointerInfo().getAddrSpace() != 0)
20470 return SDValue();
20471
20472 TypeSize VTSize = VT.getSizeInBits();
20473
20474 // We don't know the size of scalable types at compile time so we cannot
20475 // create an integer of the equivalent size.
20476 if (VTSize.isScalable())
20477 return SDValue();
20478
20479 unsigned FastLD = 0, FastST = 0;
20480 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20481 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20482 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20483 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20484 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20485 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20486 *LD->getMemOperand(), &FastLD) ||
20487 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20488 *ST->getMemOperand(), &FastST) ||
20489 !FastLD || !FastST)
20490 return SDValue();
20491
20492 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
20493 LD->getBasePtr(), LD->getMemOperand());
20494
20495 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
20496 ST->getBasePtr(), ST->getMemOperand());
20497
20498 AddToWorklist(NewLD.getNode());
20499 AddToWorklist(NewST.getNode());
20500 WorklistRemover DeadNodes(*this);
20501 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20502 ++LdStFP2Int;
20503 return NewST;
20504 }
20505
20506 return SDValue();
20507}
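// Illustrative example: a float copy such as
//   v = load float p1 ; store float v, p2
// where v has no other uses may be rewritten as an i32 load/store pair when
// the target reports the integer forms as desirable and the accesses as fast.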
20508
20509// This is a helper function for visitMUL to check the profitability
20510// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20511// MulNode is the original multiply, AddNode is (add x, c1),
20512// and ConstNode is c2.
20513//
20514// If the (add x, c1) has multiple uses, we could increase
20515// the number of adds if we make this transformation.
20516// It would only be worth doing this if we can remove a
20517// multiply in the process. Check for that here.
20518// To illustrate:
20519// (A + c1) * c3
20520// (A + c2) * c3
20521// We're checking for cases where we have common "c3 * A" expressions.
20522bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20523 SDValue ConstNode) {
20524 APInt Val;
20525
20526 // If the add only has one use, and the target thinks the folding is
20527 // profitable or does not lead to worse code, this would be OK to do.
20528 if (AddNode->hasOneUse() &&
20529 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20530 return true;
20531
20532 // Walk all the users of the constant with which we're multiplying.
20533 for (SDNode *User : ConstNode->users()) {
20534 if (User == MulNode) // This use is the one we're on right now. Skip it.
20535 continue;
20536
20537 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
20538 SDNode *OtherOp;
20539 SDNode *MulVar = AddNode.getOperand(0).getNode();
20540
20541 // OtherOp is what we're multiplying against the constant.
20542 if (User->getOperand(0) == ConstNode)
20543 OtherOp = User->getOperand(1).getNode();
20544 else
20545 OtherOp = User->getOperand(0).getNode();
20546
20547 // Check to see if multiply is with the same operand of our "add".
20548 //
20549 // ConstNode = CONST
20550 // User = ConstNode * A <-- visiting User. OtherOp is A.
20551 // ...
20552 // AddNode = (A + c1) <-- MulVar is A.
20553 // = AddNode * ConstNode <-- current visiting instruction.
20554 //
20555 // If we make this transformation, we will have a common
20556 // multiply (ConstNode * A) that we can save.
20557 if (OtherOp == MulVar)
20558 return true;
20559
20560 // Now check to see if a future expansion will give us a common
20561 // multiply.
20562 //
20563 // ConstNode = CONST
20564 // AddNode = (A + c1)
20565 // ... = AddNode * ConstNode <-- current visiting instruction.
20566 // ...
20567 // OtherOp = (A + c2)
20568 // User = OtherOp * ConstNode <-- visiting User.
20569 //
20570 // If we make this transformation, we will have a common
20571 // multiply (CONST * A) after we also do the same transformation
20572 // to the "t2" instruction.
20573 if (OtherOp->getOpcode() == ISD::ADD &&
20575 OtherOp->getOperand(0).getNode() == MulVar)
20576 return true;
20577 }
20578 }
20579
20580 // Didn't find a case where this would be profitable.
20581 return false;
20582}
20583
20584SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20585 unsigned NumStores) {
20586 SmallPtrSet<const SDNode *, 8> Visited;
20587 SmallVector<SDValue, 8> Chains;
20588 SDLoc StoreDL(StoreNodes[0].MemNode);
20589
20590 for (unsigned i = 0; i < NumStores; ++i) {
20591 Visited.insert(StoreNodes[i].MemNode);
20592 }
20593
20594 // don't include nodes that are children or repeated nodes.
20595 for (unsigned i = 0; i < NumStores; ++i) {
20596 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20597 Chains.push_back(StoreNodes[i].MemNode->getChain());
20598 }
20599
20600 assert(!Chains.empty() && "Chain should have generated a chain");
20601 return DAG.getTokenFactor(StoreDL, Chains);
20602}
20603
20604bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20605 const Value *UnderlyingObj = nullptr;
20606 for (const auto &MemOp : StoreNodes) {
20607 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20608 // A pseudo value like a stack frame has its own frame index and size; we
20609 // should not use the first store's frame index for other frames.
20610 if (MMO->getPseudoValue())
20611 return false;
20612
20613 if (!MMO->getValue())
20614 return false;
20615
20616 const Value *Obj = getUnderlyingObject(MMO->getValue());
20617
20618 if (UnderlyingObj && UnderlyingObj != Obj)
20619 return false;
20620
20621 if (!UnderlyingObj)
20622 UnderlyingObj = Obj;
20623 }
20624
20625 return true;
20626}
20627
20628bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20629 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20630 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20631 // Make sure we have something to merge.
20632 if (NumStores < 2)
20633 return false;
20634
20635 assert((!UseTrunc || !UseVector) &&
20636 "This optimization cannot emit a vector truncating store");
20637
20638 // The latest Node in the DAG.
20639 SDLoc DL(StoreNodes[0].MemNode);
20640
20641 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20642 unsigned SizeInBits = NumStores * ElementSizeBits;
20643 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20644
20645 std::optional<MachineMemOperand::Flags> Flags;
20646 AAMDNodes AAInfo;
20647 for (unsigned I = 0; I != NumStores; ++I) {
20648 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20649 if (!Flags) {
20650 Flags = St->getMemOperand()->getFlags();
20651 AAInfo = St->getAAInfo();
20652 continue;
20653 }
20654 // Skip merging if there's an inconsistent flag.
20655 if (Flags != St->getMemOperand()->getFlags())
20656 return false;
20657 // Concatenate AA metadata.
20658 AAInfo = AAInfo.concat(St->getAAInfo());
20659 }
20660
20661 EVT StoreTy;
20662 if (UseVector) {
20663 unsigned Elts = NumStores * NumMemElts;
20664 // Get the type for the merged vector store.
20665 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20666 } else
20667 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20668
20669 SDValue StoredVal;
20670 if (UseVector) {
20671 if (IsConstantSrc) {
20672 SmallVector<SDValue, 8> BuildVector;
20673 for (unsigned I = 0; I != NumStores; ++I) {
20674 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20675 SDValue Val = St->getValue();
20676 // If constant is of the wrong type, convert it now. This comes up
20677 // when one of our stores was truncating.
20678 if (MemVT != Val.getValueType()) {
20679 Val = peekThroughBitcasts(Val);
20680 // Deal with constants of wrong size.
20681 if (ElementSizeBits != Val.getValueSizeInBits()) {
20682 auto *C = dyn_cast<ConstantSDNode>(Val);
20683 if (!C)
20684 // Not clear how to truncate FP values.
20685 // TODO: Handle truncation of build_vector constants
20686 return false;
20687
20688 EVT IntMemVT =
20689 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20690 Val = DAG.getConstant(C->getAPIntValue()
20691 .zextOrTrunc(Val.getValueSizeInBits())
20692 .zextOrTrunc(ElementSizeBits),
20693 SDLoc(C), IntMemVT);
20694 }
20695 // Make sure the correctly sized value is bitcast to the correct type.
20696 Val = DAG.getBitcast(MemVT, Val);
20697 }
20698 BuildVector.push_back(Val);
20699 }
20700 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20701 : ISD::BUILD_VECTOR,
20702 DL, StoreTy, BuildVector);
20703 } else {
20704 SmallVector<SDValue, 8> Ops;
20705 for (unsigned i = 0; i < NumStores; ++i) {
20706 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20707 SDValue Val = peekThroughBitcasts(St->getValue());
20708 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20709 // type MemVT. If the underlying value is not the correct
20710 // type, but it is an extraction of an appropriate vector we
20711 // can recast Val to be of the correct type. This may require
20712 // converting between EXTRACT_VECTOR_ELT and
20713 // EXTRACT_SUBVECTOR.
20714 if ((MemVT != Val.getValueType()) &&
20715 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20716 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20717 EVT MemVTScalarTy = MemVT.getScalarType();
20718 // We may need to add a bitcast here to get types to line up.
20719 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20720 Val = DAG.getBitcast(MemVT, Val);
20721 } else if (MemVT.isVector() &&
20722 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20723 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20724 } else {
20725 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20726 : ISD::EXTRACT_VECTOR_ELT;
20727 SDValue Vec = Val.getOperand(0);
20728 SDValue Idx = Val.getOperand(1);
20729 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20730 }
20731 }
20732 Ops.push_back(Val);
20733 }
20734
20735 // Build the extracted vector elements back into a vector.
20736 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20737 : ISD::BUILD_VECTOR,
20738 DL, StoreTy, Ops);
20739 }
20740 } else {
20741 // We should always use a vector store when merging extracted vector
20742 // elements, so this path implies a store of constants.
20743 assert(IsConstantSrc && "Merged vector elements should use vector store");
20744
20745 APInt StoreInt(SizeInBits, 0);
20746
20747 // Construct a single integer constant which is made of the smaller
20748 // constant inputs.
20749 bool IsLE = DAG.getDataLayout().isLittleEndian();
20750 for (unsigned i = 0; i < NumStores; ++i) {
20751 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20752 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20753
20754 SDValue Val = St->getValue();
20755 Val = peekThroughBitcasts(Val);
20756 StoreInt <<= ElementSizeBits;
20757 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20758 StoreInt |= C->getAPIntValue()
20759 .zextOrTrunc(ElementSizeBits)
20760 .zextOrTrunc(SizeInBits);
20761 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20762 StoreInt |= C->getValueAPF()
20763 .bitcastToAPInt()
20764 .zextOrTrunc(ElementSizeBits)
20765 .zextOrTrunc(SizeInBits);
20766 // If fp truncation is necessary give up for now.
20767 if (MemVT.getSizeInBits() != ElementSizeBits)
20768 return false;
20769 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20770 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20771 // Not yet handled
20772 return false;
20773 } else {
20774 llvm_unreachable("Invalid constant element type");
20775 }
20776 }
20777
20778 // Create the new Load and Store operations.
20779 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20780 }
20781
20782 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20783 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20784 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20785
20786 // Make sure we use a trunc store if it's necessary to be legal.
20787 // When generating the new widened store, if the first store's pointer info
20788 // cannot be reused, discard the pointer info except for the address space,
20789 // because the widened store can no longer be represented by the original
20790 // pointer info, which is for the narrower memory object.
20791 SDValue NewStore;
20792 if (!UseTrunc) {
20793 NewStore = DAG.getStore(
20794 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20795 CanReusePtrInfo
20796 ? FirstInChain->getPointerInfo()
20797 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20798 FirstInChain->getAlign(), *Flags, AAInfo);
20799 } else { // Must be realized as a trunc store
20800 EVT LegalizedStoredValTy =
20801 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20802 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20803 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20804 SDValue ExtendedStoreVal =
20805 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20806 LegalizedStoredValTy);
20807 NewStore = DAG.getTruncStore(
20808 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20809 CanReusePtrInfo
20810 ? FirstInChain->getPointerInfo()
20811 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20812 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20813 AAInfo);
20814 }
20815
20816 // Replace all merged stores with the new store.
20817 for (unsigned i = 0; i < NumStores; ++i)
20818 CombineTo(StoreNodes[i].MemNode, NewStore);
20819
20820 AddToWorklist(NewChain.getNode());
20821 return true;
20822}
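// Illustrative example: four consecutive i8 constant stores of 0x11, 0x22,
// 0x33 and 0x44 (in increasing address order) can be merged into a single i32
// store of 0x44332211 on a little-endian target.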
20823
20824SDNode *
20825DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
20826 SmallVectorImpl<MemOpLink> &StoreNodes) {
20827 // This holds the base pointer, index, and the offset in bytes from the base
20828 // pointer. We must have a base and an offset. Do not handle stores to undef
20829 // base pointers.
20830 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20831 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20832 return nullptr;
20833
20834 SDValue Val = peekThroughBitcasts(St->getValue());
20835 StoreSource StoreSrc = getStoreSource(Val);
20836 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20837
20838 // Match on loadbaseptr if relevant.
20839 EVT MemVT = St->getMemoryVT();
20840 BaseIndexOffset LBasePtr;
20841 EVT LoadVT;
20842 if (StoreSrc == StoreSource::Load) {
20843 auto *Ld = cast<LoadSDNode>(Val);
20844 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20845 LoadVT = Ld->getMemoryVT();
20846 // Load and store should be the same type.
20847 if (MemVT != LoadVT)
20848 return nullptr;
20849 // Loads must only have one use.
20850 if (!Ld->hasNUsesOfValue(1, 0))
20851 return nullptr;
20852 // The memory operands must not be volatile/indexed/atomic.
20853 // TODO: May be able to relax for unordered atomics (see D66309)
20854 if (!Ld->isSimple() || Ld->isIndexed())
20855 return nullptr;
20856 }
20857 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20858 int64_t &Offset) -> bool {
20859 // The memory operands must not be volatile/indexed/atomic.
20860 // TODO: May be able to relax for unordered atomics (see D66309)
20861 if (!Other->isSimple() || Other->isIndexed())
20862 return false;
20863 // Don't mix temporal stores with non-temporal stores.
20864 if (St->isNonTemporal() != Other->isNonTemporal())
20865 return false;
20866 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20867 return false;
20868 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20869 // Allow merging constants of different types as integers.
20870 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20871 : Other->getMemoryVT() != MemVT;
20872 switch (StoreSrc) {
20873 case StoreSource::Load: {
20874 if (NoTypeMatch)
20875 return false;
20876 // The Load's Base Ptr must also match.
20877 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20878 if (!OtherLd)
20879 return false;
20880 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20881 if (LoadVT != OtherLd->getMemoryVT())
20882 return false;
20883 // Loads must only have one use.
20884 if (!OtherLd->hasNUsesOfValue(1, 0))
20885 return false;
20886 // The memory operands must not be volatile/indexed/atomic.
20887 // TODO: May be able to relax for unordered atomics (see D66309)
20888 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20889 return false;
20890 // Don't mix temporal loads with non-temporal loads.
20891 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20892 return false;
20893 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20894 *OtherLd))
20895 return false;
20896 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20897 return false;
20898 break;
20899 }
20900 case StoreSource::Constant:
20901 if (NoTypeMatch)
20902 return false;
20903 if (getStoreSource(OtherBC) != StoreSource::Constant)
20904 return false;
20905 break;
20906 case StoreSource::Extract:
20907 // Do not merge truncated stores here.
20908 if (Other->isTruncatingStore())
20909 return false;
20910 if (!MemVT.bitsEq(OtherBC.getValueType()))
20911 return false;
20912 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20913 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20914 return false;
20915 break;
20916 default:
20917 llvm_unreachable("Unhandled store source for merging");
20918 }
20919 Ptr = BaseIndexOffset::match(Other, DAG);
20920 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20921 };
20922
20923 // We are looking for a root node which is an ancestor to all mergeable
20924 // stores. We search up through a load, to our root and then down
20925 // through all children. For instance we will find Store{1,2,3} if
20926 // St is Store1, Store2, or Store3 where the root is not a load,
20927 // which is always true for nonvolatile ops. TODO: Expand
20928 // the search to find all valid candidates through multiple layers of loads.
20929 //
20930 // Root
20931 // |-------|-------|
20932 // Load Load Store3
20933 // | |
20934 // Store1 Store2
20935 //
20936 // FIXME: We should be able to climb and
20937 // descend TokenFactors to find candidates as well.
20938
20939 SDNode *RootNode = St->getChain().getNode();
20940 // Bail out if we already analyzed this root node and found nothing.
20941 if (ChainsWithoutMergeableStores.contains(RootNode))
20942 return nullptr;
20943
20944 // Check if the pair of StoreNode and RootNode has already bailed out of
20945 // the dependence check more times than the limit allows.
20946 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20947 SDNode *RootNode) -> bool {
20948 auto RootCount = StoreRootCountMap.find(StoreNode);
20949 return RootCount != StoreRootCountMap.end() &&
20950 RootCount->second.first == RootNode &&
20951 RootCount->second.second > StoreMergeDependenceLimit;
20952 };
20953
20954 auto TryToAddCandidate = [&](SDUse &Use) {
20955 // This must be a chain use.
20956 if (Use.getOperandNo() != 0)
20957 return;
20958 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
20959 BaseIndexOffset Ptr;
20960 int64_t PtrDiff;
20961 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20962 !OverLimitInDependenceCheck(OtherStore, RootNode))
20963 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20964 }
20965 };
20966
20967 unsigned NumNodesExplored = 0;
20968 const unsigned MaxSearchNodes = 1024;
20969 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20970 RootNode = Ldn->getChain().getNode();
20971 // Bail out if we already analyzed this root node and found nothing.
20972 if (ChainsWithoutMergeableStores.contains(RootNode))
20973 return nullptr;
20974 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20975 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20976 SDNode *User = I->getUser();
20977 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
20978 for (SDUse &U2 : User->uses())
20979 TryToAddCandidate(U2);
20980 }
20981 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20982 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
20983 TryToAddCandidate(*I);
20984 }
20985 }
20986 } else {
20987 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20988 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20989 TryToAddCandidate(*I);
20990 }
20991
20992 return RootNode;
20993}
20994
20995// We need to check that merging these stores does not cause a loop in the
20996// DAG. Any store candidate may depend on another candidate indirectly through
20997// its operands. Check in parallel by searching up from operands of candidates.
20998bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20999 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21000 SDNode *RootNode) {
21001 // FIXME: We should be able to truncate a full search of
21002 // predecessors by doing a BFS and keeping tabs on the originating
21003 // stores from which worklist nodes come, in a similar way to
21004 // TokenFactor simplification.
21005
21006 SmallPtrSet<const SDNode *, 32> Visited;
21007 SmallVector<const SDNode *, 8> Worklist;
21008
21009 // RootNode is a predecessor to all candidates so we need not search
21010 // past it. Add RootNode (peeking through TokenFactors). Do not count
21011 // these towards size check.
21012
21013 Worklist.push_back(RootNode);
21014 while (!Worklist.empty()) {
21015 auto N = Worklist.pop_back_val();
21016 if (!Visited.insert(N).second)
21017 continue; // Already present in Visited.
21018 if (N->getOpcode() == ISD::TokenFactor) {
21019 for (SDValue Op : N->ops())
21020 Worklist.push_back(Op.getNode());
21021 }
21022 }
21023
21024 // Don't count pruning nodes towards max.
21025 unsigned int Max = 1024 + Visited.size();
21026 // Search Ops of store candidates.
21027 for (unsigned i = 0; i < NumStores; ++i) {
21028 SDNode *N = StoreNodes[i].MemNode;
21029 // Of the 4 Store Operands:
21030 // * Chain (Op 0) -> We have already considered these
21031 // in candidate selection, but only by following the
21032 // chain dependencies. We could still have a chain
21033 // dependency to a load, that has a non-chain dep to
21034 // another load, that depends on a store, etc. So it is
21035 // possible to have dependencies that consist of a mix
21036 // of chain and non-chain deps, and we need to include
21037 // chain operands in the analysis here.
21038 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21039 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21040 // but aren't necessarily from the same base node, so
21041 // cycles are possible (e.g. via an indexed store).
21042 // * (Op 3) -> Represents the pre- or post-indexing offset (or undef for
21043 // non-indexed stores). Not constant on all targets (e.g. ARM)
21044 // and so can participate in a cycle.
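// As a hedged illustration (hypothetical DAG, not taken from a real test
// case), a mixed chain/value dependency can form a cycle like this:
//   t0: store %a to %p            ; merge candidate #0
//   t1: load from %q, chain = t0  ; ordered after t0 by its chain
//   t2: store t1 to %p + 4        ; merge candidate #1, value depends on t1
// Merging t0 and t2 into one wide store would require that store to be both
// a chain predecessor of t1 (t0's position) and a user of t1's value (t2's
// position), i.e. a cycle, which is exactly what this search rejects.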
21045 for (const SDValue &Op : N->op_values())
21046 Worklist.push_back(Op.getNode());
21047 }
21048 // Search through DAG. We can stop early if we find a store node.
21049 for (unsigned i = 0; i < NumStores; ++i)
21050 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21051 Max)) {
21052 // If the search bails out, record the StoreNode and RootNode in the
21053 // StoreRootCountMap. If we have seen the pair more times than the limit,
21054 // we won't add the StoreNode into the StoreNodes set again.
21055 if (Visited.size() >= Max) {
21056 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21057 if (RootCount.first == RootNode)
21058 RootCount.second++;
21059 else
21060 RootCount = {RootNode, 1};
21061 }
21062 return false;
21063 }
21064 return true;
21065}
21066
21067unsigned
21068DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21069 int64_t ElementSizeBytes) const {
21070 while (true) {
21071 // Find a store past the width of the first store.
21072 size_t StartIdx = 0;
21073 while ((StartIdx + 1 < StoreNodes.size()) &&
21074 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21075 StoreNodes[StartIdx + 1].OffsetFromBase)
21076 ++StartIdx;
21077
21078 // Bail if we don't have enough candidates to merge.
21079 if (StartIdx + 1 >= StoreNodes.size())
21080 return 0;
21081
21082 // Trim stores that overlapped with the first store.
21083 if (StartIdx)
21084 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21085
21086 // Scan the memory operations on the chain and find the first
21087 // non-consecutive store memory address.
21088 unsigned NumConsecutiveStores = 1;
21089 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21090 // Check that the addresses are consecutive starting from the second
21091 // element in the list of stores.
21092 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21093 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21094 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21095 break;
21096 NumConsecutiveStores = i + 1;
21097 }
21098 if (NumConsecutiveStores > 1)
21099 return NumConsecutiveStores;
21100
21101 // There are no consecutive stores at the start of the list.
21102 // Remove the first store and try again.
21103 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21104 }
21105}
21106
21107bool DAGCombiner::tryStoreMergeOfConstants(
21108 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21109 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21110 LLVMContext &Context = *DAG.getContext();
21111 const DataLayout &DL = DAG.getDataLayout();
21112 int64_t ElementSizeBytes = MemVT.getStoreSize();
21113 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21114 bool MadeChange = false;
21115
21116 // Store the constants into memory as one consecutive store.
21117 while (NumConsecutiveStores >= 2) {
21118 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21119 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21120 Align FirstStoreAlign = FirstInChain->getAlign();
21121 unsigned LastLegalType = 1;
21122 unsigned LastLegalVectorType = 1;
21123 bool LastIntegerTrunc = false;
21124 bool NonZero = false;
21125 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21126 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21127 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21128 SDValue StoredVal = ST->getValue();
21129 bool IsElementZero = false;
21130 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21131 IsElementZero = C->isZero();
21132 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21133 IsElementZero = C->getConstantFPValue()->isNullValue();
21134 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21135 IsElementZero = true;
21136 if (IsElementZero) {
21137 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21138 FirstZeroAfterNonZero = i;
21139 }
21140 NonZero |= !IsElementZero;
21141
21142 // Find a legal type for the constant store.
21143 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21144 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21145 unsigned IsFast = 0;
21146
21147 // Break early when size is too large to be legal.
21148 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21149 break;
21150
21151 if (TLI.isTypeLegal(StoreTy) &&
21152 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21153 DAG.getMachineFunction()) &&
21154 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21155 *FirstInChain->getMemOperand(), &IsFast) &&
21156 IsFast) {
21157 LastIntegerTrunc = false;
21158 LastLegalType = i + 1;
21159 // Or check whether a truncstore is legal.
21160 } else if (TLI.getTypeAction(Context, StoreTy) ==
21161 TargetLowering::TypePromoteInteger) {
21162 EVT LegalizedStoredValTy =
21163 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21164 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21165 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21166 DAG.getMachineFunction()) &&
21167 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21168 *FirstInChain->getMemOperand(), &IsFast) &&
21169 IsFast) {
21170 LastIntegerTrunc = true;
21171 LastLegalType = i + 1;
21172 }
21173 }
21174
21175 // We only use vectors if the target allows it and the function is not
21176 // marked with the noimplicitfloat attribute.
21177 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21178 AllowVectors) {
21179 // Find a legal type for the vector store.
21180 unsigned Elts = (i + 1) * NumMemElts;
21181 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21182 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21183 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21184 TLI.allowsMemoryAccess(Context, DL, Ty,
21185 *FirstInChain->getMemOperand(), &IsFast) &&
21186 IsFast)
21187 LastLegalVectorType = i + 1;
21188 }
21189 }
21190
21191 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21192 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21193 bool UseTrunc = LastIntegerTrunc && !UseVector;
21194
21195 // Check if we found a legal integer type that creates a meaningful
21196 // merge.
21197 if (NumElem < 2) {
21198 // We know that candidate stores are in order and of correct
21199 // shape. While there is no mergeable sequence from the
21200 // beginning, one may start later in the sequence. The only
21201 // reason a merge of size N could have failed where another of
21202 // the same size would not have, is if the alignment has
21203 // improved or we've dropped a non-zero value. Drop as many
21204 // candidates as we can here.
21205 unsigned NumSkip = 1;
21206 while ((NumSkip < NumConsecutiveStores) &&
21207 (NumSkip < FirstZeroAfterNonZero) &&
21208 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21209 NumSkip++;
21210
21211 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21212 NumConsecutiveStores -= NumSkip;
21213 continue;
21214 }
21215
21216 // Check that we can merge these candidates without causing a cycle.
21217 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21218 RootNode)) {
21219 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21220 NumConsecutiveStores -= NumElem;
21221 continue;
21222 }
21223
21224 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21225 /*IsConstantSrc*/ true,
21226 UseVector, UseTrunc);
21227
21228 // Remove merged stores for next iteration.
21229 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21230 NumConsecutiveStores -= NumElem;
21231 }
21232 return MadeChange;
21233}
21234
21235bool DAGCombiner::tryStoreMergeOfExtracts(
21236 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21237 EVT MemVT, SDNode *RootNode) {
21238 LLVMContext &Context = *DAG.getContext();
21239 const DataLayout &DL = DAG.getDataLayout();
21240 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21241 bool MadeChange = false;
21242
21243 // Loop on Consecutive Stores on success.
21244 while (NumConsecutiveStores >= 2) {
21245 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21246 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21247 Align FirstStoreAlign = FirstInChain->getAlign();
21248 unsigned NumStoresToMerge = 1;
21249 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21250 // Find a legal type for the vector store.
21251 unsigned Elts = (i + 1) * NumMemElts;
21252 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21253 unsigned IsFast = 0;
21254
21255 // Break early when size is too large to be legal.
21256 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
21257 break;
21258
21259 if (TLI.isTypeLegal(Ty) &&
21260 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21261 TLI.allowsMemoryAccess(Context, DL, Ty,
21262 *FirstInChain->getMemOperand(), &IsFast) &&
21263 IsFast)
21264 NumStoresToMerge = i + 1;
21265 }
21266
21267 // Check if we found a legal vector type that creates a meaningful
21268 // merge.
21269 if (NumStoresToMerge < 2) {
21270 // We know that candidate stores are in order and of correct
21271 // shape. While there is no mergeable sequence from the
21272 // beginning, one may start later in the sequence. The only
21273 // reason a merge of size N could have failed where another of
21274 // the same size would not have, is if the alignment has
21275 // improved. Drop as many candidates as we can here.
21276 unsigned NumSkip = 1;
21277 while ((NumSkip < NumConsecutiveStores) &&
21278 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21279 NumSkip++;
21280
21281 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21282 NumConsecutiveStores -= NumSkip;
21283 continue;
21284 }
21285
21286 // Check that we can merge these candidates without causing a cycle.
21287 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
21288 RootNode)) {
21289 StoreNodes.erase(StoreNodes.begin(),
21290 StoreNodes.begin() + NumStoresToMerge);
21291 NumConsecutiveStores -= NumStoresToMerge;
21292 continue;
21293 }
21294
21295 MadeChange |= mergeStoresOfConstantsOrVecElts(
21296 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
21297 /*UseVector*/ true, /*UseTrunc*/ false);
21298
21299 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
21300 NumConsecutiveStores -= NumStoresToMerge;
21301 }
21302 return MadeChange;
21303}
21304
21305bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
21306 unsigned NumConsecutiveStores, EVT MemVT,
21307 SDNode *RootNode, bool AllowVectors,
21308 bool IsNonTemporalStore,
21309 bool IsNonTemporalLoad) {
21310 LLVMContext &Context = *DAG.getContext();
21311 const DataLayout &DL = DAG.getDataLayout();
21312 int64_t ElementSizeBytes = MemVT.getStoreSize();
21313 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21314 bool MadeChange = false;
21315
21316 // Look for load nodes which are used by the stored values.
21317 SmallVector<MemOpLink, 8> LoadNodes;
21318
21319 // Find acceptable loads. Loads need to have the same chain (token factor),
21320 // must not be zext, volatile, or indexed, and they must be consecutive.
21321 BaseIndexOffset LdBasePtr;
21322
21323 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21324 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21325 SDValue Val = peekThroughBitcasts(St->getValue());
21326 LoadSDNode *Ld = cast<LoadSDNode>(Val);
21327
21328 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
21329 // If this is not the first ptr that we check.
21330 int64_t LdOffset = 0;
21331 if (LdBasePtr.getBase().getNode()) {
21332 // The base ptr must be the same.
21333 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
21334 break;
21335 } else {
21336 // Check that all other base pointers are the same as this one.
21337 LdBasePtr = LdPtr;
21338 }
21339
21340 // We found a potential memory operand to merge.
21341 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
21342 }
21343
21344 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
21345 Align RequiredAlignment;
21346 bool NeedRotate = false;
21347 if (LoadNodes.size() == 2) {
21348 // If we have load/store pair instructions and we only have two values,
21349 // don't bother merging.
21350 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
21351 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
21352 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
21353 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
21354 break;
21355 }
21356 // If the loads are reversed, see if we can rotate the halves into place.
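// A brief sketch of why a rotate suffices: if the lower-addressed store
// takes the value loaded from p + ElementSizeBytes and the next store takes
// the value loaded from p, a single load of the 2*W-bit pair followed by a
// rotate by W bits swaps the two W-bit halves, so one wide store still
// writes both values to the right locations.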
21357 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
21358 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
21359 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
21360 if (Offset0 - Offset1 == ElementSizeBytes &&
21361 (hasOperation(ISD::ROTL, PairVT) ||
21362 hasOperation(ISD::ROTR, PairVT))) {
21363 std::swap(LoadNodes[0], LoadNodes[1]);
21364 NeedRotate = true;
21365 }
21366 }
21367 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21368 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21369 Align FirstStoreAlign = FirstInChain->getAlign();
21370 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
21371
21372 // Scan the memory operations on the chain and find the first
21373 // non-consecutive load memory address. These variables hold the index in
21374 // the store node array.
21375
21376 unsigned LastConsecutiveLoad = 1;
21377
21378 // This variable refers to the size and not index in the array.
21379 unsigned LastLegalVectorType = 1;
21380 unsigned LastLegalIntegerType = 1;
21381 bool isDereferenceable = true;
21382 bool DoIntegerTruncate = false;
21383 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
21384 SDValue LoadChain = FirstLoad->getChain();
21385 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
21386 // All loads must share the same chain.
21387 if (LoadNodes[i].MemNode->getChain() != LoadChain)
21388 break;
21389
21390 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
21391 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21392 break;
21393 LastConsecutiveLoad = i;
21394
21395 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
21396 isDereferenceable = false;
21397
21398 // Find a legal type for the vector store.
21399 unsigned Elts = (i + 1) * NumMemElts;
21400 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21401
21402 // Break early when size is too large to be legal.
21403 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21404 break;
21405
21406 unsigned IsFastSt = 0;
21407 unsigned IsFastLd = 0;
21408 // Don't try vector types if we need a rotate. We may still fail the
21409 // legality checks for the integer type, but we can't handle the rotate
21410 // case with vectors.
21411 // FIXME: We could use a shuffle in place of the rotate.
21412 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
21413 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21414 DAG.getMachineFunction()) &&
21415 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21416 *FirstInChain->getMemOperand(), &IsFastSt) &&
21417 IsFastSt &&
21418 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21419 *FirstLoad->getMemOperand(), &IsFastLd) &&
21420 IsFastLd) {
21421 LastLegalVectorType = i + 1;
21422 }
21423
21424 // Find a legal type for the integer store.
21425 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21426 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21427 if (TLI.isTypeLegal(StoreTy) &&
21428 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21429 DAG.getMachineFunction()) &&
21430 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21431 *FirstInChain->getMemOperand(), &IsFastSt) &&
21432 IsFastSt &&
21433 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21434 *FirstLoad->getMemOperand(), &IsFastLd) &&
21435 IsFastLd) {
21436 LastLegalIntegerType = i + 1;
21437 DoIntegerTruncate = false;
21438 // Or check whether a truncstore and extload are legal.
21439 } else if (TLI.getTypeAction(Context, StoreTy) ==
21440 TargetLowering::TypePromoteInteger) {
21441 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
21442 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21443 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21444 DAG.getMachineFunction()) &&
21445 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21446 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21447 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
21448 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21449 *FirstInChain->getMemOperand(), &IsFastSt) &&
21450 IsFastSt &&
21451 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21452 *FirstLoad->getMemOperand(), &IsFastLd) &&
21453 IsFastLd) {
21454 LastLegalIntegerType = i + 1;
21455 DoIntegerTruncate = true;
21456 }
21457 }
21458 }
21459
21460 // Only use vector types if the vector type is larger than the integer
21461 // type. If they are the same, use integers.
21462 bool UseVectorTy =
21463 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
21464 unsigned LastLegalType =
21465 std::max(LastLegalVectorType, LastLegalIntegerType);
21466
21467 // We add +1 here because the LastXXX variables refer to an array index
21468 // while NumElem refers to a count of elements.
21469 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
21470 NumElem = std::min(LastLegalType, NumElem);
21471 Align FirstLoadAlign = FirstLoad->getAlign();
21472
21473 if (NumElem < 2) {
21474 // We know that candidate stores are in order and of correct
21475 // shape. While there is no mergeable sequence from the
21476 // beginning, one may start later in the sequence. The only
21477 // reason a merge of size N could have failed where another of
21478 // the same size would not have is if the alignment of either
21479 // the load or store has improved. Drop as many candidates as we
21480 // can here.
21481 unsigned NumSkip = 1;
21482 while ((NumSkip < LoadNodes.size()) &&
21483 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21484 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21485 NumSkip++;
21486 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21487 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21488 NumConsecutiveStores -= NumSkip;
21489 continue;
21490 }
21491
21492 // Check that we can merge these candidates without causing a cycle.
21493 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21494 RootNode)) {
21495 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21496 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21497 NumConsecutiveStores -= NumElem;
21498 continue;
21499 }
21500
21501 // Find if it is better to use vectors or integers to load and store
21502 // to memory.
21503 EVT JointMemOpVT;
21504 if (UseVectorTy) {
21505 // Find a legal type for the vector store.
21506 unsigned Elts = NumElem * NumMemElts;
21507 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21508 } else {
21509 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21510 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21511 }
21512
21513 SDLoc LoadDL(LoadNodes[0].MemNode);
21514 SDLoc StoreDL(StoreNodes[0].MemNode);
21515
21516 // The merged loads are required to have the same incoming chain, so
21517 // using the first's chain is acceptable.
21518
21519 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21520 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21521 AddToWorklist(NewStoreChain.getNode());
21522
21523 MachineMemOperand::Flags LdMMOFlags =
21524 isDereferenceable ? MachineMemOperand::MODereferenceable
21525 : MachineMemOperand::MONone;
21526 if (IsNonTemporalLoad)
21527 LdMMOFlags |= MachineMemOperand::MONonTemporal;
21528
21529 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21530
21531 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21532 ? MachineMemOperand::MONonTemporal
21533 : MachineMemOperand::MONone;
21534
21535 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21536
21537 SDValue NewLoad, NewStore;
21538 if (UseVectorTy || !DoIntegerTruncate) {
21539 NewLoad = DAG.getLoad(
21540 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21541 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21542 SDValue StoreOp = NewLoad;
21543 if (NeedRotate) {
21544 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21545 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21546 "Unexpected type for rotate-able load pair");
21547 SDValue RotAmt =
21548 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21549 // Target can convert to the identical ROTR if it does not have ROTL.
21550 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21551 }
21552 NewStore = DAG.getStore(
21553 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21554 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21555 : MachinePointerInfo(FirstStoreAS),
21556 FirstStoreAlign, StMMOFlags);
21557 } else { // This must be the truncstore/extload case
21558 EVT ExtendedTy =
21559 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21560 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21561 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21562 FirstLoad->getPointerInfo(), JointMemOpVT,
21563 FirstLoadAlign, LdMMOFlags);
21564 NewStore = DAG.getTruncStore(
21565 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21566 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21567 : MachinePointerInfo(FirstStoreAS),
21568 JointMemOpVT, FirstInChain->getAlign(),
21569 FirstInChain->getMemOperand()->getFlags());
21570 }
21571
21572 // Transfer chain users from old loads to the new load.
21573 for (unsigned i = 0; i < NumElem; ++i) {
21574 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21575 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21576 SDValue(NewLoad.getNode(), 1));
21577 }
21578
21579 // Replace all stores with the new store. Recursively remove corresponding
21580 // values if they are no longer used.
21581 for (unsigned i = 0; i < NumElem; ++i) {
21582 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21583 CombineTo(StoreNodes[i].MemNode, NewStore);
21584 if (Val->use_empty())
21585 recursivelyDeleteUnusedNodes(Val.getNode());
21586 }
21587
21588 MadeChange = true;
21589 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21590 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21591 NumConsecutiveStores -= NumElem;
21592 }
21593 return MadeChange;
21594}
21595
21596bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21597 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21598 return false;
21599
21600 // TODO: Extend this function to merge stores of scalable vectors.
21601 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21602 // store since we know <vscale x 16 x i8> is exactly twice as large as
21603 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21604 EVT MemVT = St->getMemoryVT();
21605 if (MemVT.isScalableVT())
21606 return false;
21607 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21608 return false;
21609
21610 // This function cannot currently deal with non-byte-sized memory sizes.
21611 int64_t ElementSizeBytes = MemVT.getStoreSize();
21612 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21613 return false;
21614
21615 // Do not bother looking at stored values that are not constants, loads, or
21616 // extracted vector elements.
21617 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21618 const StoreSource StoreSrc = getStoreSource(StoredVal);
21619 if (StoreSrc == StoreSource::Unknown)
21620 return false;
21621
21622 SmallVector<MemOpLink, 8> StoreNodes;
21623 // Find potential store merge candidates by searching through chain sub-DAG
21624 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
21625
21626 // Check if there is anything to merge.
21627 if (StoreNodes.size() < 2)
21628 return false;
21629
21630 // Sort the memory operands according to their distance from the
21631 // base pointer.
21632 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21633 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21634 });
21635
21636 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21637 Attribute::NoImplicitFloat);
21638 bool IsNonTemporalStore = St->isNonTemporal();
21639 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21640 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21641
21642 // Store merging attempts to merge the lowest-addressed stores first. This
21643 // generally works out well when it succeeds, as the remaining stores are
21644 // checked after the first collection of stores is merged. However, in the
21645 // case that a non-mergeable store is found first, e.g., {p[-2],
21646 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21647 // mergeable cases. To prevent this, we prune such stores from the
21648 // front of StoreNodes here.
21649 bool MadeChange = false;
21650 while (StoreNodes.size() > 1) {
21651 unsigned NumConsecutiveStores =
21652 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21653 // There are no more stores in the list to examine.
21654 if (NumConsecutiveStores == 0)
21655 return MadeChange;
21656
21657 // We have at least 2 consecutive stores. Try to merge them.
21658 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21659 switch (StoreSrc) {
21660 case StoreSource::Constant:
21661 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21662 MemVT, RootNode, AllowVectors);
21663 break;
21664
21665 case StoreSource::Extract:
21666 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21667 MemVT, RootNode);
21668 break;
21669
21670 case StoreSource::Load:
21671 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21672 MemVT, RootNode, AllowVectors,
21673 IsNonTemporalStore, IsNonTemporalLoad);
21674 break;
21675
21676 default:
21677 llvm_unreachable("Unhandled store source type");
21678 }
21679 }
21680
21681 // Remember if we failed to optimize, to save compile time.
21682 if (!MadeChange)
21683 ChainsWithoutMergeableStores.insert(RootNode);
21684
21685 return MadeChange;
21686}
21687
21688SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21689 SDLoc SL(ST);
21690 SDValue ReplStore;
21691
21692 // Replace the chain to avoid dependency.
21693 if (ST->isTruncatingStore()) {
21694 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21695 ST->getBasePtr(), ST->getMemoryVT(),
21696 ST->getMemOperand());
21697 } else {
21698 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21699 ST->getMemOperand());
21700 }
21701
21702 // Create token to keep both nodes around.
21703 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21704 MVT::Other, ST->getChain(), ReplStore);
21705
21706 // Make sure the new and old chains are cleaned up.
21707 AddToWorklist(Token.getNode());
21708
21709 // Don't add users to work list.
21710 return CombineTo(ST, Token, false);
21711}
21712
21713SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21714 SDValue Value = ST->getValue();
21715 if (Value.getOpcode() == ISD::TargetConstantFP)
21716 return SDValue();
21717
21718 if (!ISD::isNormalStore(ST))
21719 return SDValue();
21720
21721 SDLoc DL(ST);
21722
21723 SDValue Chain = ST->getChain();
21724 SDValue Ptr = ST->getBasePtr();
21725
21726 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21727
21728 // NOTE: If the original store is volatile, this transform must not increase
21729 // the number of stores. For example, on x86-32 an f64 can be stored in one
21730 // processor operation but an i64 (which is not legal) requires two. So the
21731 // transform should not be done in this case.
21732
21733 SDValue Tmp;
21734 switch (CFP->getSimpleValueType(0).SimpleTy) {
21735 default:
21736 llvm_unreachable("Unknown FP type");
21737 case MVT::f16: // We don't do this for these yet.
21738 case MVT::bf16:
21739 case MVT::f80:
21740 case MVT::f128:
21741 case MVT::ppcf128:
21742 return SDValue();
21743 case MVT::f32:
21744 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21745 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21746 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21747 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21748 MVT::i32);
21749 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21750 }
21751
21752 return SDValue();
21753 case MVT::f64:
21754 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21755 ST->isSimple()) ||
21756 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21757 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21758 getZExtValue(), SDLoc(CFP), MVT::i64);
21759 return DAG.getStore(Chain, DL, Tmp,
21760 Ptr, ST->getMemOperand());
21761 }
21762
21763 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21764 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21765 // Many FP stores are not made apparent until after legalize, e.g. for
21766 // argument passing. Since this is so common, custom legalize the
21767 // 64-bit integer store into two 32-bit stores.
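// As a worked illustration (little-endian layout assumed): storing the f64
// constant 1.0 bitcasts to the i64 value 0x3FF0000000000000, which is split
// into a store of 0x00000000 at Ptr and 0x3FF00000 at Ptr + 4; the two
// halves are swapped for big-endian targets, as the code below does.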
21768 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21769 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21770 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21771 if (DAG.getDataLayout().isBigEndian())
21772 std::swap(Lo, Hi);
21773
21774 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21775 AAMDNodes AAInfo = ST->getAAInfo();
21776
21777 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21778 ST->getOriginalAlign(), MMOFlags, AAInfo);
21779 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21780 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21781 ST->getPointerInfo().getWithOffset(4),
21782 ST->getOriginalAlign(), MMOFlags, AAInfo);
21783 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21784 St0, St1);
21785 }
21786
21787 return SDValue();
21788 }
21789}
21790
21791// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21792//
21793// If a store of a load with an element inserted into it has no other
21794// uses in between the chain, then we can consider the vector store
21795// dead and replace it with just the single scalar element store.
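// For example (a minimal sketch): with a v4i32 value loaded from p, a store
// of (insert_vector_elt (load p), x, 2) back to p can be replaced by a
// single i32 store of x to p + 8, since element 2 lives 8 bytes into the
// vector.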
21796SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21797 SDLoc DL(ST);
21798 SDValue Value = ST->getValue();
21799 SDValue Ptr = ST->getBasePtr();
21800 SDValue Chain = ST->getChain();
21801 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21802 return SDValue();
21803
21804 SDValue Elt = Value.getOperand(1);
21805 SDValue Idx = Value.getOperand(2);
21806
21807 // If the element isn't byte sized or is implicitly truncated then we can't
21808 // compute an offset.
21809 EVT EltVT = Elt.getValueType();
21810 if (!EltVT.isByteSized() ||
21811 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21812 return SDValue();
21813
21814 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21815 if (!Ld || Ld->getBasePtr() != Ptr ||
21816 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21817 !ISD::isNormalStore(ST) ||
21818 Ld->getAddressSpace() != ST->getAddressSpace() ||
21819 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21820 return SDValue();
21821
21822 unsigned IsFast;
21823 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21824 Elt.getValueType(), ST->getAddressSpace(),
21825 ST->getAlign(), ST->getMemOperand()->getFlags(),
21826 &IsFast) ||
21827 !IsFast)
21828 return SDValue();
21829
21830 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21831
21832 // If the offset is a known constant then try to recover the pointer
21833 // info
21834 SDValue NewPtr;
21835 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21836 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21837 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21838 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21839 } else {
21840 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21841 }
21842
21843 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21844 ST->getMemOperand()->getFlags());
21845}
21846
21847SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21848 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21849 SDValue Val = ST->getVal();
21850 EVT VT = Val.getValueType();
21851 EVT MemVT = ST->getMemoryVT();
21852
21853 if (MemVT.bitsLT(VT)) { // Is truncating store
21854 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21855 MemVT.getScalarSizeInBits());
21856 // See if we can simplify the operation with SimplifyDemandedBits, which
21857 // only works if the value has a single use.
21858 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21859 return SDValue(N, 0);
21860 }
21861
21862 return SDValue();
21863}
21864
21865SDValue DAGCombiner::visitSTORE(SDNode *N) {
21866 StoreSDNode *ST = cast<StoreSDNode>(N);
21867 SDValue Chain = ST->getChain();
21868 SDValue Value = ST->getValue();
21869 SDValue Ptr = ST->getBasePtr();
21870
21871 // If this is a store of a bit convert, store the input value if the
21872 // resultant store does not need a higher alignment than the original.
21873 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21874 ST->isUnindexed()) {
21875 EVT SVT = Value.getOperand(0).getValueType();
21876 // If the store is volatile, we only want to change the store type if the
21877 // resulting store is legal. Otherwise we might increase the number of
21878 // memory accesses. We don't care if the original type was legal or not
21879 // as we assume software couldn't rely on the number of accesses of an
21880 // illegal type.
21881 // TODO: May be able to relax for unordered atomics (see D66309)
21882 if (((!LegalOperations && ST->isSimple()) ||
21883 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21884 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21885 DAG, *ST->getMemOperand())) {
21886 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21887 ST->getMemOperand());
21888 }
21889 }
21890
21891 // Turn 'store undef, Ptr' -> nothing.
21892 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21893 return Chain;
21894
21895 // Try to infer better alignment information than the store already has.
21896 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21897 !ST->isAtomic()) {
21898 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21899 if (*Alignment > ST->getAlign() &&
21900 isAligned(*Alignment, ST->getSrcValueOffset())) {
21901 SDValue NewStore =
21902 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21903 ST->getMemoryVT(), *Alignment,
21904 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21905 // NewStore will always be N as we are only refining the alignment
21906 assert(NewStore.getNode() == N);
21907 (void)NewStore;
21908 }
21909 }
21910 }
21911
21912 // Try transforming a pair of floating point load / store ops to integer
21913 // load / store ops.
21914 if (SDValue NewST = TransformFPLoadStorePair(N))
21915 return NewST;
21916
21917 // Try transforming several stores into STORE (BSWAP).
21918 if (SDValue Store = mergeTruncStores(ST))
21919 return Store;
21920
21921 if (ST->isUnindexed()) {
21922 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21923 // adjacent stores.
21924 if (findBetterNeighborChains(ST)) {
21925 // replaceStoreChain uses CombineTo, which handled all of the worklist
21926 // manipulation. Return the original node to not do anything else.
21927 return SDValue(ST, 0);
21928 }
21929 Chain = ST->getChain();
21930 }
21931
21932 // FIXME: is there such a thing as a truncating indexed store?
21933 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21934 Value.getValueType().isInteger() &&
21935 (!isa<ConstantSDNode>(Value) ||
21936 !cast<ConstantSDNode>(Value)->isOpaque())) {
21937 // Convert a truncating store of an extension into a standard store.
21938 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21939 Value.getOpcode() == ISD::SIGN_EXTEND ||
21940 Value.getOpcode() == ISD::ANY_EXTEND) &&
21941 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21942 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21943 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21944 ST->getMemOperand());
21945
21946 APInt TruncDemandedBits =
21947 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21948 ST->getMemoryVT().getScalarSizeInBits());
21949
21950 // See if we can simplify the operation with SimplifyDemandedBits, which
21951 // only works if the value has a single use.
21952 AddToWorklist(Value.getNode());
21953 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21954 // Re-visit the store if anything changed and the store hasn't been merged
21955 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21956 // node back to the worklist if necessary, but we also need to re-visit
21957 // the Store node itself.
21958 if (N->getOpcode() != ISD::DELETED_NODE)
21959 AddToWorklist(N);
21960 return SDValue(N, 0);
21961 }
21962
21963 // Otherwise, see if we can simplify the input to this truncstore with
21964 // knowledge that only the low bits are being used. For example:
21965 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21966 if (SDValue Shorter =
21967 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21968 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21969 ST->getMemOperand());
21970
21971 // If we're storing a truncated constant, see if we can simplify it.
21972 // TODO: Move this to targetShrinkDemandedConstant?
21973 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21974 if (!Cst->isOpaque()) {
21975 const APInt &CValue = Cst->getAPIntValue();
21976 APInt NewVal = CValue & TruncDemandedBits;
21977 if (NewVal != CValue) {
21978 SDValue Shorter =
21979 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21980 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21981 ST->getMemoryVT(), ST->getMemOperand());
21982 }
21983 }
21984 }
21985
21986 // If this is a load followed by a store to the same location, then the store
21987 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21988 // TODO: Add big-endian truncate support with test coverage.
21989 // TODO: Can relax for unordered atomics (see D66309)
21990 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21991 ? peekThroughTruncates(Value)
21992 : Value;
21993 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21994 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21995 ST->isUnindexed() && ST->isSimple() &&
21996 Ld->getAddressSpace() == ST->getAddressSpace() &&
21997 // There can't be any side effects between the load and store, such as
21998 // a call or store.
21999 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22000 // The store is dead, remove it.
22001 return Chain;
22002 }
22003 }
22004
22005 // Try scalarizing vector stores of loads where we only change one element
22006 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22007 return NewST;
22008
22009 // TODO: Can relax for unordered atomics (see D66309)
22010 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22011 if (ST->isUnindexed() && ST->isSimple() &&
22012 ST1->isUnindexed() && ST1->isSimple()) {
22013 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22014 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22015 ST->getAddressSpace() == ST1->getAddressSpace()) {
22016 // If this is a store followed by a store with the same value to the
22017 // same location, then the store is dead/noop.
22018 return Chain;
22019 }
22020
22021 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22022 !ST1->getBasePtr().isUndef() &&
22023 ST->getAddressSpace() == ST1->getAddressSpace()) {
22024 // If of the two stores the smaller one is of a scalable vector type
22025 // and the bigger one is a fixed-size store, we cannot allow removing
22026 // the scalable store, because we don't know its final size until
22027 // runtime.
22028 if (ST->getMemoryVT().isScalableVector() ||
22029 ST1->getMemoryVT().isScalableVector()) {
22030 if (ST1->getBasePtr() == Ptr &&
22031 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22032 ST->getMemoryVT().getStoreSize())) {
22033 CombineTo(ST1, ST1->getChain());
22034 return SDValue(N, 0);
22035 }
22036 } else {
22037 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22038 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22039 // If the preceding store writes to a subset of the current store's
22040 // location and no other node is chained to that store, we can
22041 // effectively drop the preceding store. Do not remove stores to undef
22042 // as they may be used as data sinks.
22043 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22044 ChainBase,
22045 ST1->getMemoryVT().getFixedSizeInBits())) {
22046 CombineTo(ST1, ST1->getChain());
22047 return SDValue(N, 0);
22048 }
22049 }
22050 }
22051 }
22052 }
22053
22054 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22055 // truncating store. We can do this even if this is already a truncstore.
22056 if ((Value.getOpcode() == ISD::FP_ROUND ||
22057 Value.getOpcode() == ISD::TRUNCATE) &&
22058 Value->hasOneUse() && ST->isUnindexed() &&
22059 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22060 ST->getMemoryVT(), LegalOperations)) {
22061 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22062 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22063 }
22064
22065 // Always perform this optimization before types are legal. If the target
22066 // prefers, also try this after legalization to catch stores that were created
22067 // by intrinsics or other nodes.
22068 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22069 while (true) {
22070 // There can be multiple store sequences on the same chain.
22071 // Keep trying to merge store sequences until we are unable to do so
22072 // or until we merge the last store on the chain.
22073 bool Changed = mergeConsecutiveStores(ST);
22074 if (!Changed) break;
22075 // Return N as merge only uses CombineTo and no worklist clean
22076 // up is necessary.
22077 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22078 return SDValue(N, 0);
22079 }
22080 }
22081
22082 // Try transforming N to an indexed store.
22083 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22084 return SDValue(N, 0);
22085
22086 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22087 //
22088 // Make sure to do this only after attempting to merge stores in order to
22089 // avoid changing the types of some subset of stores due to visit order,
22090 // preventing their merging.
22091 if (isa<ConstantFPSDNode>(ST->getValue())) {
22092 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22093 return NewSt;
22094 }
22095
22096 if (SDValue NewSt = splitMergedValStore(ST))
22097 return NewSt;
22098
22099 return ReduceLoadOpStoreWidth(N);
22100}
22101
22102SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22103 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22104 if (!LifetimeEnd->hasOffset())
22105 return SDValue();
22106
22107 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
22108 LifetimeEnd->getOffset(), false);
22109
22110 // We walk up the chains to find stores.
22111 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22112 while (!Chains.empty()) {
22113 SDValue Chain = Chains.pop_back_val();
22114 if (!Chain.hasOneUse())
22115 continue;
22116 switch (Chain.getOpcode()) {
22117 case ISD::TokenFactor:
22118 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22119 Chains.push_back(Chain.getOperand(--Nops));
22120 break;
22121 case ISD::LIFETIME_START:
22122 case ISD::LIFETIME_END:
22123 // We can forward past any lifetime start/end that can be proven not to
22124 // alias the node.
22125 if (!mayAlias(Chain.getNode(), N))
22126 Chains.push_back(Chain.getOperand(0));
22127 break;
22128 case ISD::STORE: {
22129 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22130 // TODO: Can relax for unordered atomics (see D66309)
22131 if (!ST->isSimple() || ST->isIndexed())
22132 continue;
22133 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22134 // The bounds of a scalable store are not known until runtime, so this
22135 // store cannot be elided.
22136 if (StoreSize.isScalable())
22137 continue;
22138 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22139 // If we store purely within object bounds just before its lifetime ends,
22140 // we can remove the store.
22141 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
22142 StoreSize.getFixedValue() * 8)) {
22143 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22144 dbgs() << "\nwithin LIFETIME_END of : ";
22145 LifetimeEndBase.dump(); dbgs() << "\n");
22146 CombineTo(ST, ST->getChain());
22147 return SDValue(N, 0);
22148 }
22149 }
22150 }
22151 }
22152 return SDValue();
22153}
22154
22155 /// For the store instruction sequence below, the F and I values
22156 /// are bundled together as an i64 value before being stored into memory.
22157 /// Sometimes it is more efficient to generate separate stores for F and I,
22158/// which can remove the bitwise instructions or sink them to colder places.
22159///
22160/// (store (or (zext (bitcast F to i32) to i64),
22161/// (shl (zext I to i64), 32)), addr) -->
22162/// (store F, addr) and (store I, addr+4)
22163///
22164 /// Similarly, splitting for other merged stores can also be beneficial, like:
22165/// For pair of {i32, i32}, i64 store --> two i32 stores.
22166/// For pair of {i32, i16}, i64 store --> two i32 stores.
22167/// For pair of {i16, i16}, i32 store --> two i16 stores.
22168/// For pair of {i16, i8}, i32 store --> two i16 stores.
22169/// For pair of {i8, i8}, i16 store --> two i8 stores.
22170///
22171/// We allow each target to determine specifically which kind of splitting is
22172/// supported.
22173///
22174 /// The store patterns are commonly seen in the simple code snippet below
22175 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
22176/// void goo(const std::pair<int, float> &);
22177/// hoo() {
22178/// ...
22179/// goo(std::make_pair(tmp, ftmp));
22180/// ...
22181/// }
22182///
22183SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
22184 if (OptLevel == CodeGenOptLevel::None)
22185 return SDValue();
22186
22187 // Can't change the number of memory accesses for a volatile store or break
22188 // atomicity for an atomic one.
22189 if (!ST->isSimple())
22190 return SDValue();
22191
22192 SDValue Val = ST->getValue();
22193 SDLoc DL(ST);
22194
22195 // Match OR operand.
22196 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
22197 return SDValue();
22198
22199 // Match SHL operand and get Lower and Higher parts of Val.
22200 SDValue Op1 = Val.getOperand(0);
22201 SDValue Op2 = Val.getOperand(1);
22202 SDValue Lo, Hi;
22203 if (Op1.getOpcode() != ISD::SHL) {
22204 std::swap(Op1, Op2);
22205 if (Op1.getOpcode() != ISD::SHL)
22206 return SDValue();
22207 }
22208 Lo = Op2;
22209 Hi = Op1.getOperand(0);
22210 if (!Op1.hasOneUse())
22211 return SDValue();
22212
22213 // Match shift amount to HalfValBitSize.
22214 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
22215 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
22216 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22217 return SDValue();
22218
22219 // Lo and Hi are zero-extended from an integer type no wider than half the
22220 // stored value, e.g. from i32 to i64.
22221 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
22222 !Lo.getOperand(0).getValueType().isScalarInteger() ||
22223 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
22224 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
22225 !Hi.getOperand(0).getValueType().isScalarInteger() ||
22226 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
22227 return SDValue();
22228
22229 // Use the EVT of low and high parts before bitcast as the input
22230 // of target query.
22231 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
22232 ? Lo.getOperand(0).getValueType()
22233 : Lo.getValueType();
22234 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
22235 ? Hi.getOperand(0).getValueType()
22236 : Hi.getValueType();
22237 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
22238 return SDValue();
22239
22240 // Start to split store.
22241 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22242 AAMDNodes AAInfo = ST->getAAInfo();
22243
22244 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
22245 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
22246 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
22247 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
22248
22249 SDValue Chain = ST->getChain();
22250 SDValue Ptr = ST->getBasePtr();
22251 // Lower value store.
22252 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22253 ST->getOriginalAlign(), MMOFlags, AAInfo);
22254 Ptr =
22255 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
22256 // Higher value store.
22257 SDValue St1 = DAG.getStore(
22258 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
22259 ST->getOriginalAlign(), MMOFlags, AAInfo);
22260 return St1;
22261}
22262
22263// Merge an insertion into an existing shuffle:
22264// (insert_vector_elt (vector_shuffle X, Y, Mask),
22265 //  (extract_vector_elt X, N), InsIndex)
22266// --> (vector_shuffle X, Y, NewMask)
22267 // and variations where shuffle operands may be CONCAT_VECTORS.
22268 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
22269 SmallVectorImpl<int> &NewMask, SDValue Elt,
22270 unsigned InsIndex) {
22271 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22272 !isa<ConstantSDNode>(Elt.getOperand(1)))
22273 return false;
22274
22275 // Vec's operand 0 is using indices from 0 to N-1 and
22276 // operand 1 from N to 2N - 1, where N is the number of
22277 // elements in the vectors.
22278 SDValue InsertVal0 = Elt.getOperand(0);
22279 int ElementOffset = -1;
22280
22281 // We explore the inputs of the shuffle in order to see if we find the
22282 // source of the extract_vector_elt. If so, we can use it to modify the
22283 // shuffle rather than perform an insert_vector_elt.
22284 SmallVector<std::pair<int, SDValue>> ArgWorkList;
22285 ArgWorkList.emplace_back(Mask.size(), Y);
22286 ArgWorkList.emplace_back(0, X);
22287
22288 while (!ArgWorkList.empty()) {
22289 int ArgOffset;
22290 SDValue ArgVal;
22291 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
22292
22293 if (ArgVal == InsertVal0) {
22294 ElementOffset = ArgOffset;
22295 break;
22296 }
22297
22298 // Peek through concat_vector.
22299 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
22300 int CurrentArgOffset =
22301 ArgOffset + ArgVal.getValueType().getVectorNumElements();
22302 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
22303 for (SDValue Op : reverse(ArgVal->ops())) {
22304 CurrentArgOffset -= Step;
22305 ArgWorkList.emplace_back(CurrentArgOffset, Op);
22306 }
22307
22308 // Make sure we went through all the elements and did not screw up index
22309 // computation.
22310 assert(CurrentArgOffset == ArgOffset);
22311 }
22312 }
22313
22314 // If we failed to find a match, see if we can replace an UNDEF shuffle
22315 // operand.
22316 if (ElementOffset == -1) {
22317 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
22318 return false;
22319 ElementOffset = Mask.size();
22320 Y = InsertVal0;
22321 }
22322
22323 NewMask.assign(Mask.begin(), Mask.end());
22324 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
22325 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
22326 "NewMask[InsIndex] is out of bound");
22327 return true;
22328}
22329
22330// Merge an insertion into an existing shuffle:
22331// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
22332// InsIndex)
22333// --> (vector_shuffle X, Y) and variations where shuffle operands may be
22334// CONCAT_VECTORS.
22335SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
22336 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22337 "Expected extract_vector_elt");
22338 SDValue InsertVal = N->getOperand(1);
22339 SDValue Vec = N->getOperand(0);
22340
22341 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
22342 if (!SVN || !Vec.hasOneUse())
22343 return SDValue();
22344
22345 ArrayRef<int> Mask = SVN->getMask();
22346 SDValue X = Vec.getOperand(0);
22347 SDValue Y = Vec.getOperand(1);
22348
22349 SmallVector<int, 16> NewMask(Mask);
22350 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
22351 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
22352 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
22353 if (LegalShuffle)
22354 return LegalShuffle;
22355 }
22356
22357 return SDValue();
22358}
22359
22360// Convert a disguised subvector insertion into a shuffle:
22361// insert_vector_elt V, (bitcast X from vector type), IdxC -->
22362// bitcast(shuffle (bitcast V), (extended X), Mask)
22363// Note: We do not use an insert_subvector node because that requires a
22364// legal subvector type.
22365SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
22366 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22367 "Expected extract_vector_elt");
22368 SDValue InsertVal = N->getOperand(1);
22369
22370 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
22371 !InsertVal.getOperand(0).getValueType().isVector())
22372 return SDValue();
22373
22374 SDValue SubVec = InsertVal.getOperand(0);
22375 SDValue DestVec = N->getOperand(0);
22376 EVT SubVecVT = SubVec.getValueType();
22377 EVT VT = DestVec.getValueType();
22378 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
22379 // If the source only has a single vector element, the cost of creating and
22380 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
22381 if (NumSrcElts == 1)
22382 return SDValue();
22383 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
22384 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
22385
22386 // Step 1: Create a shuffle mask that implements this insert operation. The
22387 // vector that we are inserting into will be operand 0 of the shuffle, so
22388 // those elements are just 'i'. The inserted subvector is in the first
22389 // positions of operand 1 of the shuffle. Example:
22390 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
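// Working through the example above as a sanity check: V is v4i32 (128
// bits) and X is v2i16 (32 bits), so NumSrcElts = 2, ExtendRatio = 4 and
// NumMaskVals = 8. With InsIndex = 2, positions 4 and 5 satisfy
// i / NumSrcElts == InsIndex and become (i % NumSrcElts) + NumMaskVals,
// i.e. 8 and 9; every other position keeps its identity value i.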
22391 SmallVector<int, 16> Mask(NumMaskVals);
22392 for (unsigned i = 0; i != NumMaskVals; ++i) {
22393 if (i / NumSrcElts == InsIndex)
22394 Mask[i] = (i % NumSrcElts) + NumMaskVals;
22395 else
22396 Mask[i] = i;
22397 }
22398
22399 // Bail out if the target can not handle the shuffle we want to create.
22400 EVT SubVecEltVT = SubVecVT.getVectorElementType();
22401 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
22402 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
22403 return SDValue();
22404
22405 // Step 2: Create a wide vector from the inserted source vector by appending
22406 // undefined elements. This is the same size as our destination vector.
22407 SDLoc DL(N);
22408 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
22409 ConcatOps[0] = SubVec;
22410 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
22411
22412 // Step 3: Shuffle in the padded subvector.
22413 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
22414 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
22415 AddToWorklist(PaddedSubV.getNode());
22416 AddToWorklist(DestVecBC.getNode());
22417 AddToWorklist(Shuf.getNode());
22418 return DAG.getBitcast(VT, Shuf);
22419}
22420
22421// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
22422 // possible and the new load will be quick. We use more loads but fewer shuffles
22423// and inserts.
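// A minimal sketch of the InsIndex == 0 case: if the v4i32 vector was loaded
// from p, the scalar was loaded from p - 4, and the shuffle mask is
// <u,0,1,2>, the whole insert reduces to one v4i32 load from p - 4
// (assuming the target reports the possibly unaligned load as fast).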
22424SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
22425 EVT VT = N->getValueType(0);
22426
22427  // InsIndex is expected to be the first or last lane.
22428 if (!VT.isFixedLengthVector() ||
22429 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
22430 return SDValue();
22431
22432 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
22433 // depending on the InsIndex.
22434 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
22435 SDValue Scalar = N->getOperand(1);
22436 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
22437 return InsIndex == P.index() || P.value() < 0 ||
22438 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
22439 (InsIndex == VT.getVectorNumElements() - 1 &&
22440 P.value() == (int)P.index() + 1);
22441 }))
22442 return SDValue();
22443
22444 // We optionally skip over an extend so long as both loads are extended in the
22445 // same way from the same type.
22446 unsigned Extend = 0;
22447 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
22448 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
22449 Scalar.getOpcode() == ISD::ANY_EXTEND) {
22450 Extend = Scalar.getOpcode();
22451 Scalar = Scalar.getOperand(0);
22452 }
22453
22454 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
22455 if (!ScalarLoad)
22456 return SDValue();
22457
22458 SDValue Vec = Shuffle->getOperand(0);
22459 if (Extend) {
22460 if (Vec.getOpcode() != Extend)
22461 return SDValue();
22462 Vec = Vec.getOperand(0);
22463 }
22464 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
22465 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
22466 return SDValue();
22467
22468 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
22469 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
22470 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22471 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22472 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
22473 return SDValue();
22474
22475  // Check that the offset between the pointers is such that they form a single
22476  // contiguous load.
22477 if (InsIndex == 0) {
22478 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
22479 -1))
22480 return SDValue();
22481 } else {
22482    if (!DAG.areNonVolatileConsecutiveLoads(
22483            VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22484 return SDValue();
22485 }
22486
22487 // And that the new unaligned load will be fast.
22488 unsigned IsFast = 0;
22489 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22490 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22491 Vec.getValueType(), VecLoad->getAddressSpace(),
22492 NewAlign, VecLoad->getMemOperand()->getFlags(),
22493 &IsFast) ||
22494 !IsFast)
22495 return SDValue();
22496
22497 // Calculate the new Ptr and create the new load.
22498 SDLoc DL(N);
22499 SDValue Ptr = ScalarLoad->getBasePtr();
22500 if (InsIndex != 0)
22501 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22502 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22503 MachinePointerInfo PtrInfo =
22504 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22505 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22506
22507 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22508 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22509 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22510 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22511 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22512}
22513
22514SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22515 SDValue InVec = N->getOperand(0);
22516 SDValue InVal = N->getOperand(1);
22517 SDValue EltNo = N->getOperand(2);
22518 SDLoc DL(N);
22519
22520 EVT VT = InVec.getValueType();
22521 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22522
22523 // Insert into out-of-bounds element is undefined.
22524 if (IndexC && VT.isFixedLengthVector() &&
22525 IndexC->getZExtValue() >= VT.getVectorNumElements())
22526 return DAG.getUNDEF(VT);
22527
22528 // Remove redundant insertions:
22529 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22530 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22531 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22532 return InVec;
22533
22534 if (!IndexC) {
22535    // If this is a variable insert into an undef vector, it might be better to splat:
22536 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22537 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22538 return DAG.getSplat(VT, DL, InVal);
22539 return SDValue();
22540 }
22541
22542 if (VT.isScalableVector())
22543 return SDValue();
22544
22545 unsigned NumElts = VT.getVectorNumElements();
22546
22547 // We must know which element is being inserted for folds below here.
22548 unsigned Elt = IndexC->getZExtValue();
22549
22550 // Handle <1 x ???> vector insertion special cases.
22551 if (NumElts == 1) {
22552 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22553 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22554 InVal.getOperand(0).getValueType() == VT &&
22555 isNullConstant(InVal.getOperand(1)))
22556 return InVal.getOperand(0);
22557 }
22558
22559 // Canonicalize insert_vector_elt dag nodes.
22560 // Example:
22561 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22562 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22563 //
22564 // Do this only if the child insert_vector node has one use; also
22565 // do this only if indices are both constants and Idx1 < Idx0.
22566 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22567 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22568 unsigned OtherElt = InVec.getConstantOperandVal(2);
22569 if (Elt < OtherElt) {
22570 // Swap nodes.
22571 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22572 InVec.getOperand(0), InVal, EltNo);
22573 AddToWorklist(NewOp.getNode());
22574 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22575 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22576 }
22577 }
22578
22579 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22580 return Shuf;
22581
22582 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22583 return Shuf;
22584
22585 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22586 return Shuf;
22587
22588 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
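  // For example (illustrative, v4i32, each inner insert having a single use):
  //   insert (insert (insert undef, a, 0), b, 1), c, 2
  //     --> build_vector a, b, c, undef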
22589 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22590 // vXi1 vector - we don't need to recurse.
22591 if (NumElts == 1)
22592 return DAG.getBuildVector(VT, DL, {InVal});
22593
22594 // If we haven't already collected the element, insert into the op list.
22595 EVT MaxEltVT = InVal.getValueType();
22596 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22597 unsigned Idx) {
22598 if (!Ops[Idx]) {
22599 Ops[Idx] = Elt;
22600 if (VT.isInteger()) {
22601 EVT EltVT = Elt.getValueType();
22602 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22603 }
22604 }
22605 };
22606
22607 // Ensure all the operands are the same value type, fill any missing
22608 // operands with UNDEF and create the BUILD_VECTOR.
22609 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22610 assert(Ops.size() == NumElts && "Unexpected vector size");
22611 for (SDValue &Op : Ops) {
22612 if (Op)
22613 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22614 else
22615 Op = DAG.getUNDEF(MaxEltVT);
22616 }
22617 return DAG.getBuildVector(VT, DL, Ops);
22618 };
22619
22620 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22621 Ops[Elt] = InVal;
22622
22623    // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22624 for (SDValue CurVec = InVec; CurVec;) {
22625 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22626 if (CurVec.isUndef())
22627 return CanonicalizeBuildVector(Ops);
22628
22629 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22630 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22631 for (unsigned I = 0; I != NumElts; ++I)
22632 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22633 return CanonicalizeBuildVector(Ops);
22634 }
22635
22636 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22637 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22638 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22639 return CanonicalizeBuildVector(Ops);
22640 }
22641
22642 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22643 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22644 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22645 if (CurIdx->getAPIntValue().ult(NumElts)) {
22646 unsigned Idx = CurIdx->getZExtValue();
22647 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22648
22649 // Found entire BUILD_VECTOR.
22650 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22651 return CanonicalizeBuildVector(Ops);
22652
22653 CurVec = CurVec->getOperand(0);
22654 continue;
22655 }
22656
22657 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22658 // update the shuffle mask (and second operand if we started with unary
22659 // shuffle) and create a new legal shuffle.
22660 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22661 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22662 SDValue LHS = SVN->getOperand(0);
22663 SDValue RHS = SVN->getOperand(1);
22664        SmallVector<int, 16> Mask(SVN->getMask());
22665        bool Merged = true;
22666 for (auto I : enumerate(Ops)) {
22667 SDValue &Op = I.value();
22668 if (Op) {
22669 SmallVector<int, 16> NewMask;
22670 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22671 Merged = false;
22672 break;
22673 }
22674 Mask = std::move(NewMask);
22675 }
22676 }
22677 if (Merged)
22678 if (SDValue NewShuffle =
22679 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22680 return NewShuffle;
22681 }
22682
22683 // If all insertions are zero value, try to convert to AND mask.
22684 // TODO: Do this for -1 with OR mask?
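      // For example (illustrative, v4i32), inserting zero into lanes 1 and 3:
      //   insert (insert X, 0, 1), 0, 3 --> and X, {-1, 0, -1, 0}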
22685 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22686 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22687 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22688 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22689 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22690        SmallVector<SDValue, 8> Mask(NumElts);
22691        for (unsigned I = 0; I != NumElts; ++I)
22692 Mask[I] = Ops[I] ? Zero : AllOnes;
22693 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22694 DAG.getBuildVector(VT, DL, Mask));
22695 }
22696
22697 // Failed to find a match in the chain - bail.
22698 break;
22699 }
22700
22701 // See if we can fill in the missing constant elements as zeros.
22702 // TODO: Should we do this for any constant?
22703 APInt DemandedZeroElts = APInt::getZero(NumElts);
22704 for (unsigned I = 0; I != NumElts; ++I)
22705 if (!Ops[I])
22706 DemandedZeroElts.setBit(I);
22707
22708 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22709 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22710 : DAG.getConstantFP(0, DL, MaxEltVT);
22711 for (unsigned I = 0; I != NumElts; ++I)
22712 if (!Ops[I])
22713 Ops[I] = Zero;
22714
22715 return CanonicalizeBuildVector(Ops);
22716 }
22717 }
22718
22719 return SDValue();
22720}
22721
22722SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22723 SDValue EltNo,
22724 LoadSDNode *OriginalLoad) {
22725 assert(OriginalLoad->isSimple());
22726
22727 EVT ResultVT = EVE->getValueType(0);
22728 EVT VecEltVT = InVecVT.getVectorElementType();
22729
22730 // If the vector element type is not a multiple of a byte then we are unable
22731 // to correctly compute an address to load only the extracted element as a
22732 // scalar.
22733 if (!VecEltVT.isByteSized())
22734 return SDValue();
22735
22736 ISD::LoadExtType ExtTy =
22737 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
22738 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22739 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22740 return SDValue();
22741
22742 Align Alignment = OriginalLoad->getAlign();
22743  MachinePointerInfo MPI;
22744  SDLoc DL(EVE);
22745 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22746 int Elt = ConstEltNo->getZExtValue();
22747 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22748 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22749 Alignment = commonAlignment(Alignment, PtrOff);
22750 } else {
22751 // Discard the pointer info except the address space because the memory
22752 // operand can't represent this new access since the offset is variable.
22753 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22754 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22755 }
22756
22757 unsigned IsFast = 0;
22758 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22759 OriginalLoad->getAddressSpace(), Alignment,
22760 OriginalLoad->getMemOperand()->getFlags(),
22761 &IsFast) ||
22762 !IsFast)
22763 return SDValue();
22764
22765 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22766 InVecVT, EltNo);
22767
22768 // We are replacing a vector load with a scalar load. The new load must have
22769 // identical memory op ordering to the original.
22770 SDValue Load;
22771 if (ResultVT.bitsGT(VecEltVT)) {
22772 // If the result type of vextract is wider than the load, then issue an
22773 // extending load instead.
22774 ISD::LoadExtType ExtType =
22775 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22776 : ISD::EXTLOAD;
22777 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22778 NewPtr, MPI, VecEltVT, Alignment,
22779 OriginalLoad->getMemOperand()->getFlags(),
22780 OriginalLoad->getAAInfo());
22781 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22782 } else {
22783 // The result type is narrower or the same width as the vector element
22784 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22785 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22786 OriginalLoad->getAAInfo());
22787 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22788 if (ResultVT.bitsLT(VecEltVT))
22789 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22790 else
22791 Load = DAG.getBitcast(ResultVT, Load);
22792 }
22793 ++OpsNarrowed;
22794 return Load;
22795}
22796
22797/// Transform a vector binary operation into a scalar binary operation by moving
22798/// the math/logic after an extract element of a vector.
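/// For example (illustrative), with a constant operand that folds away after
/// the extract:
///   extract_vector_elt (add X, <1,2,3,4>), 1
///     --> add (extract_vector_elt X, 1), 2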
22799static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
22800                                       const SDLoc &DL, bool LegalTypes) {
22801 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22802 SDValue Vec = ExtElt->getOperand(0);
22803 SDValue Index = ExtElt->getOperand(1);
22804 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22805 unsigned Opc = Vec.getOpcode();
22806 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
22807 Vec->getNumValues() != 1)
22808 return SDValue();
22809
22810 EVT ResVT = ExtElt->getValueType(0);
22811 if (Opc == ISD::SETCC &&
22812 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
22813 return SDValue();
22814
22815 // Targets may want to avoid this to prevent an expensive register transfer.
22816 if (!TLI.shouldScalarizeBinop(Vec))
22817 return SDValue();
22818
22819 // Extracting an element of a vector constant is constant-folded, so this
22820 // transform is just replacing a vector op with a scalar op while moving the
22821 // extract.
22822 SDValue Op0 = Vec.getOperand(0);
22823 SDValue Op1 = Vec.getOperand(1);
22824 APInt SplatVal;
22825 if (!isAnyConstantBuildVector(Op0, true) &&
22826 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
22827 !isAnyConstantBuildVector(Op1, true) &&
22828 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
22829 return SDValue();
22830
22831 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
22832 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
22833 if (Opc == ISD::SETCC) {
22834 EVT OpVT = Op0.getValueType().getVectorElementType();
22835 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
22836 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
22837 return DAG.getSetCC(DL, ResVT, Op0, Op1,
22838 cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22839 }
22840 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
22841 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
22842 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
22843}
22844
22845// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22846// recursively analyse all of its users, and try to model them as
22847// bit sequence extractions. If all of them agree on the new, narrower element
22848// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22849// new element type, do so now.
22850// This is mainly useful to recover from legalization that scalarized
22851// the vector as wide elements; here we try to rebuild it with narrower elements.
22852//
22853// Some more nodes could be modelled if that helps cover interesting patterns.
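// As a sketch (little endian, v4i32 source, purely illustrative): if
//   %e = extract_vector_elt %v, 0
// is only consumed as (trunc %e to i16) and (trunc (srl %e, 16) to i16),
// both feeding BUILD_VECTORs, then %v can be reinterpreted as v8i16 and the
// two leaves rewritten as extract_vector_elt (bitcast %v to v8i16), 0 and 1.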
22854bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22855 SDNode *N) {
22856 // We perform this optimization post type-legalization because
22857 // the type-legalizer often scalarizes integer-promoted vectors.
22858  // Performing this optimization earlier may cause legalization cycles.
22859 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22860 return false;
22861
22862 // TODO: Add support for big-endian.
22863 if (DAG.getDataLayout().isBigEndian())
22864 return false;
22865
22866 SDValue VecOp = N->getOperand(0);
22867 EVT VecVT = VecOp.getValueType();
22868 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22869
22870 // We must start with a constant extraction index.
22871 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22872 if (!IndexC)
22873 return false;
22874
22875 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22876 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22877
22878 // TODO: deal with the case of implicit anyext of the extraction.
22879 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22880 EVT ScalarVT = N->getValueType(0);
22881 if (VecVT.getScalarType() != ScalarVT)
22882 return false;
22883
22884 // TODO: deal with the cases other than everything being integer-typed.
22885 if (!ScalarVT.isScalarInteger())
22886 return false;
22887
22888 struct Entry {
22889    SDNode *Producer;
22890
22891 // Which bits of VecOp does it contain?
22892 unsigned BitPos;
22893 int NumBits;
22894 // NOTE: the actual width of \p Producer may be wider than NumBits!
22895
22896 Entry(Entry &&) = default;
22897 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22898 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22899
22900 Entry() = delete;
22901 Entry(const Entry &) = delete;
22902 Entry &operator=(const Entry &) = delete;
22903 Entry &operator=(Entry &&) = delete;
22904 };
22905 SmallVector<Entry, 32> Worklist;
22906  SmallVector<Entry, 32> Leafs;
22907
22908 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22909 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22910 /*NumBits=*/VecEltBitWidth);
22911
22912 while (!Worklist.empty()) {
22913 Entry E = Worklist.pop_back_val();
22914 // Does the node not even use any of the VecOp bits?
22915 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22916 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22917 return false; // Let's allow the other combines clean this up first.
22918 // Did we fail to model any of the users of the Producer?
22919 bool ProducerIsLeaf = false;
22920 // Look at each user of this Producer.
22921 for (SDNode *User : E.Producer->users()) {
22922 switch (User->getOpcode()) {
22923 // TODO: support ISD::BITCAST
22924 // TODO: support ISD::ANY_EXTEND
22925 // TODO: support ISD::ZERO_EXTEND
22926 // TODO: support ISD::SIGN_EXTEND
22927 case ISD::TRUNCATE:
22928        // Truncation simply means we keep position, but extract fewer bits.
22929 Worklist.emplace_back(User, E.BitPos,
22930 /*NumBits=*/User->getValueSizeInBits(0));
22931 break;
22932 // TODO: support ISD::SRA
22933 // TODO: support ISD::SHL
22934 case ISD::SRL:
22935 // We should be shifting the Producer by a constant amount.
22936 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22937 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22938 // Logical right-shift means that we start extraction later,
22939 // but stop it at the same position we did previously.
22940 unsigned ShAmt = ShAmtC->getZExtValue();
22941 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22942 break;
22943 }
22944 [[fallthrough]];
22945 default:
22946 // We can not model this user of the Producer.
22947 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22948 ProducerIsLeaf = true;
22949 // Profitability check: all users that we can not model
22950 // must be ISD::BUILD_VECTOR's.
22951 if (User->getOpcode() != ISD::BUILD_VECTOR)
22952 return false;
22953 break;
22954 }
22955 }
22956 if (ProducerIsLeaf)
22957 Leafs.emplace_back(std::move(E));
22958 }
22959
22960 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22961
22962  // If we are still at the same element granularity, give up.
22963 if (NewVecEltBitWidth == VecEltBitWidth)
22964 return false;
22965
22966 // The vector width must be a multiple of the new element width.
22967 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22968 return false;
22969
22970 // All leafs must agree on the new element width.
22971  // All leafs must not expect any "padding" bits on top of that width.
22972 // All leafs must start extraction from multiple of that width.
22973 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22974 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22975 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22976 E.BitPos % NewVecEltBitWidth == 0;
22977 }))
22978 return false;
22979
22980 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22981 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22982 VecVT.getSizeInBits() / NewVecEltBitWidth);
22983
22984 if (LegalTypes &&
22985 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22986 return false;
22987
22988 if (LegalOperations &&
22989 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22990        TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22991    return false;
22992
22993 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22994 for (const Entry &E : Leafs) {
22995 SDLoc DL(E.Producer);
22996 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22997 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22998 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22999 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23000 DAG.getVectorIdxConstant(NewIndex, DL));
23001 CombineTo(E.Producer, V);
23002 }
23003
23004 return true;
23005}
23006
23007SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23008 SDValue VecOp = N->getOperand(0);
23009 SDValue Index = N->getOperand(1);
23010 EVT ScalarVT = N->getValueType(0);
23011 EVT VecVT = VecOp.getValueType();
23012 if (VecOp.isUndef())
23013 return DAG.getUNDEF(ScalarVT);
23014
23015  // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val
23016 //
23017 // This only really matters if the index is non-constant since other combines
23018 // on the constant elements already work.
23019 SDLoc DL(N);
23020 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23021 Index == VecOp.getOperand(2)) {
23022 SDValue Elt = VecOp.getOperand(1);
23023 AddUsersToWorklist(VecOp.getNode());
23024 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23025 }
23026
23027  // (vextract (scalar_to_vector val), 0) -> val
23028 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23029 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23030 if (DAG.isKnownNeverZero(Index))
23031 return DAG.getUNDEF(ScalarVT);
23032
23033 // Check if the result type doesn't match the inserted element type.
23034 // The inserted element and extracted element may have mismatched bitwidth.
23035    // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23036 SDValue InOp = VecOp.getOperand(0);
23037 if (InOp.getValueType() != ScalarVT) {
23038 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23039 if (InOp.getValueType().bitsGT(ScalarVT))
23040 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23041 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23042 }
23043 return InOp;
23044 }
23045
23046 // extract_vector_elt of out-of-bounds element -> UNDEF
23047 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23048 if (IndexC && VecVT.isFixedLengthVector() &&
23049 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23050 return DAG.getUNDEF(ScalarVT);
23051
23052 // extract_vector_elt (build_vector x, y), 1 -> y
23053 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23054 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23055 TLI.isTypeLegal(VecVT)) {
23056 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23057 VecVT.isFixedLengthVector()) &&
23058 "BUILD_VECTOR used for scalable vectors");
23059 unsigned IndexVal =
23060 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23061 SDValue Elt = VecOp.getOperand(IndexVal);
23062 EVT InEltVT = Elt.getValueType();
23063
23064 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23065 isNullConstant(Elt)) {
23066 // Sometimes build_vector's scalar input types do not match result type.
23067 if (ScalarVT == InEltVT)
23068 return Elt;
23069
23070 // TODO: It may be useful to truncate if free if the build_vector
23071 // implicitly converts.
23072 }
23073 }
23074
23075 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23076 return BO;
23077
23078 if (VecVT.isScalableVector())
23079 return SDValue();
23080
23081 // All the code from this point onwards assumes fixed width vectors, but it's
23082 // possible that some of the combinations could be made to work for scalable
23083 // vectors too.
23084 unsigned NumElts = VecVT.getVectorNumElements();
23085 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23086
23087  // See if the extracted element is constant, in which case fold it if it's
23088 // a legal fp immediate.
23089 if (IndexC && ScalarVT.isFloatingPoint()) {
23090 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23091 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23092 if (KnownElt.isConstant()) {
23093 APFloat CstFP =
23094 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23095 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23096 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23097 }
23098 }
23099
23100 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23101 // there are regressions on multiple targets without it. We can end up with a
23102 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23103 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23104 VecOp.hasOneUse()) {
23105    // The vector index of the LSBs of the source depends on the endianness.
23106 bool IsLE = DAG.getDataLayout().isLittleEndian();
23107 unsigned ExtractIndex = IndexC->getZExtValue();
23108 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23109 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23110 SDValue BCSrc = VecOp.getOperand(0);
23111 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23112 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23113
23114 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23115 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23116 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23117 BCSrc.getScalarValueSizeInBits() ==
23118            BCSrc.getOperand(0).getValueSizeInBits()) {
23119      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23120 // trunc i64 X to i32
23121 SDValue X = BCSrc.getOperand(0);
23122 EVT XVT = X.getValueType();
23123 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23124 "Extract element and scalar to vector can't change element type "
23125 "from FP to integer.");
23126 unsigned XBitWidth = X.getValueSizeInBits();
23127 unsigned Scale = XBitWidth / VecEltBitWidth;
23128 BCTruncElt = IsLE ? 0 : Scale - 1;
23129
23130 // An extract element return value type can be wider than its vector
23131 // operand element type. In that case, the high bits are undefined, so
23132 // it's possible that we may need to extend rather than truncate.
23133 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23134 assert(XBitWidth % VecEltBitWidth == 0 &&
23135 "Scalar bitwidth must be a multiple of vector element bitwidth");
23136
23137 if (ExtractIndex != BCTruncElt) {
23138 unsigned ShiftIndex =
23139 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23140 X = DAG.getNode(
23141 ISD::SRL, DL, XVT, X,
23142 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23143 }
23144
23145 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23146 }
23147 }
23148 }
23149
23150 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23151 // We only perform this optimization before the op legalization phase because
23152 // we may introduce new vector instructions which are not backed by TD
23153 // patterns. For example on AVX, extracting elements from a wide vector
23154 // without using extract_subvector. However, if we can find an underlying
23155 // scalar value, then we can always use that.
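  // For example (illustrative, v4i32):
  //   extract_vector_elt (vector_shuffle<2,6,1,0> A, B), 1
  //     --> extract_vector_elt B, 2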
23156 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23157 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23158 // Find the new index to extract from.
23159 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23160
23161 // Extracting an undef index is undef.
23162 if (OrigElt == -1)
23163 return DAG.getUNDEF(ScalarVT);
23164
23165 // Select the right vector half to extract from.
23166 SDValue SVInVec;
23167 if (OrigElt < (int)NumElts) {
23168 SVInVec = VecOp.getOperand(0);
23169 } else {
23170 SVInVec = VecOp.getOperand(1);
23171 OrigElt -= NumElts;
23172 }
23173
23174 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23175 SDValue InOp = SVInVec.getOperand(OrigElt);
23176 if (InOp.getValueType() != ScalarVT) {
23177 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23178 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23179 }
23180
23181 return InOp;
23182 }
23183
23184 // FIXME: We should handle recursing on other vector shuffles and
23185 // scalar_to_vector here as well.
23186
23187 if (!LegalOperations ||
23188 // FIXME: Should really be just isOperationLegalOrCustom.
23189        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23190        TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT))
23191      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23192 DAG.getVectorIdxConstant(OrigElt, DL));
23193 }
23194 }
23195
23196 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23197 // simplify it based on the (valid) extraction indices.
23198 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23199 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23200 Use->getOperand(0) == VecOp &&
23201 isa<ConstantSDNode>(Use->getOperand(1));
23202 })) {
23203 APInt DemandedElts = APInt::getZero(NumElts);
23204 for (SDNode *User : VecOp->users()) {
23205 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23206 if (CstElt->getAPIntValue().ult(NumElts))
23207 DemandedElts.setBit(CstElt->getZExtValue());
23208 }
23209 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
23210 // We simplified the vector operand of this extract element. If this
23211 // extract is not dead, visit it again so it is folded properly.
23212 if (N->getOpcode() != ISD::DELETED_NODE)
23213 AddToWorklist(N);
23214 return SDValue(N, 0);
23215 }
23216 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
23217 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
23218 // We simplified the vector operand of this extract element. If this
23219 // extract is not dead, visit it again so it is folded properly.
23220 if (N->getOpcode() != ISD::DELETED_NODE)
23221 AddToWorklist(N);
23222 return SDValue(N, 0);
23223 }
23224 }
23225
23226 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
23227 return SDValue(N, 0);
23228
23229 // Everything under here is trying to match an extract of a loaded value.
23230  // If the result of the load has to be truncated, then it's not necessarily
23231 // profitable.
23232 bool BCNumEltsChanged = false;
23233 EVT ExtVT = VecVT.getVectorElementType();
23234 EVT LVT = ExtVT;
23235 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
23236 return SDValue();
23237
23238 if (VecOp.getOpcode() == ISD::BITCAST) {
23239 // Don't duplicate a load with other uses.
23240 if (!VecOp.hasOneUse())
23241 return SDValue();
23242
23243 EVT BCVT = VecOp.getOperand(0).getValueType();
23244 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
23245 return SDValue();
23246 if (NumElts != BCVT.getVectorNumElements())
23247 BCNumEltsChanged = true;
23248 VecOp = VecOp.getOperand(0);
23249 ExtVT = BCVT.getVectorElementType();
23250 }
23251
23252 // extract (vector load $addr), i --> load $addr + i * size
23253 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
23254 ISD::isNormalLoad(VecOp.getNode()) &&
23255 !Index->hasPredecessor(VecOp.getNode())) {
23256 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
23257 if (VecLoad && VecLoad->isSimple())
23258 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
23259 }
23260
23261 // Perform only after legalization to ensure build_vector / vector_shuffle
23262 // optimizations have already been done.
23263 if (!LegalOperations || !IndexC)
23264 return SDValue();
23265
23266 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
23267 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
23268 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
23269 int Elt = IndexC->getZExtValue();
23270 LoadSDNode *LN0 = nullptr;
23271 if (ISD::isNormalLoad(VecOp.getNode())) {
23272 LN0 = cast<LoadSDNode>(VecOp);
23273 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23274 VecOp.getOperand(0).getValueType() == ExtVT &&
23275 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
23276 // Don't duplicate a load with other uses.
23277 if (!VecOp.hasOneUse())
23278 return SDValue();
23279
23280 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
23281 }
23282 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
23283 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
23284 // =>
23285 // (load $addr+1*size)
23286
23287 // Don't duplicate a load with other uses.
23288 if (!VecOp.hasOneUse())
23289 return SDValue();
23290
23291 // If the bit convert changed the number of elements, it is unsafe
23292 // to examine the mask.
23293 if (BCNumEltsChanged)
23294 return SDValue();
23295
23296    // Select the input vector, guarding against an out-of-range extract index.
23297 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
23298 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
23299
23300 if (VecOp.getOpcode() == ISD::BITCAST) {
23301 // Don't duplicate a load with other uses.
23302 if (!VecOp.hasOneUse())
23303 return SDValue();
23304
23305 VecOp = VecOp.getOperand(0);
23306 }
23307 if (ISD::isNormalLoad(VecOp.getNode())) {
23308 LN0 = cast<LoadSDNode>(VecOp);
23309 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
23310 Index = DAG.getConstant(Elt, DL, Index.getValueType());
23311 }
23312 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
23313 VecVT.getVectorElementType() == ScalarVT &&
23314 (!LegalTypes ||
23315 TLI.isTypeLegal(
23316                  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
23317    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
23318 // -> extract_vector_elt a, 0
23319 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
23320 // -> extract_vector_elt a, 1
23321 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
23322 // -> extract_vector_elt b, 0
23323 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
23324 // -> extract_vector_elt b, 1
23325 EVT ConcatVT = VecOp.getOperand(0).getValueType();
23326 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
23327 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
23328 Index.getValueType());
23329
23330 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
23331    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
23332                              ConcatVT.getVectorElementType(),
23333 ConcatOp, NewIdx);
23334 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
23335 }
23336
23337 // Make sure we found a non-volatile load and the extractelement is
23338 // the only use.
23339 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
23340 return SDValue();
23341
23342 // If Idx was -1 above, Elt is going to be -1, so just return undef.
23343 if (Elt == -1)
23344 return DAG.getUNDEF(LVT);
23345
23346 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
23347}
23348
23349// Simplify (build_vec (ext )) to (bitcast (build_vec ))
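// For example (little endian, illustrative, i8 sources into a v2i32 result):
//   v2i32 build_vector (i32 zero_extend i8:a), (i32 zero_extend i8:b)
//     --> v2i32 bitcast (v8i8 build_vector a, 0, 0, 0, b, 0, 0, 0)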
23350SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
23351 // We perform this optimization post type-legalization because
23352 // the type-legalizer often scalarizes integer-promoted vectors.
23353 // Performing this optimization before may create bit-casts which
23354 // will be type-legalized to complex code sequences.
23355 // We perform this optimization only before the operation legalizer because we
23356 // may introduce illegal operations.
23357 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23358 return SDValue();
23359
23360 unsigned NumInScalars = N->getNumOperands();
23361 SDLoc DL(N);
23362 EVT VT = N->getValueType(0);
23363
23364 // Check to see if this is a BUILD_VECTOR of a bunch of values
23365 // which come from any_extend or zero_extend nodes. If so, we can create
23366 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
23367 // optimizations. We do not handle sign-extend because we can't fill the sign
23368 // using shuffles.
23369 EVT SourceType = MVT::Other;
23370 bool AllAnyExt = true;
23371
23372 for (unsigned i = 0; i != NumInScalars; ++i) {
23373 SDValue In = N->getOperand(i);
23374 // Ignore undef inputs.
23375 if (In.isUndef()) continue;
23376
23377 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
23378 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
23379
23380 // Abort if the element is not an extension.
23381 if (!ZeroExt && !AnyExt) {
23382 SourceType = MVT::Other;
23383 break;
23384 }
23385
23386 // The input is a ZeroExt or AnyExt. Check the original type.
23387 EVT InTy = In.getOperand(0).getValueType();
23388
23389 // Check that all of the widened source types are the same.
23390 if (SourceType == MVT::Other)
23391 // First time.
23392 SourceType = InTy;
23393 else if (InTy != SourceType) {
23394      // Multiple source types. Abort.
23395 SourceType = MVT::Other;
23396 break;
23397 }
23398
23399 // Check if all of the extends are ANY_EXTENDs.
23400 AllAnyExt &= AnyExt;
23401 }
23402
23403 // In order to have valid types, all of the inputs must be extended from the
23404 // same source type and all of the inputs must be any or zero extend.
23405 // Scalar sizes must be a power of two.
23406 EVT OutScalarTy = VT.getScalarType();
23407 bool ValidTypes =
23408 SourceType != MVT::Other &&
23409 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
23410 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
23411
23412 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
23413 // turn into a single shuffle instruction.
23414 if (!ValidTypes)
23415 return SDValue();
23416
23417 // If we already have a splat buildvector, then don't fold it if it means
23418 // introducing zeros.
23419 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
23420 return SDValue();
23421
23422 bool isLE = DAG.getDataLayout().isLittleEndian();
23423 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
23424 assert(ElemRatio > 1 && "Invalid element size ratio");
23425 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
23426 DAG.getConstant(0, DL, SourceType);
23427
23428 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
23429 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
23430
23431 // Populate the new build_vector
23432 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23433 SDValue Cast = N->getOperand(i);
23434 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
23435 Cast.getOpcode() == ISD::ZERO_EXTEND ||
23436 Cast.isUndef()) && "Invalid cast opcode");
23437 SDValue In;
23438 if (Cast.isUndef())
23439 In = DAG.getUNDEF(SourceType);
23440 else
23441 In = Cast->getOperand(0);
23442 unsigned Index = isLE ? (i * ElemRatio) :
23443 (i * ElemRatio + (ElemRatio - 1));
23444
23445 assert(Index < Ops.size() && "Invalid index");
23446 Ops[Index] = In;
23447 }
23448
23449 // The type of the new BUILD_VECTOR node.
23450 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
23451 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
23452 "Invalid vector size");
23453 // Check if the new vector type is legal.
23454 if (!isTypeLegal(VecVT) ||
23455 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
23456       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
23457    return SDValue();
23458
23459 // Make the new BUILD_VECTOR.
23460 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
23461
23462 // The new BUILD_VECTOR node has the potential to be further optimized.
23463 AddToWorklist(BV.getNode());
23464 // Bitcast to the desired type.
23465 return DAG.getBitcast(VT, BV);
23466}
23467
23468// Simplify (build_vec (trunc $1)
23469// (trunc (srl $1 half-width))
23470// (trunc (srl $1 (2 * half-width))))
23471// to (bitcast $1)
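// For example (little endian, illustrative, i64 $1 and a v4i16 result):
//   v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
//                      (trunc (srl $1, 32)), (trunc (srl $1, 48))
//     --> v4i16 bitcast $1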
23472SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
23473 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23474
23475 EVT VT = N->getValueType(0);
23476
23477 // Don't run this before LegalizeTypes if VT is legal.
23478 // Targets may have other preferences.
23479 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
23480 return SDValue();
23481
23482 // Only for little endian
23483 if (!DAG.getDataLayout().isLittleEndian())
23484 return SDValue();
23485
23486 SDLoc DL(N);
23487 EVT OutScalarTy = VT.getScalarType();
23488 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
23489
23490 // Only for power of two types to be sure that bitcast works well
23491 if (!isPowerOf2_64(ScalarTypeBitsize))
23492 return SDValue();
23493
23494 unsigned NumInScalars = N->getNumOperands();
23495
23496 // Look through bitcasts
23497 auto PeekThroughBitcast = [](SDValue Op) {
23498 if (Op.getOpcode() == ISD::BITCAST)
23499 return Op.getOperand(0);
23500 return Op;
23501 };
23502
23503 // The source value where all the parts are extracted.
23504 SDValue Src;
23505 for (unsigned i = 0; i != NumInScalars; ++i) {
23506 SDValue In = PeekThroughBitcast(N->getOperand(i));
23507 // Ignore undef inputs.
23508 if (In.isUndef()) continue;
23509
23510 if (In.getOpcode() != ISD::TRUNCATE)
23511 return SDValue();
23512
23513 In = PeekThroughBitcast(In.getOperand(0));
23514
23515 if (In.getOpcode() != ISD::SRL) {
23516 // For now only build_vec without shuffling, handle shifts here in the
23517 // future.
23518 if (i != 0)
23519 return SDValue();
23520
23521 Src = In;
23522 } else {
23523 // In is SRL
23524 SDValue part = PeekThroughBitcast(In.getOperand(0));
23525
23526 if (!Src) {
23527 Src = part;
23528 } else if (Src != part) {
23529 // Vector parts do not stem from the same variable
23530 return SDValue();
23531 }
23532
23533 SDValue ShiftAmtVal = In.getOperand(1);
23534 if (!isa<ConstantSDNode>(ShiftAmtVal))
23535 return SDValue();
23536
23537 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23538
23539 // The extracted value is not extracted at the right position
23540 if (ShiftAmt != i * ScalarTypeBitsize)
23541 return SDValue();
23542 }
23543 }
23544
23545 // Only cast if the size is the same
23546 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23547 return SDValue();
23548
23549 return DAG.getBitcast(VT, Src);
23550}
23551
23552SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23553 ArrayRef<int> VectorMask,
23554 SDValue VecIn1, SDValue VecIn2,
23555 unsigned LeftIdx, bool DidSplitVec) {
23556 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23557
23558 EVT VT = N->getValueType(0);
23559 EVT InVT1 = VecIn1.getValueType();
23560 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23561
23562 unsigned NumElems = VT.getVectorNumElements();
23563 unsigned ShuffleNumElems = NumElems;
23564
23565 // If we artificially split a vector in two already, then the offsets in the
23566 // operands will all be based off of VecIn1, even those in VecIn2.
23567 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23568
23569 uint64_t VTSize = VT.getFixedSizeInBits();
23570 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23571 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23572
23573 assert(InVT2Size <= InVT1Size &&
23574 "Inputs must be sorted to be in non-increasing vector size order.");
23575
23576 // We can't generate a shuffle node with mismatched input and output types.
23577 // Try to make the types match the type of the output.
23578 if (InVT1 != VT || InVT2 != VT) {
23579 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23580 // If the output vector length is a multiple of both input lengths,
23581 // we can concatenate them and pad the rest with undefs.
23582 unsigned NumConcats = VTSize / InVT1Size;
23583 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23584 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23585 ConcatOps[0] = VecIn1;
23586 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23587 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23588 VecIn2 = SDValue();
23589 } else if (InVT1Size == VTSize * 2) {
23590 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23591 return SDValue();
23592
23593 if (!VecIn2.getNode()) {
23594 // If we only have one input vector, and it's twice the size of the
23595 // output, split it in two.
23596 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23597 DAG.getVectorIdxConstant(NumElems, DL));
23598 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23599 // Since we now have shorter input vectors, adjust the offset of the
23600 // second vector's start.
23601 Vec2Offset = NumElems;
23602 } else {
23603 assert(InVT2Size <= InVT1Size &&
23604 "Second input is not going to be larger than the first one.");
23605
23606 // VecIn1 is wider than the output, and we have another, possibly
23607 // smaller input. Pad the smaller input with undefs, shuffle at the
23608 // input vector width, and extract the output.
23609 // The shuffle type is different than VT, so check legality again.
23610 if (LegalOperations &&
23611            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23612          return SDValue();
23613
23614 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23615 // lower it back into a BUILD_VECTOR. So if the inserted type is
23616 // illegal, don't even try.
23617 if (InVT1 != InVT2) {
23618 if (!TLI.isTypeLegal(InVT2))
23619 return SDValue();
23620 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23621 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23622 }
23623 ShuffleNumElems = NumElems * 2;
23624 }
23625 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23626 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23627 ConcatOps[0] = VecIn2;
23628 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23629 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23630 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23631 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23632 return SDValue();
23633      // If the dest vector has fewer than two elements, then using a shuffle and
23634      // extract from larger regs will cost even more.
23635 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23636 return SDValue();
23637 assert(InVT2Size <= InVT1Size &&
23638 "Second input is not going to be larger than the first one.");
23639
23640 // VecIn1 is wider than the output, and we have another, possibly
23641 // smaller input. Pad the smaller input with undefs, shuffle at the
23642 // input vector width, and extract the output.
23643 // The shuffle type is different than VT, so check legality again.
23644 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23645 return SDValue();
23646
23647 if (InVT1 != InVT2) {
23648 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23649 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23650 }
23651 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23652 } else {
23653 // TODO: Support cases where the length mismatch isn't exactly by a
23654 // factor of 2.
23655 // TODO: Move this check upwards, so that if we have bad type
23656 // mismatches, we don't create any DAG nodes.
23657 return SDValue();
23658 }
23659 }
23660
23661 // Initialize mask to undef.
23662 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23663
23664 // Only need to run up to the number of elements actually used, not the
23665 // total number of elements in the shuffle - if we are shuffling a wider
23666 // vector, the high lanes should be set to undef.
23667 for (unsigned i = 0; i != NumElems; ++i) {
23668 if (VectorMask[i] <= 0)
23669 continue;
23670
23671 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23672 if (VectorMask[i] == (int)LeftIdx) {
23673 Mask[i] = ExtIndex;
23674 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23675 Mask[i] = Vec2Offset + ExtIndex;
23676 }
23677 }
23678
23679  // The type of the input vectors may have changed above.
23680 InVT1 = VecIn1.getValueType();
23681
23682 // If we already have a VecIn2, it should have the same type as VecIn1.
23683 // If we don't, get an undef/zero vector of the appropriate type.
23684 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23685 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23686
23687 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23688 if (ShuffleNumElems > NumElems)
23689 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23690
23691 return Shuffle;
23692}
23693
23694static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23695  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23696
23697 // First, determine where the build vector is not undef.
23698 // TODO: We could extend this to handle zero elements as well as undefs.
23699 int NumBVOps = BV->getNumOperands();
23700 int ZextElt = -1;
23701 for (int i = 0; i != NumBVOps; ++i) {
23702 SDValue Op = BV->getOperand(i);
23703 if (Op.isUndef())
23704 continue;
23705 if (ZextElt == -1)
23706 ZextElt = i;
23707 else
23708 return SDValue();
23709 }
23710 // Bail out if there's no non-undef element.
23711 if (ZextElt == -1)
23712 return SDValue();
23713
23714 // The build vector contains some number of undef elements and exactly
23715 // one other element. That other element must be a zero-extended scalar
23716 // extracted from a vector at a constant index to turn this into a shuffle.
23717 // Also, require that the build vector does not implicitly truncate/extend
23718 // its elements.
23719 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23720 EVT VT = BV->getValueType(0);
23721 SDValue Zext = BV->getOperand(ZextElt);
23722 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23723      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23724      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23725      Zext.getOperand(0).getValueType() != VT.getVectorElementType())
23726    return SDValue();
23727
23728 // The zero-extend must be a multiple of the source size, and we must be
23729 // building a vector of the same size as the source of the extract element.
23730 SDValue Extract = Zext.getOperand(0);
23731 unsigned DestSize = Zext.getValueSizeInBits();
23732 unsigned SrcSize = Extract.getValueSizeInBits();
23733 if (DestSize % SrcSize != 0 ||
23734 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23735 return SDValue();
23736
23737 // Create a shuffle mask that will combine the extracted element with zeros
23738 // and undefs.
23739 int ZextRatio = DestSize / SrcSize;
23740 int NumMaskElts = NumBVOps * ZextRatio;
23741 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23742 for (int i = 0; i != NumMaskElts; ++i) {
23743 if (i / ZextRatio == ZextElt) {
23744 // The low bits of the (potentially translated) extracted element map to
23745 // the source vector. The high bits map to zero. We will use a zero vector
23746 // as the 2nd source operand of the shuffle, so use the 1st element of
23747 // that vector (mask value is number-of-elements) for the high bits.
23748 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23749 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23750 : NumMaskElts;
23751 }
23752
23753 // Undef elements of the build vector remain undef because we initialize
23754 // the shuffle mask with -1.
23755 }
23756
23757 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23758 // bitcast (shuffle V, ZeroVec, VectorMask)
23759 SDLoc DL(BV);
23760 EVT VecVT = Extract.getOperand(0).getValueType();
23761 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23762 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23763 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23764 ZeroVec, ShufMask, DAG);
23765 if (!Shuf)
23766 return SDValue();
23767 return DAG.getBitcast(VT, Shuf);
23768}
23769
23770// FIXME: promote to STLExtras.
23771template <typename R, typename T>
23772static auto getFirstIndexOf(R &&Range, const T &Val) {
23773 auto I = find(Range, Val);
23774 if (I == Range.end())
23775 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23776 return std::distance(Range.begin(), I);
23777}
23778
23779// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23780// operations. If the types of the vectors we're extracting from allow it,
23781// turn this into a vector_shuffle node.
23782SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23783 SDLoc DL(N);
23784 EVT VT = N->getValueType(0);
23785
23786 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23787 if (!isTypeLegal(VT))
23788 return SDValue();
23789
23791 return V;
23792
23793 // May only combine to shuffle after legalize if shuffle is legal.
23794 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23795 return SDValue();
23796
23797 bool UsesZeroVector = false;
23798 unsigned NumElems = N->getNumOperands();
23799
23800 // Record, for each element of the newly built vector, which input vector
23801 // that element comes from. -1 stands for undef, 0 for the zero vector,
23802 // and positive values for the input vectors.
23803 // VectorMask maps each element to its vector number, and VecIn maps vector
23804 // numbers to their initial SDValues.
23805
23806 SmallVector<int, 8> VectorMask(NumElems, -1);
23807  SmallVector<SDValue, 8> VecIn;
23808  VecIn.push_back(SDValue());
23809
23810 for (unsigned i = 0; i != NumElems; ++i) {
23811 SDValue Op = N->getOperand(i);
23812
23813 if (Op.isUndef())
23814 continue;
23815
23816 // See if we can use a blend with a zero vector.
23817 // TODO: Should we generalize this to a blend with an arbitrary constant
23818 // vector?
23819    if (isNullConstant(Op) || isNullFPConstant(Op)) {
23820      UsesZeroVector = true;
23821 VectorMask[i] = 0;
23822 continue;
23823 }
23824
23825 // Not an undef or zero. If the input is something other than an
23826 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23827 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23828 !isa<ConstantSDNode>(Op.getOperand(1)))
23829 return SDValue();
23830 SDValue ExtractedFromVec = Op.getOperand(0);
23831
23832 if (ExtractedFromVec.getValueType().isScalableVector())
23833 return SDValue();
23834
23835 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23836 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23837 return SDValue();
23838
23839 // All inputs must have the same element type as the output.
23840 if (VT.getVectorElementType() !=
23841 ExtractedFromVec.getValueType().getVectorElementType())
23842 return SDValue();
23843
23844 // Have we seen this input vector before?
23845 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23846 // a map back from SDValues to numbers isn't worth it.
23847 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23848 if (Idx == -1) { // A new source vector?
23849 Idx = VecIn.size();
23850 VecIn.push_back(ExtractedFromVec);
23851 }
23852
23853 VectorMask[i] = Idx;
23854 }
23855
23856 // If we didn't find at least one input vector, bail out.
23857 if (VecIn.size() < 2)
23858 return SDValue();
23859
23860 // If all the operands of the BUILD_VECTOR extract from the same
23861 // vector, then split that vector efficiently based on the maximum
23862 // vector access index and adjust the VectorMask and
23863 // VecIn accordingly.
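// Illustrative sketch (hypothetical example, assuming v8i32 is a legal type):
// a v4i32 BUILD_VECTOR whose elements are all extracted from one v32i32 source
// at indices {1, 3, 9, 11} has MaxIndex = 11, so NearestPow2 = 16 and
// NumElems * 2 = 8 < 16. The source is split into two v8i32 halves (elements
// [0,8) and [8,16)), and each mask entry becomes 1 or 2 depending on which
// half its index falls into.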
23864 bool DidSplitVec = false;
23865 if (VecIn.size() == 2) {
23866 unsigned MaxIndex = 0;
23867 unsigned NearestPow2 = 0;
23868 SDValue Vec = VecIn.back();
23869 EVT InVT = Vec.getValueType();
23870 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23871
23872 for (unsigned i = 0; i < NumElems; i++) {
23873 if (VectorMask[i] <= 0)
23874 continue;
23875 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23876 IndexVec[i] = Index;
23877 MaxIndex = std::max(MaxIndex, Index);
23878 }
23879
23880 NearestPow2 = PowerOf2Ceil(MaxIndex);
23881 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23882 NumElems * 2 < NearestPow2) {
23883 unsigned SplitSize = NearestPow2 / 2;
23884 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23885 InVT.getVectorElementType(), SplitSize);
23886 if (TLI.isTypeLegal(SplitVT) &&
23887 SplitSize + SplitVT.getVectorNumElements() <=
23888 InVT.getVectorNumElements()) {
23889 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23890 DAG.getVectorIdxConstant(SplitSize, DL));
23891 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23892 DAG.getVectorIdxConstant(0, DL));
23893 VecIn.pop_back();
23894 VecIn.push_back(VecIn1);
23895 VecIn.push_back(VecIn2);
23896 DidSplitVec = true;
23897
23898 for (unsigned i = 0; i < NumElems; i++) {
23899 if (VectorMask[i] <= 0)
23900 continue;
23901 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23902 }
23903 }
23904 }
23905 }
23906
23907 // Sort input vectors by decreasing vector element count,
23908 // while preserving the relative order of equally-sized vectors.
23909 // Note that we keep the first "implicit" zero vector as-is.
23910 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23911 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23912 [](const SDValue &a, const SDValue &b) {
23913 return a.getValueType().getVectorNumElements() >
23914 b.getValueType().getVectorNumElements();
23915 });
23916
23917 // We now also need to rebuild the VectorMask, because it referenced element
23918 // order in VecIn, and we just sorted them.
23919 for (int &SourceVectorIndex : VectorMask) {
23920 if (SourceVectorIndex <= 0)
23921 continue;
23922 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23923 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23924 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23925 SourceVectorIndex = Idx;
23926 }
23927
23928 VecIn = std::move(SortedVecIn);
23929
23930 // TODO: Should this fire if some of the input vectors have an illegal type (like
23931 // it does now), or should we let legalization run its course first?
23932
23933 // Shuffle phase:
23934 // Take pairs of vectors, and shuffle them so that the result has elements
23935 // from these vectors in the correct places.
23936 // For example, given:
23937 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23938 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23939 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23940 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23941 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23942 // We will generate:
23943 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23944 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23945 SmallVector<SDValue, 4> Shuffles;
23946 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23947 unsigned LeftIdx = 2 * In + 1;
23948 SDValue VecLeft = VecIn[LeftIdx];
23949 SDValue VecRight =
23950 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23951
23952 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23953 VecRight, LeftIdx, DidSplitVec))
23954 Shuffles.push_back(Shuffle);
23955 else
23956 return SDValue();
23957 }
23958
23959 // If we need the zero vector as an "ingredient" in the blend tree, add it
23960 // to the list of shuffles.
23961 if (UsesZeroVector)
23962 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23963 : DAG.getConstantFP(0.0, DL, VT));
23964
23965 // If we only have one shuffle, we're done.
23966 if (Shuffles.size() == 1)
23967 return Shuffles[0];
23968
23969 // Update the vector mask to point to the post-shuffle vectors.
23970 for (int &Vec : VectorMask)
23971 if (Vec == 0)
23972 Vec = Shuffles.size() - 1;
23973 else
23974 Vec = (Vec - 1) / 2;
23975
23976 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23977 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23978 // generate:
23979 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23980 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23981 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23982 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23983 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23984 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23985 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23986
23987 // Make sure the initial size of the shuffle list is even.
23988 if (Shuffles.size() % 2)
23989 Shuffles.push_back(DAG.getUNDEF(VT));
23990
23991 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23992 if (CurSize % 2) {
23993 Shuffles[CurSize] = DAG.getUNDEF(VT);
23994 CurSize++;
23995 }
23996 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23997 int Left = 2 * In;
23998 int Right = 2 * In + 1;
23999 SmallVector<int, 8> Mask(NumElems, -1);
24000 SDValue L = Shuffles[Left];
24001 ArrayRef<int> LMask;
24002 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24003 L.use_empty() && L.getOperand(1).isUndef() &&
24004 L.getOperand(0).getValueType() == L.getValueType();
24005 if (IsLeftShuffle) {
24006 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24007 L = L.getOperand(0);
24008 }
24009 SDValue R = Shuffles[Right];
24010 ArrayRef<int> RMask;
24011 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24012 R.use_empty() && R.getOperand(1).isUndef() &&
24013 R.getOperand(0).getValueType() == R.getValueType();
24014 if (IsRightShuffle) {
24015 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24016 R = R.getOperand(0);
24017 }
24018 for (unsigned I = 0; I != NumElems; ++I) {
24019 if (VectorMask[I] == Left) {
24020 Mask[I] = I;
24021 if (IsLeftShuffle)
24022 Mask[I] = LMask[I];
24023 VectorMask[I] = In;
24024 } else if (VectorMask[I] == Right) {
24025 Mask[I] = I + NumElems;
24026 if (IsRightShuffle)
24027 Mask[I] = RMask[I] + NumElems;
24028 VectorMask[I] = In;
24029 }
24030 }
24031
24032 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24033 }
24034 }
24035 return Shuffles[0];
24036}
24037
24038 // Try to turn a build vector of zero extends of extract vector elts into
24039 // a vector zero extend and possibly an extract subvector.
24040// TODO: Support sign extend?
24041// TODO: Allow undef elements?
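// Illustrative sketch (hypothetical nodes/types, assuming v4i16 is legal where needed):
//   t2: i32 = zero_extend (extract_vector_elt t1:v8i16, Constant:i64<4>)
//   t3: i32 = zero_extend (extract_vector_elt t1:v8i16, Constant:i64<5>)
//   t4: i32 = zero_extend (extract_vector_elt t1:v8i16, Constant:i64<6>)
//   t5: i32 = zero_extend (extract_vector_elt t1:v8i16, Constant:i64<7>)
//   t6: v4i32 = BUILD_VECTOR t2, t3, t4, t5
// is rebuilt as
//   t7: v4i16 = extract_subvector t1, Constant:i64<4>
//   t8: v4i32 = zero_extend t7
// The starting offset (4) is a multiple of the result's element count (4), as required.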
24042SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24043 if (LegalOperations)
24044 return SDValue();
24045
24046 EVT VT = N->getValueType(0);
24047
24048 bool FoundZeroExtend = false;
24049 SDValue Op0 = N->getOperand(0);
24050 auto checkElem = [&](SDValue Op) -> int64_t {
24051 unsigned Opc = Op.getOpcode();
24052 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24053 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24054 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24055 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24056 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24057 return C->getZExtValue();
24058 return -1;
24059 };
24060
24061 // Make sure the first element matches
24062 // (zext (extract_vector_elt X, C))
24063 // Offset must be a constant multiple of the
24064 // known-minimum vector length of the result type.
24065 int64_t Offset = checkElem(Op0);
24066 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24067 return SDValue();
24068
24069 unsigned NumElems = N->getNumOperands();
24070 SDValue In = Op0.getOperand(0).getOperand(0);
24071 EVT InSVT = In.getValueType().getScalarType();
24072 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24073
24074 // Don't create an illegal input type after type legalization.
24075 if (LegalTypes && !TLI.isTypeLegal(InVT))
24076 return SDValue();
24077
24078 // Ensure all the elements come from the same vector and are adjacent.
24079 for (unsigned i = 1; i != NumElems; ++i) {
24080 if ((Offset + i) != checkElem(N->getOperand(i)))
24081 return SDValue();
24082 }
24083
24084 SDLoc DL(N);
24085 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24086 Op0.getOperand(0).getOperand(1));
24087 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24088 VT, In);
24089}
24090
24091 // If this is a very simple BUILD_VECTOR with its first element being a ZERO_EXTEND,
24092 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
24093 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
24094 // This pattern can appear during legalization.
24095 //
24096 // NOTE: This can be generalized to allow more than a single
24097 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
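// Illustrative sketch (hypothetical nodes, assuming i32/v4i32 are legal):
//   t2: i64 = zero_extend t1:i32
//   t3: v2i64 = BUILD_VECTOR t2, Constant:i64<0>
// has EltBitwidth = 64 and ActiveBits = 32, so Factor = 2 and the node is
// rebuilt as
//   t4: i32 = truncate (i64 bitcast t2)
//   t5: v4i32 = BUILD_VECTOR t4, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
//   t6: v2i64 = bitcast t5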
24098SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24099 // Don't run this after legalization. Targets may have other preferences.
24100 if (Level >= AfterLegalizeDAG)
24101 return SDValue();
24102
24103 // FIXME: support big-endian.
24104 if (DAG.getDataLayout().isBigEndian())
24105 return SDValue();
24106
24107 EVT VT = N->getValueType(0);
24108 EVT OpVT = N->getOperand(0).getValueType();
24109 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24110
24111 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24112
24113 if (!TLI.isTypeLegal(OpIntVT) ||
24114 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24115 return SDValue();
24116
24117 unsigned EltBitwidth = VT.getScalarSizeInBits();
24118 // NOTE: the actual width of operands may be wider than that!
24119
24120 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24121 // active bits they all have? We'll want to truncate them all to that width.
24122 unsigned ActiveBits = 0;
24123 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24124 for (auto I : enumerate(N->ops())) {
24125 SDValue Op = I.value();
24126 // FIXME: support UNDEF elements?
24127 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24128 unsigned OpActiveBits =
24129 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24130 if (OpActiveBits == 0) {
24131 KnownZeroOps.setBit(I.index());
24132 continue;
24133 }
24134 // Profitability check: don't allow non-zero constant operands.
24135 return SDValue();
24136 }
24137 // Profitability check: there must only be a single non-zero operand,
24138 // and it must be the first operand of the BUILD_VECTOR.
24139 if (I.index() != 0)
24140 return SDValue();
24141 // The operand must be a zero-extension itself.
24142 // FIXME: this could be generalized to known leading zeros check.
24143 if (Op.getOpcode() != ISD::ZERO_EXTEND)
24144 return SDValue();
24145 unsigned CurrActiveBits =
24146 Op.getOperand(0).getValueSizeInBits().getFixedValue();
24147 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24148 ActiveBits = CurrActiveBits;
24149 // We want to at least halve the element size.
24150 if (2 * ActiveBits > EltBitwidth)
24151 return SDValue();
24152 }
24153
24154 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24155 if (ActiveBits == 0)
24156 return SDValue();
24157
24158 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
24159 // into how many chunks can we split our element width?
24160 EVT NewScalarIntVT, NewIntVT;
24161 std::optional<unsigned> Factor;
24162 // We can split the element into at least two chunks, but not into more
24163 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
24164 // that evenly divides the element width,
24165 // and for which the resulting types/operations on that chunk width are legal.
24166 assert(2 * ActiveBits <= EltBitwidth &&
24167 "We know that half or less bits of the element are active.");
24168 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24169 if (EltBitwidth % Scale != 0)
24170 continue;
24171 unsigned ChunkBitwidth = EltBitwidth / Scale;
24172 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
24173 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
24174 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
24175 Scale * N->getNumOperands());
24176 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
24177 (LegalOperations &&
24178 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
24179 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
24180 continue;
24181 Factor = Scale;
24182 break;
24183 }
24184 if (!Factor)
24185 return SDValue();
24186
24187 SDLoc DL(N);
24188 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
24189
24190 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
24191 SmallVector<SDValue, 16> NewOps;
24192 NewOps.reserve(NewIntVT.getVectorNumElements());
24193 for (auto I : enumerate(N->ops())) {
24194 SDValue Op = I.value();
24195 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
24196 unsigned SrcOpIdx = I.index();
24197 if (KnownZeroOps[SrcOpIdx]) {
24198 NewOps.append(*Factor, ZeroOp);
24199 continue;
24200 }
24201 Op = DAG.getBitcast(OpIntVT, Op);
24202 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
24203 NewOps.emplace_back(Op);
24204 NewOps.append(*Factor - 1, ZeroOp);
24205 }
24206 assert(NewOps.size() == NewIntVT.getVectorNumElements());
24207 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
24208 NewBV = DAG.getBitcast(VT, NewBV);
24209 return NewBV;
24210}
24211
24212SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
24213 EVT VT = N->getValueType(0);
24214
24215 // A vector built entirely of undefs is undef.
24216 if (ISD::allOperandsUndef(N))
24217 return DAG.getUNDEF(VT);
24218
24219 // If this is a splat of a bitcast from another vector, change to a
24220 // concat_vector.
24221 // For example:
24222 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24223 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
24224 //
24225 // If X is a build_vector itself, the concat can become a larger build_vector.
24226 // TODO: Maybe this is useful for non-splat too?
24227 if (!LegalOperations) {
24228 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24229 // Only change build_vector to a concat_vector if the splat value type is
24230 // same as the vector element type.
24231 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
24232 Splat = peekThroughBitcasts(Splat);
24233 EVT SrcVT = Splat.getValueType();
24234 if (SrcVT.isVector()) {
24235 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
24236 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
24237 SrcVT.getVectorElementType(), NumElts);
24238 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
24239 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
24240 SDValue Concat =
24241 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
24242 return DAG.getBitcast(VT, Concat);
24243 }
24244 }
24245 }
24246 }
24247
24248 // Check if we can express BUILD VECTOR via subvector extract.
24249 if (!LegalTypes && (N->getNumOperands() > 1)) {
24250 SDValue Op0 = N->getOperand(0);
24251 auto checkElem = [&](SDValue Op) -> uint64_t {
24252 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
24253 (Op0.getOperand(0) == Op.getOperand(0)))
24254 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
24255 return CNode->getZExtValue();
24256 return -1;
24257 };
24258
24259 int Offset = checkElem(Op0);
24260 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
24261 if (Offset + i != checkElem(N->getOperand(i))) {
24262 Offset = -1;
24263 break;
24264 }
24265 }
24266
24267 if ((Offset == 0) &&
24268 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
24269 return Op0.getOperand(0);
24270 if ((Offset != -1) &&
24271 ((Offset % N->getValueType(0).getVectorNumElements()) ==
24272 0)) // IDX must be multiple of output size.
24273 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
24274 Op0.getOperand(0), Op0.getOperand(1));
24275 }
24276
24277 if (SDValue V = convertBuildVecZextToZext(N))
24278 return V;
24279
24280 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
24281 return V;
24282
24283 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
24284 return V;
24285
24286 if (SDValue V = reduceBuildVecTruncToBitCast(N))
24287 return V;
24288
24289 if (SDValue V = reduceBuildVecToShuffle(N))
24290 return V;
24291
24292 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
24293 // Do this late as some of the above may replace the splat.
24294 if (!LegalOperations && TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))
24295 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
24296 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
24297 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
24298 }
24299
24300 return SDValue();
24301}
24302
24303 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
24304 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24305 EVT OpVT = N->getOperand(0).getValueType();
24306
24307 // If the operands are legal vectors, leave them alone.
24308 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
24309 return SDValue();
24310
24311 SDLoc DL(N);
24312 EVT VT = N->getValueType(0);
24313 SmallVector<SDValue, 8> Ops;
24314 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24315
24316 // Keep track of what we encounter.
24317 EVT AnyFPVT;
24318
24319 for (const SDValue &Op : N->ops()) {
24320 if (ISD::BITCAST == Op.getOpcode() &&
24321 !Op.getOperand(0).getValueType().isVector())
24322 Ops.push_back(Op.getOperand(0));
24323 else if (ISD::UNDEF == Op.getOpcode())
24324 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
24325 else
24326 return SDValue();
24327
24328 // Note whether we encounter an integer or floating point scalar.
24329 // If it's neither, bail out, it could be something weird like x86mmx.
24330 EVT LastOpVT = Ops.back().getValueType();
24331 if (LastOpVT.isFloatingPoint())
24332 AnyFPVT = LastOpVT;
24333 else if (!LastOpVT.isInteger())
24334 return SDValue();
24335 }
24336
24337 // If any of the operands is a floating point scalar bitcast to a vector,
24338 // use floating point types throughout, and bitcast everything.
24339 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
24340 if (AnyFPVT != EVT()) {
24341 SVT = AnyFPVT;
24342 for (SDValue &Op : Ops) {
24343 if (Op.getValueType() == SVT)
24344 continue;
24345 if (Op.isUndef())
24346 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
24347 else
24348 Op = DAG.getBitcast(SVT, Op);
24349 }
24350 }
24351
24352 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
24353 VT.getSizeInBits() / SVT.getSizeInBits());
24354 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
24355}
24356
24357// Attempt to merge nested concat_vectors/undefs.
24358// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
24359// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
24360 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
24361 SelectionDAG &DAG) {
24362 EVT VT = N->getValueType(0);
24363
24364 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
24365 EVT SubVT;
24366 SDValue FirstConcat;
24367 for (const SDValue &Op : N->ops()) {
24368 if (Op.isUndef())
24369 continue;
24370 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
24371 return SDValue();
24372 if (!FirstConcat) {
24373 SubVT = Op.getOperand(0).getValueType();
24374 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
24375 return SDValue();
24376 FirstConcat = Op;
24377 continue;
24378 }
24379 if (SubVT != Op.getOperand(0).getValueType())
24380 return SDValue();
24381 }
24382 assert(FirstConcat && "Concat of all-undefs found");
24383
24384 SmallVector<SDValue> ConcatOps;
24385 for (const SDValue &Op : N->ops()) {
24386 if (Op.isUndef()) {
24387 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
24388 continue;
24389 }
24390 ConcatOps.append(Op->op_begin(), Op->op_end());
24391 }
24392 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
24393}
24394
24395// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
24396// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
24397// most two distinct vectors the same size as the result, attempt to turn this
24398// into a legal shuffle.
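// Illustrative sketch (hypothetical nodes, subject to the legality checks below):
//   t3: v2i32 = extract_subvector t1:v4i32, Constant:i64<2>
//   t4: v2i32 = extract_subvector t2:v4i32, Constant:i64<0>
//   t5: v4i32 = concat_vectors t3, t4
// can become, if the target accepts the mask,
//   t6: v4i32 = vector_shuffle<2,3,4,5> t1, t2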
24399 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
24400 EVT VT = N->getValueType(0);
24401 EVT OpVT = N->getOperand(0).getValueType();
24402
24403 // We currently can't generate an appropriate shuffle for a scalable vector.
24404 if (VT.isScalableVector())
24405 return SDValue();
24406
24407 int NumElts = VT.getVectorNumElements();
24408 int NumOpElts = OpVT.getVectorNumElements();
24409
24410 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
24411 SmallVector<int, 8> Mask;
24412
24413 for (SDValue Op : N->ops()) {
24414 Op = peekThroughBitcasts(Op);
24415
24416 // UNDEF nodes convert to UNDEF shuffle mask values.
24417 if (Op.isUndef()) {
24418 Mask.append((unsigned)NumOpElts, -1);
24419 continue;
24420 }
24421
24422 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24423 return SDValue();
24424
24425 // What vector are we extracting the subvector from and at what index?
24426 SDValue ExtVec = Op.getOperand(0);
24427 int ExtIdx = Op.getConstantOperandVal(1);
24428
24429 // We want the EVT of the original extraction to correctly scale the
24430 // extraction index.
24431 EVT ExtVT = ExtVec.getValueType();
24432 ExtVec = peekThroughBitcasts(ExtVec);
24433
24434 // UNDEF nodes convert to UNDEF shuffle mask values.
24435 if (ExtVec.isUndef()) {
24436 Mask.append((unsigned)NumOpElts, -1);
24437 continue;
24438 }
24439
24440 // Ensure that we are extracting a subvector from a vector the same
24441 // size as the result.
24442 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
24443 return SDValue();
24444
24445 // Scale the subvector index to account for any bitcast.
24446 int NumExtElts = ExtVT.getVectorNumElements();
24447 if (0 == (NumExtElts % NumElts))
24448 ExtIdx /= (NumExtElts / NumElts);
24449 else if (0 == (NumElts % NumExtElts))
24450 ExtIdx *= (NumElts / NumExtElts);
24451 else
24452 return SDValue();
24453
24454 // At most we can reference 2 inputs in the final shuffle.
24455 if (SV0.isUndef() || SV0 == ExtVec) {
24456 SV0 = ExtVec;
24457 for (int i = 0; i != NumOpElts; ++i)
24458 Mask.push_back(i + ExtIdx);
24459 } else if (SV1.isUndef() || SV1 == ExtVec) {
24460 SV1 = ExtVec;
24461 for (int i = 0; i != NumOpElts; ++i)
24462 Mask.push_back(i + ExtIdx + NumElts);
24463 } else {
24464 return SDValue();
24465 }
24466 }
24467
24468 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24469 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
24470 DAG.getBitcast(VT, SV1), Mask, DAG);
24471}
24472
24473 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
24474 unsigned CastOpcode = N->getOperand(0).getOpcode();
24475 switch (CastOpcode) {
24476 case ISD::SINT_TO_FP:
24477 case ISD::UINT_TO_FP:
24478 case ISD::FP_TO_SINT:
24479 case ISD::FP_TO_UINT:
24480 // TODO: Allow more opcodes?
24481 // case ISD::BITCAST:
24482 // case ISD::TRUNCATE:
24483 // case ISD::ZERO_EXTEND:
24484 // case ISD::SIGN_EXTEND:
24485 // case ISD::FP_EXTEND:
24486 break;
24487 default:
24488 return SDValue();
24489 }
24490
24491 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
24492 if (!SrcVT.isVector())
24493 return SDValue();
24494
24495 // All operands of the concat must be the same kind of cast from the same
24496 // source type.
24497 SmallVector<SDValue, 4> SrcOps;
24498 for (SDValue Op : N->ops()) {
24499 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
24500 Op.getOperand(0).getValueType() != SrcVT)
24501 return SDValue();
24502 SrcOps.push_back(Op.getOperand(0));
24503 }
24504
24505 // The wider cast must be supported by the target. This is unusual because
24506 // the operation support type parameter depends on the opcode. In addition,
24507 // check the other type in the cast to make sure this is really legal.
24508 EVT VT = N->getValueType(0);
24509 EVT SrcEltVT = SrcVT.getVectorElementType();
24510 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24511 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24512 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24513 switch (CastOpcode) {
24514 case ISD::SINT_TO_FP:
24515 case ISD::UINT_TO_FP:
24516 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24517 !TLI.isTypeLegal(VT))
24518 return SDValue();
24519 break;
24520 case ISD::FP_TO_SINT:
24521 case ISD::FP_TO_UINT:
24522 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24523 !TLI.isTypeLegal(ConcatSrcVT))
24524 return SDValue();
24525 break;
24526 default:
24527 llvm_unreachable("Unexpected cast opcode");
24528 }
24529
24530 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24531 SDLoc DL(N);
24532 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24533 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24534}
24535
24536// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24537// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24538 // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
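// Illustrative sketch (hypothetical nodes, when the combined mask is legal):
//   t3: v4i32 = vector_shuffle<1,0,3,2> t1, undef
//   t4: v8i32 = concat_vectors t3, t1
// can be rewritten as
//   t5: v8i32 = concat_vectors t1, undef
//   t6: v8i32 = vector_shuffle<1,0,3,2,0,1,2,3> t5, undef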
24539 static SDValue combineConcatVectorOfShuffleAndItsOperands(
24540 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24541 bool LegalOperations) {
24542 EVT VT = N->getValueType(0);
24543 EVT OpVT = N->getOperand(0).getValueType();
24544 if (VT.isScalableVector())
24545 return SDValue();
24546
24547 // For now, only allow simple 2-operand concatenations.
24548 if (N->getNumOperands() != 2)
24549 return SDValue();
24550
24551 // Don't create illegal types/shuffles when not allowed to.
24552 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24553 (LegalOperations &&
24554 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24555 return SDValue();
24556
24557 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24558 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24559 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24560 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24561 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24562 ShuffleVectorSDNode *SVN = nullptr;
24563 for (SDValue Op : N->ops()) {
24564 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24565 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24566 all_of(N->ops(), [CurSVN](SDValue Op) {
24567 // FIXME: can we allow UNDEF operands?
24568 return !Op.isUndef() &&
24569 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24570 })) {
24571 SVN = CurSVN;
24572 break;
24573 }
24574 }
24575 if (!SVN)
24576 return SDValue();
24577
24578 // We are going to pad the shuffle operands, so any index that was picking
24579 // from the second operand must be adjusted.
24580 SmallVector<int, 16> AdjustedMask;
24581 AdjustedMask.reserve(SVN->getMask().size());
24582 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24583 append_range(AdjustedMask, SVN->getMask());
24584
24585 // Identity masks for the operands of the (padded) shuffle.
24586 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24587 MutableArrayRef<int> FirstShufOpIdentityMask =
24588 MutableArrayRef<int>(IdentityMask)
24589 .take_front(OpVT.getVectorNumElements());
24590 MutableArrayRef<int> SecondShufOpIdentityMask =
24591 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24592 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24593 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24594 VT.getVectorNumElements());
24595
24596 // New combined shuffle mask.
24597 SmallVector<int, 32> Mask;
24598 Mask.reserve(VT.getVectorNumElements());
24599 for (SDValue Op : N->ops()) {
24600 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24601 if (Op.getNode() == SVN) {
24602 append_range(Mask, AdjustedMask);
24603 continue;
24604 }
24605 if (Op == SVN->getOperand(0)) {
24606 append_range(Mask, FirstShufOpIdentityMask);
24607 continue;
24608 }
24609 if (Op == SVN->getOperand(1)) {
24610 append_range(Mask, SecondShufOpIdentityMask);
24611 continue;
24612 }
24613 llvm_unreachable("Unexpected operand!");
24614 }
24615
24616 // Don't create illegal shuffle masks.
24617 if (!TLI.isShuffleMaskLegal(Mask, VT))
24618 return SDValue();
24619
24620 // Pad the shuffle operands with UNDEF.
24621 SDLoc dl(N);
24622 std::array<SDValue, 2> ShufOps;
24623 for (auto I : zip(SVN->ops(), ShufOps)) {
24624 SDValue ShufOp = std::get<0>(I);
24625 SDValue &NewShufOp = std::get<1>(I);
24626 if (ShufOp.isUndef())
24627 NewShufOp = DAG.getUNDEF(VT);
24628 else {
24629 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24630 DAG.getUNDEF(OpVT));
24631 ShufOpParts[0] = ShufOp;
24632 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24633 }
24634 }
24635 // Finally, create the new wide shuffle.
24636 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24637}
24638
24639SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24640 // If we only have one input vector, we don't need to do any concatenation.
24641 if (N->getNumOperands() == 1)
24642 return N->getOperand(0);
24643
24644 // Check if all of the operands are undefs.
24645 EVT VT = N->getValueType(0);
24646 if (ISD::allOperandsUndef(N))
24647 return DAG.getUNDEF(VT);
24648
24649 // Optimize concat_vectors where all but the first of the vectors are undef.
24650 if (all_of(drop_begin(N->ops()),
24651 [](const SDValue &Op) { return Op.isUndef(); })) {
24652 SDValue In = N->getOperand(0);
24653 assert(In.getValueType().isVector() && "Must concat vectors");
24654
24655 // If the input is a concat_vectors, just make a larger concat by padding
24656 // with smaller undefs.
24657 //
24658 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24659 // here could cause an infinite loop. That legalizing happens when LegalDAG
24660 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24661 // scalable.
24662 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24663 !(LegalDAG && In.getValueType().isScalableVector())) {
24664 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24665 SmallVector<SDValue, 4> Ops(In->ops());
24666 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24667 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24668 }
24669
24670 SDValue Scalar = peekThroughOneUseBitcasts(In);
24671
24672 // concat_vectors(scalar_to_vector(scalar), undef) ->
24673 // scalar_to_vector(scalar)
24674 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24675 Scalar.hasOneUse()) {
24676 EVT SVT = Scalar.getValueType().getVectorElementType();
24677 if (SVT == Scalar.getOperand(0).getValueType())
24678 Scalar = Scalar.getOperand(0);
24679 }
24680
24681 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24682 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24683 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24684 // look through the trunc so we can still do the transform:
24685 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24686 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24687 !TLI.isTypeLegal(Scalar.getValueType()) &&
24688 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24689 Scalar = Scalar->getOperand(0);
24690
24691 EVT SclTy = Scalar.getValueType();
24692
24693 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24694 return SDValue();
24695
24696 // Bail out if the vector size is not a multiple of the scalar size.
24697 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24698 return SDValue();
24699
24700 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24701 if (VNTNumElms < 2)
24702 return SDValue();
24703
24704 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24705 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24706 return SDValue();
24707
24708 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24709 return DAG.getBitcast(VT, Res);
24710 }
24711 }
24712
24713 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24714 // We have already tested above for an UNDEF only concatenation.
24715 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24716 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24717 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24718 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24719 };
24720 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24721 SmallVector<SDValue, 8> Opnds;
24722 EVT SVT = VT.getScalarType();
24723
24724 EVT MinVT = SVT;
24725 if (!SVT.isFloatingPoint()) {
24726 // If the BUILD_VECTORs are built from integers, they may have different
24727 // operand types. Get the smallest type and truncate all operands to it.
24728 bool FoundMinVT = false;
24729 for (const SDValue &Op : N->ops())
24730 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24731 EVT OpSVT = Op.getOperand(0).getValueType();
24732 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24733 FoundMinVT = true;
24734 }
24735 assert(FoundMinVT && "Concat vector type mismatch");
24736 }
24737
24738 for (const SDValue &Op : N->ops()) {
24739 EVT OpVT = Op.getValueType();
24740 unsigned NumElts = OpVT.getVectorNumElements();
24741
24742 if (ISD::UNDEF == Op.getOpcode())
24743 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24744
24745 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24746 if (SVT.isFloatingPoint()) {
24747 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24748 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24749 } else {
24750 for (unsigned i = 0; i != NumElts; ++i)
24751 Opnds.push_back(
24752 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24753 }
24754 }
24755 }
24756
24757 assert(VT.getVectorNumElements() == Opnds.size() &&
24758 "Concat vector type mismatch");
24759 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24760 }
24761
24762 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24763 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24764 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24765 return V;
24766
24767 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24768 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24769 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24770 return V;
24771
24772 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24773 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24774 return V;
24775 }
24776
24777 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24778 return V;
24779
24780 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24781 N, DAG, TLI, LegalTypes, LegalOperations))
24782 return V;
24783
24784 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24785 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24786 // operands and look for a CONCAT operations that place the incoming vectors
24787 // at the exact same location.
24788 //
24789 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
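// Illustrative sketch (hypothetical nodes):
//   t2: v4i32 = extract_subvector t1:v8i32, Constant:i64<0>
//   t3: v4i32 = extract_subvector t1:v8i32, Constant:i64<4>
//   t4: v8i32 = concat_vectors t2, t3
// is just t1, and the loop below returns that single source.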
24790 SDValue SingleSource = SDValue();
24791 unsigned PartNumElem =
24792 N->getOperand(0).getValueType().getVectorMinNumElements();
24793
24794 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24795 SDValue Op = N->getOperand(i);
24796
24797 if (Op.isUndef())
24798 continue;
24799
24800 // Check if this is the identity extract:
24801 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24802 return SDValue();
24803
24804 // Find the single incoming vector for the extract_subvector.
24805 if (SingleSource.getNode()) {
24806 if (Op.getOperand(0) != SingleSource)
24807 return SDValue();
24808 } else {
24809 SingleSource = Op.getOperand(0);
24810
24811 // Check the source type is the same as the type of the result.
24812 // If not, this concat may extend the vector, so we can not
24813 // optimize it away.
24814 if (SingleSource.getValueType() != N->getValueType(0))
24815 return SDValue();
24816 }
24817
24818 // Check that we are reading from the identity index.
24819 unsigned IdentityIndex = i * PartNumElem;
24820 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24821 return SDValue();
24822 }
24823
24824 if (SingleSource.getNode())
24825 return SingleSource;
24826
24827 return SDValue();
24828}
24829
24830// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24831// if the subvector can be sourced for free.
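// Illustrative sketch (hypothetical values): with SubVT = v4i32 and Index = 4,
// a V of (insert_subvector X, Y:v4i32, 4) returns Y, and a V of
// (concat_vectors A:v4i32, B:v4i32) returns B.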
24832static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24833 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24834 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24835 return V.getOperand(1);
24836 }
24837 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24838 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24839 V.getOperand(0).getValueType() == SubVT &&
24840 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24841 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24842 return V.getOperand(SubIdx);
24843 }
24844 return SDValue();
24845}
24846
24847 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24848 SelectionDAG &DAG,
24849 bool LegalOperations) {
24850 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24851 SDValue BinOp = Extract->getOperand(0);
24852 unsigned BinOpcode = BinOp.getOpcode();
24853 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24854 return SDValue();
24855
24856 EVT VecVT = BinOp.getValueType();
24857 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24858 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24859 return SDValue();
24860
24861 SDValue Index = Extract->getOperand(1);
24862 EVT SubVT = Extract->getValueType(0);
24863 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24864 return SDValue();
24865
24866 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24867 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24868
24869 // TODO: We could handle the case where only 1 operand is being inserted by
24870 // creating an extract of the other operand, but that requires checking
24871 // number of uses and/or costs.
24872 if (!Sub0 || !Sub1)
24873 return SDValue();
24874
24875 // We are inserting both operands of the wide binop only to extract back
24876 // to the narrow vector size. Eliminate all of the insert/extract:
24877 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24878 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24879 BinOp->getFlags());
24880}
24881
24882/// If we are extracting a subvector produced by a wide binary operator try
24883/// to use a narrow binary operator and/or avoid concatenation and extraction.
24884 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24885 bool LegalOperations) {
24886 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24887 // some of these bailouts with other transforms.
24888
24889 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24890 return V;
24891
24892 // The extract index must be a constant, so we can map it to a concat operand.
24893 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24894 if (!ExtractIndexC)
24895 return SDValue();
24896
24897 // We are looking for an optionally bitcasted wide vector binary operator
24898 // feeding an extract subvector.
24899 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24900 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24901 unsigned BOpcode = BinOp.getOpcode();
24902 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24903 return SDValue();
24904
24905 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24906 // reduced to the unary fneg when it is visited, and we probably want to deal
24907 // with fneg in a target-specific way.
24908 if (BOpcode == ISD::FSUB) {
24909 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24910 if (C && C->getValueAPF().isNegZero())
24911 return SDValue();
24912 }
24913
24914 // The binop must be a vector type, so we can extract some fraction of it.
24915 EVT WideBVT = BinOp.getValueType();
24916 // The optimisations below currently assume we are dealing with fixed length
24917 // vectors. It is possible to add support for scalable vectors, but at the
24918 // moment we've done no analysis to prove whether they are profitable or not.
24919 if (!WideBVT.isFixedLengthVector())
24920 return SDValue();
24921
24922 EVT VT = Extract->getValueType(0);
24923 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24924 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24925 "Extract index is not a multiple of the vector length.");
24926
24927 // Bail out if this is not a proper multiple width extraction.
24928 unsigned WideWidth = WideBVT.getSizeInBits();
24929 unsigned NarrowWidth = VT.getSizeInBits();
24930 if (WideWidth % NarrowWidth != 0)
24931 return SDValue();
24932
24933 // Bail out if we are extracting a fraction of a single operation. This can
24934 // occur because we potentially looked through a bitcast of the binop.
24935 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24936 unsigned WideNumElts = WideBVT.getVectorNumElements();
24937 if (WideNumElts % NarrowingRatio != 0)
24938 return SDValue();
24939
24940 // Bail out if the target does not support a narrower version of the binop.
24941 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24942 WideNumElts / NarrowingRatio);
24943 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24944 LegalOperations))
24945 return SDValue();
24946
24947 // If extraction is cheap, we don't need to look at the binop operands
24948 // for concat ops. The narrow binop alone makes this transform profitable.
24949 // We can't just reuse the original extract index operand because we may have
24950 // bitcasted.
24951 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24952 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24953 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24954 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24955 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24956 SDLoc DL(Extract);
24957 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24958 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24959 BinOp.getOperand(0), NewExtIndex);
24960 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24961 BinOp.getOperand(1), NewExtIndex);
24962 SDValue NarrowBinOp =
24963 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24964 return DAG.getBitcast(VT, NarrowBinOp);
24965 }
24966
24967 // Only handle the case where we are doubling and then halving. A larger ratio
24968 // may require more than two narrow binops to replace the wide binop.
24969 if (NarrowingRatio != 2)
24970 return SDValue();
24971
24972 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24973 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24974 // flavors, but no other 256-bit integer support. This could be extended to
24975 // handle any binop, but that may require fixing/adding other folds to avoid
24976 // codegen regressions.
24977 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24978 return SDValue();
24979
24980 // We need at least one concatenation operation of a binop operand to make
24981 // this transform worthwhile. The concat must double the input vector sizes.
24982 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24983 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24984 return V.getOperand(ConcatOpNum);
24985 return SDValue();
24986 };
24987 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24988 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24989
24990 if (SubVecL || SubVecR) {
24991 // If a binop operand was not the result of a concat, we must extract a
24992 // half-sized operand for our new narrow binop:
24993 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24994 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24995 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24996 SDLoc DL(Extract);
24997 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24998 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24999 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25000 BinOp.getOperand(0), IndexC);
25001
25002 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25003 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25004 BinOp.getOperand(1), IndexC);
25005
25006 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25007 return DAG.getBitcast(VT, NarrowBinOp);
25008 }
25009
25010 return SDValue();
25011}
25012
25013/// If we are extracting a subvector from a wide vector load, convert to a
25014/// narrow load to eliminate the extraction:
25015/// (extract_subvector (load wide vector)) --> (load narrow vector)
25016 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
25017 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25018 if (DAG.getDataLayout().isBigEndian())
25019 return SDValue();
25020
25021 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
25022 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
25023 return SDValue();
25024
25025 // Allow targets to opt-out.
25026 EVT VT = Extract->getValueType(0);
25027
25028 // We can only create byte sized loads.
25029 if (!VT.isByteSized())
25030 return SDValue();
25031
25032 unsigned Index = Extract->getConstantOperandVal(1);
25033 unsigned NumElts = VT.getVectorMinNumElements();
25034 // A fixed length vector being extracted from a scalable vector
25035 // may not be any *smaller* than the scalable one.
25036 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25037 return SDValue();
25038
25039 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25040 // multiple of the minimum number of elements in the result type.
25041 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25042 "multiple of the result's element count");
25043
25044 // It's fine to use TypeSize here as we know the offset will not be negative.
25045 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25046
25047 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25048 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
25049 return SDValue();
25050
25051 // The narrow load will be offset from the base address of the old load if
25052 // we are extracting from something besides index 0 (little-endian).
25053 SDLoc DL(Extract);
25054
25055 // TODO: Use "BaseIndexOffset" to make this more effective.
25056 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25057
25058 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
25059 MachineFunction &MF = DAG.getMachineFunction();
25060 MachineMemOperand *MMO;
25061 if (Offset.isScalable()) {
25062 MachinePointerInfo MPI =
25063 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25064 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
25065 } else
25066 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25067 StoreSize);
25068
25069 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25070 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25071 return NewLd;
25072}
25073
25074/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25075/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25076/// EXTRACT_SUBVECTOR(Op?, ?),
25077/// Mask'))
25078/// iff it is legal and profitable to do so. Notably, the trimmed mask
25079/// (containing only the elements that are extracted)
25080/// must reference at most two subvectors.
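/// Illustrative sketch (hypothetical nodes, subject to the legality checks below):
///   t3: v8i32 = vector_shuffle<0,8,1,9,2,10,3,11> t1, t2
///   t4: v4i32 = extract_subvector t3, Constant:i64<4>
/// can become
///   t5: v4i32 = extract_subvector t1, Constant:i64<0>
///   t6: v4i32 = extract_subvector t2, Constant:i64<0>
///   t7: v4i32 = vector_shuffle<2,6,3,7> t5, t6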
25081 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
25082 SelectionDAG &DAG,
25083 const TargetLowering &TLI,
25084 bool LegalOperations) {
25085 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25086 "Must only be called on EXTRACT_SUBVECTOR's");
25087
25088 SDValue N0 = N->getOperand(0);
25089
25090 // Only deal with non-scalable vectors.
25091 EVT NarrowVT = N->getValueType(0);
25092 EVT WideVT = N0.getValueType();
25093 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25094 return SDValue();
25095
25096 // The operand must be a shufflevector.
25097 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
25098 if (!WideShuffleVector)
25099 return SDValue();
25100
25101 // The old shuffle needs to go away.
25102 if (!WideShuffleVector->hasOneUse())
25103 return SDValue();
25104
25105 // And the narrow shufflevector that we'll form must be legal.
25106 if (LegalOperations &&
25107 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
25108 return SDValue();
25109
25110 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
25111 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25112 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
25113 "Extract index is not a multiple of the output vector length.");
25114
25115 int WideNumElts = WideVT.getVectorNumElements();
25116
25117 SmallVector<int, 16> NewMask;
25118 NewMask.reserve(NumEltsExtracted);
25119 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25120 DemandedSubvectors;
25121
25122 // Try to decode the wide mask into narrow mask from at most two subvectors.
25123 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25124 NumEltsExtracted)) {
25125 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25126 "Out-of-bounds shuffle mask?");
25127
25128 if (M < 0) {
25129 // Does not depend on operands, does not require adjustment.
25130 NewMask.emplace_back(M);
25131 continue;
25132 }
25133
25134 // From which operand of the shuffle does this shuffle mask element pick?
25135 int WideShufOpIdx = M / WideNumElts;
25136 // Which element of that operand is picked?
25137 int OpEltIdx = M % WideNumElts;
25138
25139 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25140 "Shuffle mask vector decomposition failure.");
25141
25142 // And which NumEltsExtracted-sized subvector of that operand is that?
25143 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25144 // And which element within that subvector of that operand is that?
25145 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25146
25147 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25148 "Shuffle mask subvector decomposition failure.");
25149
25150 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
25151 WideShufOpIdx * WideNumElts) == M &&
25152 "Shuffle mask full decomposition failure.");
25153
25154 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25155
25156 if (Op.isUndef()) {
25157 // Picking from an undef operand. Let's adjust mask instead.
25158 NewMask.emplace_back(-1);
25159 continue;
25160 }
25161
25162 const std::pair<SDValue, int> DemandedSubvector =
25163 std::make_pair(Op, OpSubvecIdx);
25164
25165 if (DemandedSubvectors.insert(DemandedSubvector)) {
25166 if (DemandedSubvectors.size() > 2)
25167 return SDValue(); // We can't handle more than two subvectors.
25168 // How many elements into the WideVT does this subvector start?
25169 int Index = NumEltsExtracted * OpSubvecIdx;
25170 // Bail out if the extraction isn't going to be cheap.
25171 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
25172 return SDValue();
25173 }
25174
25175 // Ok, but from which operand of the new shuffle will this element pick?
25176 int NewOpIdx =
25177 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
25178 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
25179
25180 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
25181 NewMask.emplace_back(AdjM);
25182 }
25183 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
25184 assert(DemandedSubvectors.size() <= 2 &&
25185 "Should have ended up demanding at most two subvectors.");
25186
25187 // Did we discover that the shuffle does not actually depend on operands?
25188 if (DemandedSubvectors.empty())
25189 return DAG.getUNDEF(NarrowVT);
25190
25191 // Profitability check: only deal with extractions from the first subvector
25192 // unless the mask becomes an identity mask.
25193 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
25194 any_of(NewMask, [](int M) { return M < 0; }))
25195 for (auto &DemandedSubvector : DemandedSubvectors)
25196 if (DemandedSubvector.second != 0)
25197 return SDValue();
25198
25199 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
25200 // operand[s]/index[es], so there is no point in checking for its legality.
25201
25202 // Do not turn a legal shuffle into an illegal one.
25203 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25204 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
25205 return SDValue();
25206
25207 SDLoc DL(N);
25208
25210 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
25211 &DemandedSubvector : DemandedSubvectors) {
25212 // How many elements into the WideVT does this subvector start?
25213 int Index = NumEltsExtracted * DemandedSubvector.second;
25214 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
25215 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
25216 DemandedSubvector.first, IndexC));
25217 }
25218 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
25219 "Should end up with either one or two ops");
25220
25221 // If we ended up with only one operand, pad with an undef.
25222 if (NewOps.size() == 1)
25223 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
25224
25225 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
25226}
25227
25228SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
25229 EVT NVT = N->getValueType(0);
25230 SDValue V = N->getOperand(0);
25231 uint64_t ExtIdx = N->getConstantOperandVal(1);
25232 SDLoc DL(N);
25233
25234 // Extract from UNDEF is UNDEF.
25235 if (V.isUndef())
25236 return DAG.getUNDEF(NVT);
25237
25238 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
25239 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
25240 return NarrowLoad;
25241
25242 // Combine an extract of an extract into a single extract_subvector.
25243 // ext (ext X, C), 0 --> ext X, C
25244 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
25245 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
25246 V.getConstantOperandVal(1)) &&
25247 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
25248 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
25249 V.getOperand(1));
25250 }
25251 }
25252
25253 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
25254 if (V.getOpcode() == ISD::SPLAT_VECTOR)
25255 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
25256 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
25257 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
25258
25259 // extract_subvector(insert_subvector(x,y,c1),c2)
25260 // --> extract_subvector(y,c2-c1)
25261 // iff we're just extracting from the inserted subvector.
25262 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25263 SDValue InsSub = V.getOperand(1);
25264 EVT InsSubVT = InsSub.getValueType();
25265 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
25266 unsigned InsIdx = V.getConstantOperandVal(2);
25267 unsigned NumSubElts = NVT.getVectorMinNumElements();
25268 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
25269 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
25270 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
25271 V.getValueType().isFixedLengthVector())
25272 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
25273 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
25274 }
25275
25276 // Try to move vector bitcast after extract_subv by scaling extraction index:
25277 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
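// Illustrative sketch (hypothetical nodes, assuming the types involved are legal):
//   v2i64 extract_subvector (v4i64 bitcast X:v8i32), 2
// becomes
//   v2i64 bitcast (v4i32 extract_subvector X, 4)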
25278 if (V.getOpcode() == ISD::BITCAST &&
25279 V.getOperand(0).getValueType().isVector() &&
25280 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
25281 SDValue SrcOp = V.getOperand(0);
25282 EVT SrcVT = SrcOp.getValueType();
25283 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
25284 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
25285 if ((SrcNumElts % DestNumElts) == 0) {
25286 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
25287 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
25288 EVT NewExtVT =
25289 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
25290 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25291 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
25292 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25293 V.getOperand(0), NewIndex);
25294 return DAG.getBitcast(NVT, NewExtract);
25295 }
25296 }
25297 if ((DestNumElts % SrcNumElts) == 0) {
25298 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
25299 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
25300 ElementCount NewExtEC =
25301 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
25302 EVT ScalarVT = SrcVT.getScalarType();
25303 if ((ExtIdx % DestSrcRatio) == 0) {
25304 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
25305 EVT NewExtVT =
25306 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
25307          if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25308            SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25309 SDValue NewExtract =
25310 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25311 V.getOperand(0), NewIndex);
25312 return DAG.getBitcast(NVT, NewExtract);
25313 }
25314 if (NewExtEC.isScalar() &&
25315              TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
25316            SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25317 SDValue NewExtract =
25318 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
25319 V.getOperand(0), NewIndex);
25320 return DAG.getBitcast(NVT, NewExtract);
25321 }
25322 }
25323 }
25324 }
25325 }
25326
25327 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
25328 unsigned ExtNumElts = NVT.getVectorMinNumElements();
25329 EVT ConcatSrcVT = V.getOperand(0).getValueType();
25330 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
25331 "Concat and extract subvector do not change element type");
25332 assert((ExtIdx % ExtNumElts) == 0 &&
25333 "Extract index is not a multiple of the input vector length.");
25334
25335 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
25336 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
25337
25338 // If the concatenated source types match this extract, it's a direct
25339 // simplification:
25340 // extract_subvec (concat V1, V2, ...), i --> Vi
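    // Worked example: extracting v4i32 at index 8 from
    //   v16i32 concat (v4i32 V1), (v4i32 V2), (v4i32 V3), (v4i32 V4)
    // returns V3 directly, since 8 / 4 selects concat operand 2.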
25341 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
25342 return V.getOperand(ConcatOpIdx);
25343
25344    // If each concatenated source vector is a whole multiple of the extract length,
25345 // then extract a fraction of one of those source vectors directly from a
25346 // concat operand. Example:
25347    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
25348 // v2i8 extract_subvec v8i8 Y, 6
25349 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
25350 ConcatSrcNumElts % ExtNumElts == 0) {
25351 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
25352 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
25353 "Trying to extract from >1 concat operand?");
25354 assert(NewExtIdx % ExtNumElts == 0 &&
25355 "Extract index is not a multiple of the input vector length.");
25356 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
25357 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
25358 V.getOperand(ConcatOpIdx), NewIndexC);
25359 }
25360 }
25361
25362 if (SDValue V =
25363 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
25364 return V;
25365
25366  V = peekThroughBitcasts(V);
25367
25368  // If the input is a build vector, try to make a smaller build vector.
25369 if (V.getOpcode() == ISD::BUILD_VECTOR) {
25370 EVT InVT = V.getValueType();
25371 unsigned ExtractSize = NVT.getSizeInBits();
25372 unsigned EltSize = InVT.getScalarSizeInBits();
25373 // Only do this if we won't split any elements.
25374 if (ExtractSize % EltSize == 0) {
25375 unsigned NumElems = ExtractSize / EltSize;
25376 EVT EltVT = InVT.getVectorElementType();
25377 EVT ExtractVT =
25378 NumElems == 1 ? EltVT
25379 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
25380 if ((Level < AfterLegalizeDAG ||
25381 (NumElems == 1 ||
25382 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
25383 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
25384 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
25385
25386 if (NumElems == 1) {
25387 SDValue Src = V->getOperand(IdxVal);
25388 if (EltVT != Src.getValueType())
25389 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
25390 return DAG.getBitcast(NVT, Src);
25391 }
25392
25393 // Extract the pieces from the original build_vector.
25394 SDValue BuildVec =
25395 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
25396 return DAG.getBitcast(NVT, BuildVec);
25397 }
25398 }
25399 }
25400
25401 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25402 // Handle only simple case where vector being inserted and vector
25403 // being extracted are of same size.
25404 EVT SmallVT = V.getOperand(1).getValueType();
25405 if (!NVT.bitsEq(SmallVT))
25406 return SDValue();
25407
25408 // Combine:
25409 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
25410 // Into:
25411    //    indices are equal or bit offsets are equal => V2 (the inserted subvector)
25412 // otherwise => (extract_subvec V1, ExtIdx)
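    // Worked example (illustrative types): with V2 : v2i32 inserted into
    // V1 : v8i32 at index 4, extracting v2i32 at index 4 yields V2 itself,
    // while extracting at index 2 reads an unmodified region of V1 and
    // becomes (extract_subvec V1, 2).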
25413 uint64_t InsIdx = V.getConstantOperandVal(2);
25414 if (InsIdx * SmallVT.getScalarSizeInBits() ==
25415 ExtIdx * NVT.getScalarSizeInBits()) {
25416 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
25417 return SDValue();
25418
25419 return DAG.getBitcast(NVT, V.getOperand(1));
25420 }
25421 return DAG.getNode(
25422        ISD::EXTRACT_SUBVECTOR, DL, NVT,
25423        DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
25424 N->getOperand(1));
25425 }
25426
25427 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
25428 return NarrowBOp;
25429
25430  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25431    return SDValue(N, 0);
25432
25433 return SDValue();
25434}
25435
25436/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
25437/// followed by concatenation. Narrow vector ops may have better performance
25438/// than wide ops, and this can unlock further narrowing of other vector ops.
25439/// Targets can invert this transform later if it is not profitable.
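// Worked example (illustrative types): a v8i32 interleave of X and Y written as
//   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,2,10,3,11>
// can become, when the target accepts both half masks,
//   concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)
// where X and Y are v4i32.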
25440static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
25441                                         SelectionDAG &DAG) {
25442 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
25443 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
25444 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
25445 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
25446 return SDValue();
25447
25448 // Split the wide shuffle mask into halves. Any mask element that is accessing
25449 // operand 1 is offset down to account for narrowing of the vectors.
25450 ArrayRef<int> Mask = Shuf->getMask();
25451 EVT VT = Shuf->getValueType(0);
25452 unsigned NumElts = VT.getVectorNumElements();
25453 unsigned HalfNumElts = NumElts / 2;
25454 SmallVector<int, 16> Mask0(HalfNumElts, -1);
25455 SmallVector<int, 16> Mask1(HalfNumElts, -1);
25456 for (unsigned i = 0; i != NumElts; ++i) {
25457 if (Mask[i] == -1)
25458 continue;
25459 // If we reference the upper (undef) subvector then the element is undef.
25460 if ((Mask[i] % NumElts) >= HalfNumElts)
25461 continue;
25462 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
25463 if (i < HalfNumElts)
25464 Mask0[i] = M;
25465 else
25466 Mask1[i - HalfNumElts] = M;
25467 }
25468
25469 // Ask the target if this is a valid transform.
25470 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25471 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
25472 HalfNumElts);
25473 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
25474 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
25475 return SDValue();
25476
25477 // shuffle (concat X, undef), (concat Y, undef), Mask -->
25478 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
25479 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
25480 SDLoc DL(Shuf);
25481 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
25482 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
25483 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
25484}
25485
25486// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
25487// or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
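// Worked example (illustrative types): with A, B, C, D all v4i32,
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11>
// copies B and C wholesale, so it can be rewritten as
//   concat B, C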
25488static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
25489  EVT VT = N->getValueType(0);
25490 unsigned NumElts = VT.getVectorNumElements();
25491
25492 SDValue N0 = N->getOperand(0);
25493 SDValue N1 = N->getOperand(1);
25494 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25495 ArrayRef<int> Mask = SVN->getMask();
25496
25497  SmallVector<SDValue, 4> Ops;
25498  EVT ConcatVT = N0.getOperand(0).getValueType();
25499 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
25500 unsigned NumConcats = NumElts / NumElemsPerConcat;
25501
25502 auto IsUndefMaskElt = [](int i) { return i == -1; };
25503
25504 // Special case: shuffle(concat(A,B)) can be more efficiently represented
25505 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
25506 // half vector elements.
25507 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25508 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25509 IsUndefMaskElt)) {
25510 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25511 N0.getOperand(1),
25512 Mask.slice(0, NumElemsPerConcat));
25513 N1 = DAG.getUNDEF(ConcatVT);
25514 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25515 }
25516
25517 // Look at every vector that's inserted. We're looking for exact
25518 // subvector-sized copies from a concatenated vector
25519 for (unsigned I = 0; I != NumConcats; ++I) {
25520 unsigned Begin = I * NumElemsPerConcat;
25521 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25522
25523 // Make sure we're dealing with a copy.
25524 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25525 Ops.push_back(DAG.getUNDEF(ConcatVT));
25526 continue;
25527 }
25528
25529 int OpIdx = -1;
25530 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25531 if (IsUndefMaskElt(SubMask[i]))
25532 continue;
25533 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25534 return SDValue();
25535 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25536 if (0 <= OpIdx && EltOpIdx != OpIdx)
25537 return SDValue();
25538 OpIdx = EltOpIdx;
25539 }
25540 assert(0 <= OpIdx && "Unknown concat_vectors op");
25541
25542 if (OpIdx < (int)N0.getNumOperands())
25543 Ops.push_back(N0.getOperand(OpIdx));
25544 else
25545 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25546 }
25547
25548 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25549}
25550
25551// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25552// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25553//
25554// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25555// a simplification in some sense, but it isn't appropriate in general: some
25556// BUILD_VECTORs are substantially cheaper than others. The general case
25557// of a BUILD_VECTOR requires inserting each element individually (or
25558// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25559// all constants is a single constant pool load. A BUILD_VECTOR where each
25560// element is identical is a splat. A BUILD_VECTOR where most of the operands
25561// are undef lowers to a small number of element insertions.
25562//
25563// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25564// We don't fold shuffles where one side is a non-zero constant, and we don't
25565// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25566// non-constant operands. This seems to work out reasonably well in practice.
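// Worked example of the fold, subject to the heuristics above:
//   shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,4,1,5>
//     --> build_vector a,e,b,f
// assuming neither operand is a non-zero constant vector and no non-constant
// scalar ends up duplicated.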
25567static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25568                                       SelectionDAG &DAG,
25569 const TargetLowering &TLI) {
25570 EVT VT = SVN->getValueType(0);
25571 unsigned NumElts = VT.getVectorNumElements();
25572 SDValue N0 = SVN->getOperand(0);
25573 SDValue N1 = SVN->getOperand(1);
25574
25575 if (!N0->hasOneUse())
25576 return SDValue();
25577
25578  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
25579 // discussed above.
25580 if (!N1.isUndef()) {
25581 if (!N1->hasOneUse())
25582 return SDValue();
25583
25584 bool N0AnyConst = isAnyConstantBuildVector(N0);
25585 bool N1AnyConst = isAnyConstantBuildVector(N1);
25586 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25587 return SDValue();
25588 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25589 return SDValue();
25590 }
25591
25592 // If both inputs are splats of the same value then we can safely merge this
25593 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25594 bool IsSplat = false;
25595 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25596 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25597 if (BV0 && BV1)
25598 if (SDValue Splat0 = BV0->getSplatValue())
25599 IsSplat = (Splat0 == BV1->getSplatValue());
25600
25601  SmallVector<SDValue, 8> Ops;
25602  SmallSet<SDValue, 16> DuplicateOps;
25603 for (int M : SVN->getMask()) {
25604 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25605 if (M >= 0) {
25606 int Idx = M < (int)NumElts ? M : M - NumElts;
25607 SDValue &S = (M < (int)NumElts ? N0 : N1);
25608 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25609 Op = S.getOperand(Idx);
25610 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25611 SDValue Op0 = S.getOperand(0);
25612 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25613 } else {
25614 // Operand can't be combined - bail out.
25615 return SDValue();
25616 }
25617 }
25618
25619 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25620 // generating a splat; semantically, this is fine, but it's likely to
25621 // generate low-quality code if the target can't reconstruct an appropriate
25622 // shuffle.
25623 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25624 if (!IsSplat && !DuplicateOps.insert(Op).second)
25625 return SDValue();
25626
25627 Ops.push_back(Op);
25628 }
25629
25630 // BUILD_VECTOR requires all inputs to be of the same type, find the
25631 // maximum type and extend them all.
25632 EVT SVT = VT.getScalarType();
25633 if (SVT.isInteger())
25634 for (SDValue &Op : Ops)
25635 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25636 if (SVT != VT.getScalarType())
25637 for (SDValue &Op : Ops)
25638 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25639 : (TLI.isZExtFree(Op.getValueType(), SVT)
25640 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25641 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25642 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25643}
25644
25645// Match shuffles that can be converted to *_vector_extend_in_reg.
25646// This is often generated during legalization.
25647// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25648// and returns the EVT to which the extension should be performed.
25649// NOTE: this assumes that the src is the first operand of the shuffle.
25650static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25651    unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25652 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25653 bool LegalOperations) {
25654 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25655
25656 // TODO Add support for big-endian when we have a test case.
25657 if (!VT.isInteger() || IsBigEndian)
25658 return std::nullopt;
25659
25660 unsigned NumElts = VT.getVectorNumElements();
25661 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25662
25663  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
25664  // power-of-2 extensions as they are the most likely.
25665  // FIXME: should try the Scale == NumElts case too.
25666 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25667 // The vector width must be a multiple of Scale.
25668 if (NumElts % Scale != 0)
25669 continue;
25670
25671 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25672 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25673
25674 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25675 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25676 continue;
25677
25678 if (Match(Scale))
25679 return OutVT;
25680 }
25681
25682 return std::nullopt;
25683}
25684
25685// Match shuffles that can be converted to any_vector_extend_in_reg.
25686// This is often generated during legalization.
25687// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25688static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25689                                                    SelectionDAG &DAG,
25690 const TargetLowering &TLI,
25691 bool LegalOperations) {
25692 EVT VT = SVN->getValueType(0);
25693 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25694
25695 // TODO Add support for big-endian when we have a test case.
25696 if (!VT.isInteger() || IsBigEndian)
25697 return SDValue();
25698
25699 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25700 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25701 Mask = SVN->getMask()](unsigned Scale) {
25702 for (unsigned i = 0; i != NumElts; ++i) {
25703 if (Mask[i] < 0)
25704 continue;
25705 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25706 continue;
25707 return false;
25708 }
25709 return true;
25710 };
25711
25712 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25713 SDValue N0 = SVN->getOperand(0);
25714 // Never create an illegal type. Only create unsupported operations if we
25715 // are pre-legalization.
25716 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25717 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25718 if (!OutVT)
25719 return SDValue();
25720 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25721}
25722
25723// Match shuffles that can be converted to zero_extend_vector_inreg.
25724// This is often generated during legalization.
25725// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25726static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25727                                                     SelectionDAG &DAG,
25728 const TargetLowering &TLI,
25729 bool LegalOperations) {
25730 bool LegalTypes = true;
25731 EVT VT = SVN->getValueType(0);
25732 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25733 unsigned NumElts = VT.getVectorNumElements();
25734 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25735
25736 // TODO: add support for big-endian when we have a test case.
25737 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25738 if (!VT.isInteger() || IsBigEndian)
25739 return SDValue();
25740
25741 SmallVector<int, 16> Mask(SVN->getMask());
25742 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25743 for (int &Indice : Mask) {
25744 if (Indice < 0)
25745 continue;
25746 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25747 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25748 Fn(Indice, OpIdx, OpEltIdx);
25749 }
25750 };
25751
25752 // Which elements of which operand does this shuffle demand?
25753 std::array<APInt, 2> OpsDemandedElts;
25754 for (APInt &OpDemandedElts : OpsDemandedElts)
25755 OpDemandedElts = APInt::getZero(NumElts);
25756 ForEachDecomposedIndice(
25757 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25758 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25759 });
25760
25761  // Element-wise(!), which of these demanded elements are known to be zero?
25762 std::array<APInt, 2> OpsKnownZeroElts;
25763 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25764 std::get<2>(I) =
25765 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25766
25767 // Manifest zeroable element knowledge in the shuffle mask.
25768 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25769 // this is a local invention, but it won't leak into DAG.
25770 // FIXME: should we not manifest them, but just check when matching?
25771 bool HadZeroableElts = false;
25772 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25773 int &Indice, int OpIdx, int OpEltIdx) {
25774 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25775 Indice = -2; // Zeroable element.
25776 HadZeroableElts = true;
25777 }
25778 });
25779
25780  // Don't proceed unless we've refined at least one zeroable mask index.
25781 // If we didn't, then we are still trying to match the same shuffle mask
25782 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25783 // and evidently failed. Proceeding will lead to endless combine loops.
25784 if (!HadZeroableElts)
25785 return SDValue();
25786
25787 // The shuffle may be more fine-grained than we want. Widen elements first.
25788 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25789 SmallVector<int, 16> ScaledMask;
25790 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25791 assert(Mask.size() >= ScaledMask.size() &&
25792 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25793 int Prescale = Mask.size() / ScaledMask.size();
25794
25795 NumElts = ScaledMask.size();
25796 EltSizeInBits *= Prescale;
25797
25798 EVT PrescaledVT = EVT::getVectorVT(
25799 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25800 NumElts);
25801
25802 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25803 return SDValue();
25804
25805 // For example,
25806 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25807 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25808 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25809 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25810 "Unexpected mask scaling factor.");
25811 ArrayRef<int> Mask = ScaledMask;
25812 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25813 SrcElt != NumSrcElts; ++SrcElt) {
25814 // Analyze the shuffle mask in Scale-sized chunks.
25815 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25816 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25817 Mask = Mask.drop_front(MaskChunk.size());
25818      // The first index in this chunk must be SrcElt, but not zero!
25819 // FIXME: undef should be fine, but that results in more-defined result.
25820 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25821 return false;
25822 // The rest of the indices in this chunk must be zeros.
25823 // FIXME: undef should be fine, but that results in more-defined result.
25824 if (!all_of(MaskChunk.drop_front(1),
25825 [](int Indice) { return Indice == -2; }))
25826 return false;
25827 }
25828 assert(Mask.empty() && "Did not process the whole mask?");
25829 return true;
25830 };
25831
25832 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25833 for (bool Commuted : {false, true}) {
25834 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25835 if (Commuted)
25836      ShuffleVectorSDNode::commuteMask(ScaledMask);
25837    std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25838 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25839 LegalOperations);
25840 if (OutVT)
25841 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25842 DAG.getBitcast(PrescaledVT, Op)));
25843 }
25844 return SDValue();
25845}
25846
25847// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25848// each source element of a large type into the lowest elements of a smaller
25849// destination type. This is often generated during legalization.
25850// If the source node itself was a '*_extend_vector_inreg' node then we should
25851// then be able to remove it.
25852static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25853                                        SelectionDAG &DAG) {
25854 EVT VT = SVN->getValueType(0);
25855 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25856
25857 // TODO Add support for big-endian when we have a test case.
25858 if (!VT.isInteger() || IsBigEndian)
25859 return SDValue();
25860
25861  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25862
25863 unsigned Opcode = N0.getOpcode();
25864 if (!ISD::isExtVecInRegOpcode(Opcode))
25865 return SDValue();
25866
25867 SDValue N00 = N0.getOperand(0);
25868 ArrayRef<int> Mask = SVN->getMask();
25869 unsigned NumElts = VT.getVectorNumElements();
25870 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25871 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25872 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25873
25874 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25875 return SDValue();
25876 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25877
25878 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25879 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25880 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25881 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25882 for (unsigned i = 0; i != NumElts; ++i) {
25883 if (Mask[i] < 0)
25884 continue;
25885 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25886 continue;
25887 return false;
25888 }
25889 return true;
25890 };
25891
25892 // At the moment we just handle the case where we've truncated back to the
25893 // same size as before the extension.
25894 // TODO: handle more extension/truncation cases as cases arise.
25895 if (EltSizeInBits != ExtSrcSizeInBits)
25896 return SDValue();
25897
25898 // We can remove *extend_vector_inreg only if the truncation happens at
25899 // the same scale as the extension.
25900 if (isTruncate(ExtScale))
25901 return DAG.getBitcast(VT, N00);
25902
25903 return SDValue();
25904}
25905
25906// Combine shuffles of splat-shuffles of the form:
25907// shuffle (shuffle V, undef, splat-mask), undef, M
25908// If splat-mask contains undef elements, we need to be careful about
25909// introducing undefs in the folded mask which are not the result of composing
25910// the masks of the shuffles.
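// Worked example: if the inner operand is already a splat shuffle
//   S = shuffle V, undef, <2,2,2,2>
// then an outer unary shuffle such as shuffle S, undef, <1,1,1,1> reads the
// same splatted element in every lane and can simply be replaced by S.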
25911static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25912                                        SelectionDAG &DAG) {
25913 EVT VT = Shuf->getValueType(0);
25914 unsigned NumElts = VT.getVectorNumElements();
25915
25916 if (!Shuf->getOperand(1).isUndef())
25917 return SDValue();
25918
25919 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25920 // in disguise, with all demanded elements being identical.
25921 // FIXME: this can be done per-operand.
25922 if (!Shuf->isSplat()) {
25923 APInt DemandedElts(NumElts, 0);
25924 for (int Idx : Shuf->getMask()) {
25925 if (Idx < 0)
25926 continue; // Ignore sentinel indices.
25927 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25928 DemandedElts.setBit(Idx);
25929 }
25930 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25931 APInt UndefElts;
25932 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25933 // Even if all demanded elements are splat, some of them could be undef.
25934 // Which lowest demanded element is *not* known-undef?
25935 std::optional<unsigned> MinNonUndefIdx;
25936 for (int Idx : Shuf->getMask()) {
25937 if (Idx < 0 || UndefElts[Idx])
25938 continue; // Ignore sentinel indices, and undef elements.
25939 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25940 }
25941 if (!MinNonUndefIdx)
25942 return DAG.getUNDEF(VT); // All undef - result is undef.
25943 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25944 SmallVector<int, 8> SplatMask(Shuf->getMask());
25945 for (int &Idx : SplatMask) {
25946 if (Idx < 0)
25947 continue; // Passthrough sentinel indices.
25948 // Otherwise, just pick the lowest demanded non-undef element.
25949 // Or sentinel undef, if we know we'd pick a known-undef element.
25950 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25951 }
25952 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25953 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25954 Shuf->getOperand(1), SplatMask);
25955 }
25956 }
25957
25958 // If the inner operand is a known splat with no undefs, just return that directly.
25959 // TODO: Create DemandedElts mask from Shuf's mask.
25960 // TODO: Allow undef elements and merge with the shuffle code below.
25961 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25962 return Shuf->getOperand(0);
25963
25964 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25965 if (!Splat || !Splat->isSplat())
25966 return SDValue();
25967
25968 ArrayRef<int> ShufMask = Shuf->getMask();
25969 ArrayRef<int> SplatMask = Splat->getMask();
25970 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25971
25972 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25973 // every undef mask element in the splat-shuffle has a corresponding undef
25974 // element in the user-shuffle's mask or if the composition of mask elements
25975 // would result in undef.
25976 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25977 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25978 // In this case it is not legal to simplify to the splat-shuffle because we
25979  // may be exposing to the users of the shuffle an undef element at index 1
25980 // which was not there before the combine.
25981 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25982 // In this case the composition of masks yields SplatMask, so it's ok to
25983 // simplify to the splat-shuffle.
25984 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25985 // In this case the composed mask includes all undef elements of SplatMask
25986 // and in addition sets element zero to undef. It is safe to simplify to
25987 // the splat-shuffle.
25988 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25989 ArrayRef<int> SplatMask) {
25990 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25991 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25992 SplatMask[UserMask[i]] != -1)
25993 return false;
25994 return true;
25995 };
25996 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25997 return Shuf->getOperand(0);
25998
25999 // Create a new shuffle with a mask that is composed of the two shuffles'
26000 // masks.
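  // Worked example: composing ShufMask = <1,u,3,0> with SplatMask = <2,u,2,u>
  // gives <u,u,u,2>; each lane i of the new mask reads SplatMask[ShufMask[i]],
  // and undef lanes stay undef.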
26001 SmallVector<int, 32> NewMask;
26002 for (int Idx : ShufMask)
26003 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26004
26005 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26006 Splat->getOperand(0), Splat->getOperand(1),
26007 NewMask);
26008}
26009
26010// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26011// the mask can be treated as a larger type.
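// Worked example (illustrative types): if both operands are v2i64 values
// bitcast to v4i32, the mask <2,3,4,5> moves whole 64-bit lanes, so it can be
// widened to <1,2> and performed as a v2i64 shuffle of the original operands,
// followed by a bitcast back to v4i32.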
26012static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26013                                       SelectionDAG &DAG,
26014 const TargetLowering &TLI,
26015 bool LegalOperations) {
26016 SDValue Op0 = SVN->getOperand(0);
26017 SDValue Op1 = SVN->getOperand(1);
26018 EVT VT = SVN->getValueType(0);
26019 if (Op0.getOpcode() != ISD::BITCAST)
26020 return SDValue();
26021 EVT InVT = Op0.getOperand(0).getValueType();
26022 if (!InVT.isVector() ||
26023 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26024 Op1.getOperand(0).getValueType() != InVT)))
26025 return SDValue();
26026  if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26027      (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26028 return SDValue();
26029
26030 int VTLanes = VT.getVectorNumElements();
26031 int InLanes = InVT.getVectorNumElements();
26032 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26033 (LegalOperations &&
26034       !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
26035    return SDValue();
26036 int Factor = VTLanes / InLanes;
26037
26038  // Check that each group of lanes in the mask is either undef or makes a valid
26039 // mask for the wider lane type.
26040 ArrayRef<int> Mask = SVN->getMask();
26041 SmallVector<int> NewMask;
26042 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26043 return SDValue();
26044
26045 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26046 return SDValue();
26047
26048 // Create the new shuffle with the new mask and bitcast it back to the
26049 // original type.
26050 SDLoc DL(SVN);
26051 Op0 = Op0.getOperand(0);
26052 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26053 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26054 return DAG.getBitcast(VT, NewShuf);
26055}
26056
26057/// Combine shuffle of shuffle of the form:
26058/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
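// Worked example: with InnerMask = <0,u,2,u> and OuterMask = <2,2,2,2>, every
// output lane resolves to element 2 of X, so the pair can fold to the single
// splat shuffle X, undef, <2,2,2,2> (when that mask is legal for the target).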
26059static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26060                                     SelectionDAG &DAG) {
26061 if (!OuterShuf->getOperand(1).isUndef())
26062 return SDValue();
26063 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26064 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26065 return SDValue();
26066
26067 ArrayRef<int> OuterMask = OuterShuf->getMask();
26068 ArrayRef<int> InnerMask = InnerShuf->getMask();
26069 unsigned NumElts = OuterMask.size();
26070 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26071 SmallVector<int, 32> CombinedMask(NumElts, -1);
26072 int SplatIndex = -1;
26073 for (unsigned i = 0; i != NumElts; ++i) {
26074 // Undef lanes remain undef.
26075 int OuterMaskElt = OuterMask[i];
26076 if (OuterMaskElt == -1)
26077 continue;
26078
26079 // Peek through the shuffle masks to get the underlying source element.
26080 int InnerMaskElt = InnerMask[OuterMaskElt];
26081 if (InnerMaskElt == -1)
26082 continue;
26083
26084 // Initialize the splatted element.
26085 if (SplatIndex == -1)
26086 SplatIndex = InnerMaskElt;
26087
26088 // Non-matching index - this is not a splat.
26089 if (SplatIndex != InnerMaskElt)
26090 return SDValue();
26091
26092 CombinedMask[i] = InnerMaskElt;
26093 }
26094 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26095 getSplatIndex(CombinedMask) != -1) &&
26096 "Expected a splat mask");
26097
26098 // TODO: The transform may be a win even if the mask is not legal.
26099 EVT VT = OuterShuf->getValueType(0);
26100 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26101 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26102 return SDValue();
26103
26104 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26105 InnerShuf->getOperand(1), CombinedMask);
26106}
26107
26108/// If the shuffle mask is taking exactly one element from the first vector
26109/// operand and passing through all other elements from the second vector
26110/// operand, return the index of the mask element that is choosing an element
26111/// from the first operand. Otherwise, return -1.
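// Worked example: for 4-element masks, <4,5,0,7> inserts element 0 of operand
// 0 into lane 2 and passes lanes 0, 1 and 3 through from operand 1, so this
// returns 2. A mask such as <4,5,0,6> returns -1 because lane 3 would move an
// operand-1 element across lanes.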
26112static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26113  int MaskSize = Mask.size();
26114 int EltFromOp0 = -1;
26115 // TODO: This does not match if there are undef elements in the shuffle mask.
26116 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26117 // removing an instruction (a shuffle), but losing the knowledge that some
26118 // vector lanes are not needed.
26119 for (int i = 0; i != MaskSize; ++i) {
26120 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26121 // We're looking for a shuffle of exactly one element from operand 0.
26122 if (EltFromOp0 != -1)
26123 return -1;
26124 EltFromOp0 = i;
26125 } else if (Mask[i] != i + MaskSize) {
26126 // Nothing from operand 1 can change lanes.
26127 return -1;
26128 }
26129 }
26130 return EltFromOp0;
26131}
26132
26133/// If a shuffle inserts exactly one element from a source vector operand into
26134/// another vector operand and we can access the specified element as a scalar,
26135/// then we can eliminate the shuffle.
26136static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
26137                                      SelectionDAG &DAG) {
26138 // First, check if we are taking one element of a vector and shuffling that
26139 // element into another vector.
26140 ArrayRef<int> Mask = Shuf->getMask();
26141 SmallVector<int, 16> CommutedMask(Mask);
26142 SDValue Op0 = Shuf->getOperand(0);
26143 SDValue Op1 = Shuf->getOperand(1);
26144 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
26145 if (ShufOp0Index == -1) {
26146 // Commute mask and check again.
26147    ShuffleVectorSDNode::commuteMask(CommutedMask);
26148    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
26149 if (ShufOp0Index == -1)
26150 return SDValue();
26151 // Commute operands to match the commuted shuffle mask.
26152 std::swap(Op0, Op1);
26153 Mask = CommutedMask;
26154 }
26155
26156 // The shuffle inserts exactly one element from operand 0 into operand 1.
26157 // Now see if we can access that element as a scalar via a real insert element
26158 // instruction.
26159 // TODO: We can try harder to locate the element as a scalar. Examples: it
26160 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
26161 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
26162 "Shuffle mask value must be from operand 0");
26163
26164 SDValue Elt;
26165 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
26166 m_SpecificInt(Mask[ShufOp0Index])))) {
26167 // There's an existing insertelement with constant insertion index, so we
26168 // don't need to check the legality/profitability of a replacement operation
26169 // that differs at most in the constant value. The target should be able to
26170 // lower any of those in a similar way. If not, legalization will expand
26171 // this to a scalar-to-vector plus shuffle.
26172 //
26173 // Note that the shuffle may move the scalar from the position that the
26174 // insert element used. Therefore, our new insert element occurs at the
26175 // shuffle's mask index value, not the insert's index value.
26176 //
26177 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26178 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26179 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
26180 Op1, Elt, NewInsIndex);
26181 }
26182
26183 return SDValue();
26184}
26185
26186/// If we have a unary shuffle of a shuffle, see if it can be folded away
26187/// completely. This has the potential to lose undef knowledge because the first
26188/// shuffle may not have an undef mask element where the second one does. So
26189/// only call this after doing simplifications based on demanded elements.
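// Worked example: if the inner shuffle uses Mask0 = <1,1,3,3>, then an outer
// unary shuffle with Mask = <1,0,3,2> selects, in every lane, the same source
// element the inner shuffle already placed there, so the outer shuffle can be
// dropped in favor of the inner one.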
26190static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
26191  // shuf (shuf0 X, Y, Mask0), undef, Mask
26192 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26193 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
26194 return SDValue();
26195
26196 ArrayRef<int> Mask = Shuf->getMask();
26197 ArrayRef<int> Mask0 = Shuf0->getMask();
26198 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
26199 // Ignore undef elements.
26200 if (Mask[i] == -1)
26201 continue;
26202 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
26203
26204 // Is the element of the shuffle operand chosen by this shuffle the same as
26205 // the element chosen by the shuffle operand itself?
26206 if (Mask0[Mask[i]] != Mask0[i])
26207 return SDValue();
26208 }
26209 // Every element of this shuffle is identical to the result of the previous
26210 // shuffle, so we can replace this value.
26211 return Shuf->getOperand(0);
26212}
26213
26214SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
26215 EVT VT = N->getValueType(0);
26216 unsigned NumElts = VT.getVectorNumElements();
26217
26218 SDValue N0 = N->getOperand(0);
26219 SDValue N1 = N->getOperand(1);
26220
26221 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
26222
26223 // Canonicalize shuffle undef, undef -> undef
26224 if (N0.isUndef() && N1.isUndef())
26225 return DAG.getUNDEF(VT);
26226
26227 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26228
26229 // Canonicalize shuffle v, v -> v, undef
26230 if (N0 == N1)
26231 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
26232 createUnaryMask(SVN->getMask(), NumElts));
26233
26234 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
26235 if (N0.isUndef())
26236 return DAG.getCommutedVectorShuffle(*SVN);
26237
26238 // Remove references to rhs if it is undef
26239 if (N1.isUndef()) {
26240 bool Changed = false;
26241 SmallVector<int, 8> NewMask;
26242 for (unsigned i = 0; i != NumElts; ++i) {
26243 int Idx = SVN->getMaskElt(i);
26244 if (Idx >= (int)NumElts) {
26245 Idx = -1;
26246 Changed = true;
26247 }
26248 NewMask.push_back(Idx);
26249 }
26250 if (Changed)
26251 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
26252 }
26253
26254 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
26255 return InsElt;
26256
26257 // A shuffle of a single vector that is a splatted value can always be folded.
26258 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
26259 return V;
26260
26261 if (SDValue V = formSplatFromShuffles(SVN, DAG))
26262 return V;
26263
26264 // If it is a splat, check if the argument vector is another splat or a
26265 // build_vector.
26266 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
26267 int SplatIndex = SVN->getSplatIndex();
26268 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
26269 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
26270 // splat (vector_bo L, R), Index -->
26271 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
26272 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
26273 SDLoc DL(N);
26274 EVT EltVT = VT.getScalarType();
26275 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
26276 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
26277 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
26278 SDValue NewBO =
26279 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
26280 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
26281      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
26282      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
26283 }
26284
26285 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
26286 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
26287 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
26288 N0.hasOneUse()) {
26289 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
26290 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
26291
26292      if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
26293        if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
26294 if (Idx->getAPIntValue() == SplatIndex)
26295 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
26296
26297 // Look through a bitcast if LE and splatting lane 0, through to a
26298 // scalar_to_vector or a build_vector.
26299 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
26300 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
26301          (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
26302           N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
26303        EVT N00VT = N0.getOperand(0).getValueType();
26304 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
26305 VT.isInteger() && N00VT.isInteger()) {
26306 EVT InVT =
26307              TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
26308          SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
26309                                          SDLoc(N), InVT);
26310 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
26311 }
26312 }
26313 }
26314
26315 // If this is a bit convert that changes the element type of the vector but
26316 // not the number of vector elements, look through it. Be careful not to
26317 // look though conversions that change things like v4f32 to v2f64.
26318 SDNode *V = N0.getNode();
26319 if (V->getOpcode() == ISD::BITCAST) {
26320 SDValue ConvInput = V->getOperand(0);
26321 if (ConvInput.getValueType().isVector() &&
26322 ConvInput.getValueType().getVectorNumElements() == NumElts)
26323 V = ConvInput.getNode();
26324 }
26325
26326 if (V->getOpcode() == ISD::BUILD_VECTOR) {
26327 assert(V->getNumOperands() == NumElts &&
26328 "BUILD_VECTOR has wrong number of operands");
26329 SDValue Base;
26330 bool AllSame = true;
26331 for (unsigned i = 0; i != NumElts; ++i) {
26332 if (!V->getOperand(i).isUndef()) {
26333 Base = V->getOperand(i);
26334 break;
26335 }
26336 }
26337 // Splat of <u, u, u, u>, return <u, u, u, u>
26338 if (!Base.getNode())
26339 return N0;
26340 for (unsigned i = 0; i != NumElts; ++i) {
26341 if (V->getOperand(i) != Base) {
26342 AllSame = false;
26343 break;
26344 }
26345 }
26346 // Splat of <x, x, x, x>, return <x, x, x, x>
26347 if (AllSame)
26348 return N0;
26349
26350 // Canonicalize any other splat as a build_vector.
26351 SDValue Splatted = V->getOperand(SplatIndex);
26352 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
26353 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
26354
26355 // We may have jumped through bitcasts, so the type of the
26356 // BUILD_VECTOR may not match the type of the shuffle.
26357 if (V->getValueType(0) != VT)
26358 NewBV = DAG.getBitcast(VT, NewBV);
26359 return NewBV;
26360 }
26361 }
26362
26363 // Simplify source operands based on shuffle mask.
26364  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26365    return SDValue(N, 0);
26366
26367 // This is intentionally placed after demanded elements simplification because
26368 // it could eliminate knowledge of undef elements created by this shuffle.
26369 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
26370 return ShufOp;
26371
26372 // Match shuffles that can be converted to any_vector_extend_in_reg.
26373 if (SDValue V =
26374 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
26375 return V;
26376
26377 // Combine "truncate_vector_in_reg" style shuffles.
26378 if (SDValue V = combineTruncationShuffle(SVN, DAG))
26379 return V;
26380
26381 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
26382 Level < AfterLegalizeVectorOps &&
26383 (N1.isUndef() ||
26384 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
26385 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
26386 if (SDValue V = partitionShuffleOfConcats(N, DAG))
26387 return V;
26388 }
26389
26390 // A shuffle of a concat of the same narrow vector can be reduced to use
26391 // only low-half elements of a concat with undef:
26392 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
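  // Worked example (illustrative type): with X : v4i32,
  //   shuffle (concat X, X), undef, <5,1,6,2,7,3,4,0>
  //     --> shuffle (concat X, undef), undef, <1,1,2,2,3,3,0,0>
  // since lanes 4-7 of the concat are just another copy of X.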
26393 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
26394 N0.getNumOperands() == 2 &&
26395 N0.getOperand(0) == N0.getOperand(1)) {
26396 int HalfNumElts = (int)NumElts / 2;
26397 SmallVector<int, 8> NewMask;
26398 for (unsigned i = 0; i != NumElts; ++i) {
26399 int Idx = SVN->getMaskElt(i);
26400 if (Idx >= HalfNumElts) {
26401 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
26402 Idx -= HalfNumElts;
26403 }
26404 NewMask.push_back(Idx);
26405 }
26406 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
26407 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
26408 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
26409 N0.getOperand(0), UndefVec);
26410 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
26411 }
26412 }
26413
26414 // See if we can replace a shuffle with an insert_subvector.
26415 // e.g. v2i32 into v8i32:
26416 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
26417 // --> insert_subvector(lhs,rhs1,4).
26418 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
26419      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
26420    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
26421 // Ensure RHS subvectors are legal.
26422 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
26423 EVT SubVT = RHS.getOperand(0).getValueType();
26424 int NumSubVecs = RHS.getNumOperands();
26425 int NumSubElts = SubVT.getVectorNumElements();
26426 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
26427 if (!TLI.isTypeLegal(SubVT))
26428 return SDValue();
26429
26430      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
26431 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
26432 return SDValue();
26433
26434 // Search [NumSubElts] spans for RHS sequence.
26435 // TODO: Can we avoid nested loops to increase performance?
26436 SmallVector<int> InsertionMask(NumElts);
26437 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
26438 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
26439 // Reset mask to identity.
26440 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
26441
26442 // Add subvector insertion.
26443 std::iota(InsertionMask.begin() + SubIdx,
26444 InsertionMask.begin() + SubIdx + NumSubElts,
26445 NumElts + (SubVec * NumSubElts));
26446
26447 // See if the shuffle mask matches the reference insertion mask.
26448 bool MatchingShuffle = true;
26449 for (int i = 0; i != (int)NumElts; ++i) {
26450 int ExpectIdx = InsertionMask[i];
26451 int ActualIdx = Mask[i];
26452 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
26453 MatchingShuffle = false;
26454 break;
26455 }
26456 }
26457
26458 if (MatchingShuffle)
26459 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
26460 RHS.getOperand(SubVec),
26461 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
26462 }
26463 }
26464 return SDValue();
26465 };
26466 ArrayRef<int> Mask = SVN->getMask();
26467 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
26468 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
26469 return InsertN1;
26470 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
26471 SmallVector<int> CommuteMask(Mask);
26472      ShuffleVectorSDNode::commuteMask(CommuteMask);
26473      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
26474 return InsertN0;
26475 }
26476 }
26477
26478 // If we're not performing a select/blend shuffle, see if we can convert the
26479  // shuffle into an AND node, where all the out-of-lane elements are known zero.
26480 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26481 bool IsInLaneMask = true;
26482 ArrayRef<int> Mask = SVN->getMask();
26483 SmallVector<int, 16> ClearMask(NumElts, -1);
26484 APInt DemandedLHS = APInt::getZero(NumElts);
26485 APInt DemandedRHS = APInt::getZero(NumElts);
26486 for (int I = 0; I != (int)NumElts; ++I) {
26487 int M = Mask[I];
26488 if (M < 0)
26489 continue;
26490 ClearMask[I] = M == I ? I : (I + NumElts);
26491 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
26492 if (M != I) {
26493 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
26494 Demanded.setBit(M % NumElts);
26495 }
26496 }
26497 // TODO: Should we try to mask with N1 as well?
26498 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
26499 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
26500 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
26501 SDLoc DL(N);
26502      EVT IntVT = VT.changeVectorElementTypeToInteger();
26503      EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
26504      // Transform the type to a legal type so that the buildvector constant
26505 // elements are not illegal. Make sure that the result is larger than the
26506      // original type, in case the value is split into two (e.g. i64->i32).
26507 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26508 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26509 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26510 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26511 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26512 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26513 for (int I = 0; I != (int)NumElts; ++I)
26514 if (0 <= Mask[I])
26515 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26516
26517 // See if a clear mask is legal instead of going via
26518 // XformToShuffleWithZero which loses UNDEF mask elements.
26519 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26520 return DAG.getBitcast(
26521 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26522 DAG.getConstant(0, DL, IntVT), ClearMask));
26523
26524 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26525 return DAG.getBitcast(
26526 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26527 DAG.getBuildVector(IntVT, DL, AndMask)));
26528 }
26529 }
26530 }
26531
26532 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26533 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26534 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26535 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26536 return Res;
26537
26538 // If this shuffle only has a single input that is a bitcasted shuffle,
26539 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26540 // back to their original types.
26541 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26542 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26543 TLI.isTypeLegal(VT)) {
26544
26545    SDValue BC0 = peekThroughOneUseBitcasts(N0);
26546    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26547 EVT SVT = VT.getScalarType();
26548 EVT InnerVT = BC0->getValueType(0);
26549 EVT InnerSVT = InnerVT.getScalarType();
26550
26551 // Determine which shuffle works with the smaller scalar type.
26552 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26553 EVT ScaleSVT = ScaleVT.getScalarType();
26554
26555 if (TLI.isTypeLegal(ScaleVT) &&
26556 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26557 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26558 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26559 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26560
26561 // Scale the shuffle masks to the smaller scalar type.
26562 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26563 SmallVector<int, 8> InnerMask;
26564 SmallVector<int, 8> OuterMask;
26565 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26566 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26567
26568 // Merge the shuffle masks.
26569 SmallVector<int, 8> NewMask;
26570 for (int M : OuterMask)
26571 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26572
26573 // Test for shuffle mask legality over both commutations.
26574 SDValue SV0 = BC0->getOperand(0);
26575 SDValue SV1 = BC0->getOperand(1);
26576 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26577 if (!LegalMask) {
26578 std::swap(SV0, SV1);
26579            ShuffleVectorSDNode::commuteMask(NewMask);
26580            LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26581 }
26582
26583 if (LegalMask) {
26584 SV0 = DAG.getBitcast(ScaleVT, SV0);
26585 SV1 = DAG.getBitcast(ScaleVT, SV1);
26586 return DAG.getBitcast(
26587 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26588 }
26589 }
26590 }
26591 }
26592
26593 // Match shuffles of bitcasts, so long as the mask can be treated as the
26594 // larger type.
26595 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26596 return V;
26597
26598 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26599 // operand, and SV1 as the second operand.
26600 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26601 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26602 auto MergeInnerShuffle =
26603 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26604 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26605 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26606 SmallVectorImpl<int> &Mask) -> bool {
26607 // Don't try to fold splats; they're likely to simplify somehow, or they
26608 // might be free.
26609 if (OtherSVN->isSplat())
26610 return false;
26611
26612 SV0 = SV1 = SDValue();
26613 Mask.clear();
26614
26615 for (unsigned i = 0; i != NumElts; ++i) {
26616 int Idx = SVN->getMaskElt(i);
26617 if (Idx < 0) {
26618 // Propagate Undef.
26619 Mask.push_back(Idx);
26620 continue;
26621 }
26622
26623 if (Commute)
26624 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26625
26626 SDValue CurrentVec;
26627 if (Idx < (int)NumElts) {
26628 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26629 // shuffle mask to identify which vector is actually referenced.
26630 Idx = OtherSVN->getMaskElt(Idx);
26631 if (Idx < 0) {
26632 // Propagate Undef.
26633 Mask.push_back(Idx);
26634 continue;
26635 }
26636 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26637 : OtherSVN->getOperand(1);
26638 } else {
26639 // This shuffle index references an element within N1.
26640 CurrentVec = N1;
26641 }
26642
26643 // Simple case where 'CurrentVec' is UNDEF.
26644 if (CurrentVec.isUndef()) {
26645 Mask.push_back(-1);
26646 continue;
26647 }
26648
26649 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26650 // will be the first or second operand of the combined shuffle.
26651 Idx = Idx % NumElts;
26652 if (!SV0.getNode() || SV0 == CurrentVec) {
26653 // Ok. CurrentVec is the left hand side.
26654 // Update the mask accordingly.
26655 SV0 = CurrentVec;
26656 Mask.push_back(Idx);
26657 continue;
26658 }
26659 if (!SV1.getNode() || SV1 == CurrentVec) {
26660 // Ok. CurrentVec is the right hand side.
26661 // Update the mask accordingly.
26662 SV1 = CurrentVec;
26663 Mask.push_back(Idx + NumElts);
26664 continue;
26665 }
26666
26667 // Last chance - see if the vector is another shuffle and if it
26668 // uses one of the existing candidate shuffle ops.
26669 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26670 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26671 if (InnerIdx < 0) {
26672 Mask.push_back(-1);
26673 continue;
26674 }
26675 SDValue InnerVec = (InnerIdx < (int)NumElts)
26676 ? CurrentSVN->getOperand(0)
26677 : CurrentSVN->getOperand(1);
26678 if (InnerVec.isUndef()) {
26679 Mask.push_back(-1);
26680 continue;
26681 }
26682 InnerIdx %= NumElts;
26683 if (InnerVec == SV0) {
26684 Mask.push_back(InnerIdx);
26685 continue;
26686 }
26687 if (InnerVec == SV1) {
26688 Mask.push_back(InnerIdx + NumElts);
26689 continue;
26690 }
26691 }
26692
26693 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26694 return false;
26695 }
26696
26697 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26698 return true;
26699
26700 // Avoid introducing shuffles with illegal mask.
26701 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26702 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26703 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26704 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26705 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26706 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26707 if (TLI.isShuffleMaskLegal(Mask, VT))
26708 return true;
26709
26710 std::swap(SV0, SV1);
26711    ShuffleVectorSDNode::commuteMask(Mask);
26712    return TLI.isShuffleMaskLegal(Mask, VT);
26713 };
26714
26715 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26716 // Canonicalize shuffles according to rules:
26717 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26718 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26719 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26720 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26721        N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26722      // The incoming shuffle must be of the same type as the result of the
26723 // current shuffle.
26724 assert(N1->getOperand(0).getValueType() == VT &&
26725 "Shuffle types don't match");
26726
26727 SDValue SV0 = N1->getOperand(0);
26728 SDValue SV1 = N1->getOperand(1);
26729 bool HasSameOp0 = N0 == SV0;
26730 bool IsSV1Undef = SV1.isUndef();
26731 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26732 // Commute the operands of this shuffle so merging below will trigger.
26733 return DAG.getCommutedVectorShuffle(*SVN);
26734 }
26735
26736 // Canonicalize splat shuffles to the RHS to improve merging below.
26737 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26738 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26739 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26740 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26741 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26742 return DAG.getCommutedVectorShuffle(*SVN);
26743 }
26744
26745 // Try to fold according to rules:
26746 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26747 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26748 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26749 // Don't try to fold shuffles with illegal type.
26750 // Only fold if this shuffle is the only user of the other shuffle.
26751 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26752 for (int i = 0; i != 2; ++i) {
26753 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26754 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26755 // The incoming shuffle must be of the same type as the result of the
26756 // current shuffle.
26757 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26758 assert(OtherSV->getOperand(0).getValueType() == VT &&
26759 "Shuffle types don't match");
26760
26761 SDValue SV0, SV1;
26762 SmallVector<int, 4> Mask;
26763 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26764 SV0, SV1, Mask)) {
26765 // Check if all indices in Mask are Undef. If so, propagate Undef.
26766 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26767 return DAG.getUNDEF(VT);
26768
26769 return DAG.getVectorShuffle(VT, SDLoc(N),
26770 SV0 ? SV0 : DAG.getUNDEF(VT),
26771 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26772 }
26773 }
26774 }
26775
26776 // Merge shuffles through binops if we are able to merge them with at least
26777 // one other shuffle.
26778 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26779 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26780 unsigned SrcOpcode = N0.getOpcode();
26781 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26782 (N1.isUndef() ||
26783 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26784 // Get binop source ops, or just pass on the undef.
26785 SDValue Op00 = N0.getOperand(0);
26786 SDValue Op01 = N0.getOperand(1);
26787 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26788 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26789 // TODO: We might be able to relax the VT check but we don't currently
26790 // have any isBinOp() that has different result/ops VTs so play safe until
26791 // we have test coverage.
26792 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26793 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26794 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26795 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26796 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26797 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26798 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26799 SmallVectorImpl<int> &Mask, bool LeftOp,
26800 bool Commute) {
26801 SDValue InnerN = Commute ? N1 : N0;
26802 SDValue Op0 = LeftOp ? Op00 : Op01;
26803 SDValue Op1 = LeftOp ? Op10 : Op11;
26804 if (Commute)
26805 std::swap(Op0, Op1);
26806 // Only accept the merged shuffle if we don't introduce undef elements,
26807 // or the inner shuffle already contained undef elements.
26808 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26809 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26810 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26811 Mask) &&
26812 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26813 llvm::none_of(Mask, [](int M) { return M < 0; }));
26814 };
26815
26816 // Ensure we don't increase the number of shuffles - we must merge a
26817 // shuffle from at least one of the LHS and RHS ops.
26818 bool MergedLeft = false;
26819 SDValue LeftSV0, LeftSV1;
26820 SmallVector<int, 4> LeftMask;
26821 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26822 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26823 MergedLeft = true;
26824 } else {
26825 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26826 LeftSV0 = Op00, LeftSV1 = Op10;
26827 }
26828
26829 bool MergedRight = false;
26830 SDValue RightSV0, RightSV1;
26831 SmallVector<int, 4> RightMask;
26832 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26833 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26834 MergedRight = true;
26835 } else {
26836 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26837 RightSV0 = Op01, RightSV1 = Op11;
26838 }
26839
26840 if (MergedLeft || MergedRight) {
26841 SDLoc DL(N);
26842 SDValue LHS = DAG.getVectorShuffle(
26843 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26844 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26845 SDValue RHS = DAG.getVectorShuffle(
26846 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26847 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26848 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26849 }
26850 }
26851 }
26852 }
26853
26854 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26855 return V;
26856
26857 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26858 // Perform this really late, because it could eliminate knowledge
26859 // of undef elements created by this shuffle.
26860 if (Level < AfterLegalizeTypes)
26861 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26862 LegalOperations))
26863 return V;
26864
26865 return SDValue();
26866}
26867
26868SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26869 EVT VT = N->getValueType(0);
26870 if (!VT.isFixedLengthVector())
26871 return SDValue();
26872
26873 // Try to convert a scalar binop with an extracted vector element to a vector
26874 // binop. This is intended to reduce potentially expensive register moves.
26875 // TODO: Check if both operands are extracted.
26876 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26877 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26878 SDValue Scalar = N->getOperand(0);
26879 unsigned Opcode = Scalar.getOpcode();
26880 EVT VecEltVT = VT.getScalarType();
26881 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26882 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26883 Scalar.getOperand(0).getValueType() == VecEltVT &&
26884 Scalar.getOperand(1).getValueType() == VecEltVT &&
26885 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26886 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26887 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26888 // Match an extract element and get a shuffle mask equivalent.
26889 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26890
26891 for (int i : {0, 1}) {
26892 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26893 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26894 SDValue EE = Scalar.getOperand(i);
26895 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26896 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26897 EE.getOperand(0).getValueType() == VT &&
26898 isa<ConstantSDNode>(EE.getOperand(1))) {
26899 // Mask = {ExtractIndex, undef, undef....}
26900 ShufMask[0] = EE.getConstantOperandVal(1);
26901 // Make sure the shuffle is legal if we are crossing lanes.
26902 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26903 SDLoc DL(N);
26904 SDValue V[] = {EE.getOperand(0),
26905 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26906 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26907 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26908 ShufMask);
26909 }
26910 }
26911 }
26912 }
26913
26914 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26915 // with a VECTOR_SHUFFLE and possible truncate.
26916 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26917 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26918 return SDValue();
26919
26920 // If we have an implicit truncate, truncate here if it is legal.
26921 if (VecEltVT != Scalar.getValueType() &&
26922 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26923 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26924 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26925 }
26926
26927 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26928 if (!ExtIndexC)
26929 return SDValue();
26930
26931 SDValue SrcVec = Scalar.getOperand(0);
26932 EVT SrcVT = SrcVec.getValueType();
26933 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26934 unsigned VTNumElts = VT.getVectorNumElements();
26935 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26936 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26937 SmallVector<int, 8> Mask(SrcNumElts, -1);
26938 Mask[0] = ExtIndexC->getZExtValue();
26939 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26940 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26941 if (!LegalShuffle)
26942 return SDValue();
26943
26944 // If the initial vector is the same size, the shuffle is the result.
26945 if (VT == SrcVT)
26946 return LegalShuffle;
26947
26948 // If not, shorten the shuffled vector.
26949 if (VTNumElts != SrcNumElts) {
26950 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26951 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26952 SrcVT.getVectorElementType(), VTNumElts);
26953 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26954 ZeroIdx);
26955 }
26956 }
26957
26958 return SDValue();
26959}
26960
26961SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26962 EVT VT = N->getValueType(0);
26963 SDValue N0 = N->getOperand(0);
26964 SDValue N1 = N->getOperand(1);
26965 SDValue N2 = N->getOperand(2);
26966 uint64_t InsIdx = N->getConstantOperandVal(2);
26967
26968 // If inserting an UNDEF, just return the original vector.
26969 if (N1.isUndef())
26970 return N0;
26971
26972 // If this is an insert of an extracted vector into an undef vector, we can
26973 // just use the input to the extract if the types match, and can simplify
26974 // in some cases even if they don't.
26975 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26976 N1.getOperand(1) == N2) {
26977 EVT SrcVT = N1.getOperand(0).getValueType();
26978 if (SrcVT == VT)
26979 return N1.getOperand(0);
26980 // TODO: To remove the zero check, need to adjust the offset to
26981 // a multiple of the new src type.
26982 if (isNullConstant(N2)) {
26983 if (VT.knownBitsGE(SrcVT) &&
26984 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
26985 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26986 VT, N0, N1.getOperand(0), N2);
26987 else if (VT.knownBitsLE(SrcVT) &&
26988 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
26989 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26990 VT, N1.getOperand(0), N2);
26991 }
26992 }
26993
26994 // Handle case where we've ended up inserting back into the source vector
26995 // we extracted the subvector from.
26996 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26997 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26998 N1.getOperand(1) == N2)
26999 return N0;
27000
27001 // Simplify scalar inserts into an undef vector:
27002 // insert_subvector undef, (splat X), N2 -> splat X
27003 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27004 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27005 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27006
27007 // If we are inserting a bitcast value into an undef, with the same
27008 // number of elements, just use the bitcast input of the extract.
27009 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27010 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27011 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27012 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27013 N1.getOperand(0).getOperand(1) == N2 &&
27014 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27015 VT.getVectorElementCount() &&
27016 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27017 VT.getSizeInBits()) {
27018 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27019 }
27020
27021 // If both N0 and N1 are bitcast values on which insert_subvector
27022 // would make sense, pull the bitcast through.
27023 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27024 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27025 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27026 SDValue CN0 = N0.getOperand(0);
27027 SDValue CN1 = N1.getOperand(0);
27028 EVT CN0VT = CN0.getValueType();
27029 EVT CN1VT = CN1.getValueType();
27030 if (CN0VT.isVector() && CN1VT.isVector() &&
27031 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27032 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
27033 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27034 CN0.getValueType(), CN0, CN1, N2);
27035 return DAG.getBitcast(VT, NewINSERT);
27036 }
27037 }
27038
27039 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27040 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27041 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27042 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27043 N0.getOperand(1).getValueType() == N1.getValueType() &&
27044 N0.getOperand(2) == N2)
27045 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27046 N1, N2);
27047
27048 // Eliminate an intermediate insert into an undef vector:
27049 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27050 // insert_subvector undef, X, 0
27051 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27052 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27053 isNullConstant(N2))
27054 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27055 N1.getOperand(1), N2);
27056
27057 // Push subvector bitcasts to the output, adjusting the index as we go.
27058 // insert_subvector(bitcast(v), bitcast(s), c1)
27059 // -> bitcast(insert_subvector(v, s, c2))
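  // E.g. inserting a v2i64 (bitcast of a v4i32) into a v4i64 (bitcast of a
  // v8i32) at i64 index 1 becomes an insert of the v4i32 into the v8i32 at
  // index 2, followed by a bitcast back to v4i64.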
27060 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27061 N1.getOpcode() == ISD::BITCAST) {
27062 SDValue N0Src = peekThroughBitcasts(N0);
27063 SDValue N1Src = peekThroughBitcasts(N1);
27064 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27065 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27066 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27067 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27068 EVT NewVT;
27069 SDLoc DL(N);
27070 SDValue NewIdx;
27071 LLVMContext &Ctx = *DAG.getContext();
27072 ElementCount NumElts = VT.getVectorElementCount();
27073 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27074 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27075 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27076 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27077 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27078 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27079 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27080 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27081 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27082 NumElts.divideCoefficientBy(Scale));
27083 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27084 }
27085 }
27086 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27087 SDValue Res = DAG.getBitcast(NewVT, N0Src);
27088 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27089 return DAG.getBitcast(VT, Res);
27090 }
27091 }
27092 }
27093
27094 // Canonicalize insert_subvector dag nodes.
27095 // Example:
27096 // (insert_subvector (insert_subvector A, Idx0), Idx1)
27097 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
27098 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27099 N1.getValueType() == N0.getOperand(1).getValueType()) {
27100 unsigned OtherIdx = N0.getConstantOperandVal(2);
27101 if (InsIdx < OtherIdx) {
27102 // Swap nodes.
27103 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27104 N0.getOperand(0), N1, N2);
27105 AddToWorklist(NewOp.getNode());
27106 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27107 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
27108 }
27109 }
27110
27111 // If the input vector is a concatenation, and the insert replaces
27112 // one of the pieces, we can optimize into a single concat_vectors.
27113 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
27114 N0.getOperand(0).getValueType() == N1.getValueType() &&
27117 unsigned Factor = N1.getValueType().getVectorMinNumElements();
27118 SmallVector<SDValue, 8> Ops(N0->ops());
27119 Ops[InsIdx / Factor] = N1;
27120 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27121 }
27122
27123 // Simplify source operands based on insertion.
27124 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27125 return SDValue(N, 0);
27126
27127 return SDValue();
27128}
27129
27130SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
27131 SDValue N0 = N->getOperand(0);
27132
27133 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27134 if (N0->getOpcode() == ISD::FP16_TO_FP)
27135 return N0->getOperand(0);
27136
27137 return SDValue();
27138}
27139
27140SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
27141 auto Op = N->getOpcode();
27142 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
27143 "opcode should be FP16_TO_FP or BF16_TO_FP.");
27144 SDValue N0 = N->getOperand(0);
27145
27146 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27147 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27148 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27149 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
27150 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27151 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27152 }
27153 }
27154
27155 // Sometimes constants manage to survive very late in the pipeline, e.g.,
27156 // because they are wrapped inside the <1 x f16> type. Try one last time to
27157 // get rid of them.
27158 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27159 N->getValueType(0), {N0});
27160 return Folded;
27161}
27162
27163SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
27164 SDValue N0 = N->getOperand(0);
27165
27166 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
27167 if (N0->getOpcode() == ISD::BF16_TO_FP)
27168 return N0->getOperand(0);
27169
27170 return SDValue();
27171}
27172
27173SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
27174 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27175 return visitFP16_TO_FP(N);
27176}
27177
27178SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
27179 SDValue N0 = N->getOperand(0);
27180 EVT VT = N0.getValueType();
27181 unsigned Opcode = N->getOpcode();
27182
27183 // VECREDUCE over 1-element vector is just an extract.
27184 if (VT.getVectorElementCount().isScalar()) {
27185 SDLoc dl(N);
27186 SDValue Res =
27187 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
27188 DAG.getVectorIdxConstant(0, dl));
27189 if (Res.getValueType() != N->getValueType(0))
27190 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
27191 return Res;
27192 }
27193
27194 // On a boolean vector, an and/or reduction is the same as a umin/umax
27195 // reduction. Convert them if the latter is legal while the former isn't.
27196 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
27197 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
27198 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
27199 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
27200 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
27201 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
27202 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
27203 }
27204
27205 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
27206 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
27207 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27208 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
27209 SDValue Vec = N0.getOperand(0);
27210 SDValue Subvec = N0.getOperand(1);
27211 if ((Opcode == ISD::VECREDUCE_OR &&
27212 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
27213 (Opcode == ISD::VECREDUCE_AND &&
27214 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
27215 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
27216 }
27217
27218 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
27219 // Same for zext and anyext, and for and/or/xor reductions.
27220 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
27221 Opcode == ISD::VECREDUCE_XOR) &&
27222 (N0.getOpcode() == ISD::SIGN_EXTEND ||
27223 N0.getOpcode() == ISD::ZERO_EXTEND ||
27224 N0.getOpcode() == ISD::ANY_EXTEND) &&
27225 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
27226 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
27227 N0.getOperand(0).getValueType().getVectorElementType(),
27228 N0.getOperand(0));
27229 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
27230 }
27231 return SDValue();
27232}
27233
27234SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
27235 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
27236
27237 // FSUB -> FMA combines:
27238 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
27239 AddToWorklist(Fused.getNode());
27240 return Fused;
27241 }
27242 return SDValue();
27243}
27244
27245SDValue DAGCombiner::visitVPOp(SDNode *N) {
27246
27247 if (N->getOpcode() == ISD::VP_GATHER)
27248 if (SDValue SD = visitVPGATHER(N))
27249 return SD;
27250
27251 if (N->getOpcode() == ISD::VP_SCATTER)
27252 if (SDValue SD = visitVPSCATTER(N))
27253 return SD;
27254
27255 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
27256 if (SDValue SD = visitVP_STRIDED_LOAD(N))
27257 return SD;
27258
27259 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
27260 if (SDValue SD = visitVP_STRIDED_STORE(N))
27261 return SD;
27262
27263 // VP operations in which all vector elements are disabled - either by
27264 // determining that the mask is all false or that the EVL is 0 - can be
27265 // eliminated.
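  // E.g. a VP binary op whose EVL is 0 defines no lanes and folds to undef
  // below; a VP load folds to (undef, chain) and a VP store to just its chain.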
27266 bool AreAllEltsDisabled = false;
27267 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
27268 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
27269 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
27270 AreAllEltsDisabled |=
27271 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
27272
27273 // This is the only generic VP combine we support for now.
27274 if (!AreAllEltsDisabled) {
27275 switch (N->getOpcode()) {
27276 case ISD::VP_FADD:
27277 return visitVP_FADD(N);
27278 case ISD::VP_FSUB:
27279 return visitVP_FSUB(N);
27280 case ISD::VP_FMA:
27281 return visitFMA<VPMatchContext>(N);
27282 case ISD::VP_SELECT:
27283 return visitVP_SELECT(N);
27284 case ISD::VP_MUL:
27285 return visitMUL<VPMatchContext>(N);
27286 case ISD::VP_SUB:
27287 return foldSubCtlzNot<VPMatchContext>(N, DAG);
27288 default:
27289 break;
27290 }
27291 return SDValue();
27292 }
27293
27294 // Binary operations can be replaced by UNDEF.
27295 if (ISD::isVPBinaryOp(N->getOpcode()))
27296 return DAG.getUNDEF(N->getValueType(0));
27297
27298 // VP Memory operations can be replaced by either the chain (stores) or the
27299 // chain + undef (loads).
27300 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
27301 if (MemSD->writeMem())
27302 return MemSD->getChain();
27303 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
27304 }
27305
27306 // Reduction operations return the start operand when no elements are active.
27307 if (ISD::isVPReduction(N->getOpcode()))
27308 return N->getOperand(0);
27309
27310 return SDValue();
27311}
27312
27313SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
27314 SDValue Chain = N->getOperand(0);
27315 SDValue Ptr = N->getOperand(1);
27316 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27317
27318 // Check if the memory where the FP state is written is used only in a single
27319 // load operation.
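  // I.e. "get_fpenv_mem Ptr; X = load Ptr; store X, Dst" (illustrative names)
  // can write the FP environment directly to Dst with a single get_fpenv_mem.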
27320 LoadSDNode *LdNode = nullptr;
27321 for (auto *U : Ptr->users()) {
27322 if (U == N)
27323 continue;
27324 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
27325 if (LdNode && LdNode != Ld)
27326 return SDValue();
27327 LdNode = Ld;
27328 continue;
27329 }
27330 return SDValue();
27331 }
27332 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27333 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27334 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
27335 return SDValue();
27336
27337 // Check if the loaded value is used only in a store operation.
27338 StoreSDNode *StNode = nullptr;
27339 for (SDUse &U : LdNode->uses()) {
27340 if (U.getResNo() == 0) {
27341 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
27342 if (StNode)
27343 return SDValue();
27344 StNode = St;
27345 } else {
27346 return SDValue();
27347 }
27348 }
27349 }
27350 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27351 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27352 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27353 return SDValue();
27354
27355 // Create a new GET_FPENV_MEM node that uses the store address to write the FP
27356 // environment.
27357 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
27358 StNode->getMemOperand());
27359 CombineTo(StNode, Res, false);
27360 return Res;
27361}
27362
27363SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
27364 SDValue Chain = N->getOperand(0);
27365 SDValue Ptr = N->getOperand(1);
27366 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27367
27368 // Check if the address of the FP state is otherwise used only in a single store operation.
27369 StoreSDNode *StNode = nullptr;
27370 for (auto *U : Ptr->users()) {
27371 if (U == N)
27372 continue;
27373 if (auto *St = dyn_cast<StoreSDNode>(U)) {
27374 if (StNode && StNode != St)
27375 return SDValue();
27376 StNode = St;
27377 continue;
27378 }
27379 return SDValue();
27380 }
27381 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27382 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27383 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
27384 return SDValue();
27385
27386 // Check if the stored value is loaded from some location and the loaded
27387 // value is used only in the store operation.
27388 SDValue StValue = StNode->getValue();
27389 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
27390 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27391 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27392 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27393 return SDValue();
27394
27395 // Create a new SET_FPENV_MEM node that uses the load address to read the FP
27396 // environment.
27397 SDValue Res =
27398 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
27399 LdNode->getMemOperand());
27400 return Res;
27401}
27402
27403 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
27404/// with the destination vector and a zero vector.
27405/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
27406/// vector_shuffle V, Zero, <0, 4, 2, 4>
27407SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
27408 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
27409
27410 EVT VT = N->getValueType(0);
27411 SDValue LHS = N->getOperand(0);
27412 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
27413 SDLoc DL(N);
27414
27415 // Make sure we're not running after operation legalization where it
27416 // may have custom lowered the vector shuffles.
27417 if (LegalOperations)
27418 return SDValue();
27419
27420 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
27421 return SDValue();
27422
27423 EVT RVT = RHS.getValueType();
27424 unsigned NumElts = RHS.getNumOperands();
27425
27426 // Attempt to create a valid clear mask, splitting the mask into
27427 // sub elements and checking to see if each is
27428 // all zeros or all ones - suitable for shuffle masking.
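  // Worked example (illustrative): for a v2i64 AND mask
  // <0x00000000FFFFFFFF, 0xFFFFFFFF00000000> on a little-endian target,
  // Split == 2 gives v4i32 sub-elements <-1, 0, 0, -1>, i.e. shuffle indices
  // <0, 5, 6, 3>: lanes 0 and 3 come from LHS and lanes 1 and 2 from the zero
  // vector.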
27429 auto BuildClearMask = [&](int Split) {
27430 int NumSubElts = NumElts * Split;
27431 int NumSubBits = RVT.getScalarSizeInBits() / Split;
27432
27433 SmallVector<int, 8> Indices;
27434 for (int i = 0; i != NumSubElts; ++i) {
27435 int EltIdx = i / Split;
27436 int SubIdx = i % Split;
27437 SDValue Elt = RHS.getOperand(EltIdx);
27438 // X & undef --> 0 (not undef). So this lane must be converted to choose
27439 // from the zero constant vector (same as if the element had all 0-bits).
27440 if (Elt.isUndef()) {
27441 Indices.push_back(i + NumSubElts);
27442 continue;
27443 }
27444
27445 APInt Bits;
27446 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
27447 Bits = Cst->getAPIntValue();
27448 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
27449 Bits = CstFP->getValueAPF().bitcastToAPInt();
27450 else
27451 return SDValue();
27452
27453 // Extract the sub element from the constant bit mask.
27454 if (DAG.getDataLayout().isBigEndian())
27455 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
27456 else
27457 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
27458
27459 if (Bits.isAllOnes())
27460 Indices.push_back(i);
27461 else if (Bits == 0)
27462 Indices.push_back(i + NumSubElts);
27463 else
27464 return SDValue();
27465 }
27466
27467 // Let's see if the target supports this vector_shuffle.
27468 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
27469 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
27470 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
27471 return SDValue();
27472
27473 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
27474 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
27475 DAG.getBitcast(ClearVT, LHS),
27476 Zero, Indices));
27477 };
27478
27479 // Determine maximum split level (byte level masking).
27480 int MaxSplit = 1;
27481 if (RVT.getScalarSizeInBits() % 8 == 0)
27482 MaxSplit = RVT.getScalarSizeInBits() / 8;
27483
27484 for (int Split = 1; Split <= MaxSplit; ++Split)
27485 if (RVT.getScalarSizeInBits() % Split == 0)
27486 if (SDValue S = BuildClearMask(Split))
27487 return S;
27488
27489 return SDValue();
27490}
27491
27492/// If a vector binop is performed on splat values, it may be profitable to
27493 /// extract, scalarize, and insert/splat.
27494 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
27495 const SDLoc &DL, bool LegalTypes) {
27496 SDValue N0 = N->getOperand(0);
27497 SDValue N1 = N->getOperand(1);
27498 unsigned Opcode = N->getOpcode();
27499 EVT VT = N->getValueType(0);
27500 EVT EltVT = VT.getVectorElementType();
27501 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27502
27503 // TODO: Remove/replace the extract cost check? If the elements are available
27504 // as scalars, then there may be no extract cost. Should we ask if
27505 // inserting a scalar back into a vector is cheap instead?
27506 int Index0, Index1;
27507 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27508 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
27509 // Extract element from splat_vector should be free.
27510 // TODO: use DAG.isSplatValue instead?
27511 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
27512 N1.getOpcode() == ISD::SPLAT_VECTOR;
27513 if (!Src0 || !Src1 || Index0 != Index1 ||
27514 Src0.getValueType().getVectorElementType() != EltVT ||
27515 Src1.getValueType().getVectorElementType() != EltVT ||
27516 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27517 // If before type legalization, allow scalar types that will eventually be
27518 // made legal.
27519 !TLI.isOperationLegalOrCustom(
27520 Opcode, LegalTypes
27521 ? EltVT
27522 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
27523 return SDValue();
27524
27525 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
27526 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
27527 return SDValue();
27528
27529 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27530 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27531 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27532 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27533
27534 // If all lanes but 1 are undefined, no need to splat the scalar result.
27535 // TODO: Keep track of undefs and use that info in the general case.
27536 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27537 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
27538 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
27539 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27540 // build_vec ..undef, (bo X, Y), undef...
27541 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
27542 Ops[Index0] = ScalarBO;
27543 return DAG.getBuildVector(VT, DL, Ops);
27544 }
27545
27546 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27547 return DAG.getSplat(VT, DL, ScalarBO);
27548}
27549
27550/// Visit a vector cast operation, like FP_EXTEND.
27551SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27552 EVT VT = N->getValueType(0);
27553 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27554 EVT EltVT = VT.getVectorElementType();
27555 unsigned Opcode = N->getOpcode();
27556
27557 SDValue N0 = N->getOperand(0);
27558 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27559
27560 // TODO: promoting the operation might also be good here?
27561 int Index0;
27562 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27563 if (Src0 &&
27564 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27565 TLI.isExtractVecEltCheap(VT, Index0)) &&
27566 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27567 TLI.preferScalarizeSplat(N)) {
27568 EVT SrcVT = N0.getValueType();
27569 EVT SrcEltVT = SrcVT.getVectorElementType();
27570 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27571 SDValue Elt =
27572 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27573 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27574 if (VT.isScalableVector())
27575 return DAG.getSplatVector(VT, DL, ScalarBO);
27576 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27577 return DAG.getBuildVector(VT, DL, Ops);
27578 }
27579
27580 return SDValue();
27581}
27582
27583/// Visit a binary vector operation, like ADD.
27584SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27585 EVT VT = N->getValueType(0);
27586 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27587
27588 SDValue LHS = N->getOperand(0);
27589 SDValue RHS = N->getOperand(1);
27590 unsigned Opcode = N->getOpcode();
27591 SDNodeFlags Flags = N->getFlags();
27592
27593 // Move unary shuffles with identical masks after a vector binop:
27594 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27595 // --> shuffle (VBinOp A, B), Undef, Mask
27596 // This does not require type legality checks because we are creating the
27597 // same types of operations that are in the original sequence. We do have to
27598 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27599 // though. This code is adapted from the identical transform in instcombine.
27600 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27601 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27602 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27603 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27604 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27605 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27606 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27607 RHS.getOperand(0), Flags);
27608 SDValue UndefV = LHS.getOperand(1);
27609 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27610 }
27611
27612 // Try to sink a splat shuffle after a binop with a uniform constant.
27613 // This is limited to cases where neither the shuffle nor the constant have
27614 // undefined elements because that could be poison-unsafe or inhibit
27615 // demanded elements analysis. It is further limited to not change a splat
27616 // of an inserted scalar because that may be optimized better by
27617 // load-folding or other target-specific behaviors.
27618 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27619 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27620 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27621 // binop (splat X), (splat C) --> splat (binop X, C)
27622 SDValue X = Shuf0->getOperand(0);
27623 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27624 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27625 Shuf0->getMask());
27626 }
27627 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27628 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27629 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27630 // binop (splat C), (splat X) --> splat (binop C, X)
27631 SDValue X = Shuf1->getOperand(0);
27632 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27633 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27634 Shuf1->getMask());
27635 }
27636 }
27637
27638 // The following pattern is likely to emerge with vector reduction ops. Moving
27639 // the binary operation ahead of insertion may allow using a narrower vector
27640 // instruction that has better performance than the wide version of the op:
27641 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
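  // E.g. two v4i32 ADDs inserted into undef v8i32 at index 0 become a single
  // v4i32 ADD inserted into (add undef, undef) at index 0.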
27642 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27643 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27644 LHS.getOperand(2) == RHS.getOperand(2) &&
27645 (LHS.hasOneUse() || RHS.hasOneUse())) {
27646 SDValue X = LHS.getOperand(1);
27647 SDValue Y = RHS.getOperand(1);
27648 SDValue Z = LHS.getOperand(2);
27649 EVT NarrowVT = X.getValueType();
27650 if (NarrowVT == Y.getValueType() &&
27651 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27652 LegalOperations)) {
27653 // (binop undef, undef) may not return undef, so compute that result.
27654 SDValue VecC =
27655 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27656 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27657 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27658 }
27659 }
27660
27661 // Make sure all but the first op are undef or constant.
27662 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27663 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27664 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27665 return Op.isUndef() ||
27666 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27667 });
27668 };
27669
27670 // The following pattern is likely to emerge with vector reduction ops. Moving
27671 // the binary operation ahead of the concat may allow using a narrower vector
27672 // instruction that has better performance than the wide version of the op:
27673 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27674 // concat (VBinOp X, Y), VecC
27675 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27676 (LHS.hasOneUse() || RHS.hasOneUse())) {
27677 EVT NarrowVT = LHS.getOperand(0).getValueType();
27678 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27679 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27680 unsigned NumOperands = LHS.getNumOperands();
27681 SmallVector<SDValue, 4> ConcatOps;
27682 for (unsigned i = 0; i != NumOperands; ++i) {
27683 // This will constant fold for operands 1 and up.
27684 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27685 RHS.getOperand(i)));
27686 }
27687
27688 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27689 }
27690 }
27691
27692 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
27693 return V;
27694
27695 return SDValue();
27696}
27697
27698SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27699 SDValue N2) {
27700 assert(N0.getOpcode() == ISD::SETCC &&
27701 "First argument must be a SetCC node!");
27702
27703 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27704 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27705
27706 // If we got a simplified select_cc node back from SimplifySelectCC, then
27707 // break it down into a new SETCC node, and a new SELECT node, and then return
27708 // the SELECT node, since we were called with a SELECT node.
27709 if (SCC.getNode()) {
27710 // Check to see if we got a select_cc back (to turn into setcc/select).
27711 // Otherwise, just return whatever node we got back, like fabs.
27712 if (SCC.getOpcode() == ISD::SELECT_CC) {
27713 const SDNodeFlags Flags = N0->getFlags();
27714 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27715 N0.getValueType(),
27716 SCC.getOperand(0), SCC.getOperand(1),
27717 SCC.getOperand(4), Flags);
27718 AddToWorklist(SETCC.getNode());
27719 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27720 SCC.getOperand(2), SCC.getOperand(3));
27721 SelectNode->setFlags(Flags);
27722 return SelectNode;
27723 }
27724
27725 return SCC;
27726 }
27727 return SDValue();
27728}
27729
27730/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27731/// being selected between, see if we can simplify the select. Callers of this
27732/// should assume that TheSelect is deleted if this returns true. As such, they
27733/// should return the appropriate thing (e.g. the node) back to the top-level of
27734/// the DAG combiner loop to avoid it being looked at.
27735bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27736 SDValue RHS) {
27737 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27738 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27739 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27740 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27741 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27742 SDValue Sqrt = RHS;
27743 ISD::CondCode CC;
27744 SDValue CmpLHS;
27745 const ConstantFPSDNode *Zero = nullptr;
27746
27747 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27748 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27749 CmpLHS = TheSelect->getOperand(0);
27750 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27751 } else {
27752 // SELECT or VSELECT
27753 SDValue Cmp = TheSelect->getOperand(0);
27754 if (Cmp.getOpcode() == ISD::SETCC) {
27755 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27756 CmpLHS = Cmp.getOperand(0);
27757 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27758 }
27759 }
27760 if (Zero && Zero->isZero() &&
27761 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27762 CC == ISD::SETULT || CC == ISD::SETLT)) {
27763 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27764 CombineTo(TheSelect, Sqrt);
27765 return true;
27766 }
27767 }
27768 }
27769 // Cannot simplify select with vector condition
27770 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27771
27772 // If this is a select from two identical things, try to pull the operation
27773 // through the select.
27774 if (LHS.getOpcode() != RHS.getOpcode() ||
27775 !LHS.hasOneUse() || !RHS.hasOneUse())
27776 return false;
27777
27778 // If this is a load and the token chain is identical, replace the select
27779 // of two loads with a load through a select of the address to load from.
27780 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27781 // constants have been dropped into the constant pool.
27782 if (LHS.getOpcode() == ISD::LOAD) {
27783 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27784 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27785
27786 // Token chains must be identical.
27787 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27788 // Do not let this transformation reduce the number of volatile loads.
27789 // Be conservative for atomics for the moment
27790 // TODO: This does appear to be legal for unordered atomics (see D66309)
27791 !LLD->isSimple() || !RLD->isSimple() ||
27792 // FIXME: If either is a pre/post inc/dec load,
27793 // we'd need to split out the address adjustment.
27794 LLD->isIndexed() || RLD->isIndexed() ||
27795 // If this is an EXTLOAD, the VT's must match.
27796 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27797 // If this is an EXTLOAD, the kind of extension must match.
27798 (LLD->getExtensionType() != RLD->getExtensionType() &&
27799 // The only exception is if one of the extensions is anyext.
27800 LLD->getExtensionType() != ISD::EXTLOAD &&
27801 RLD->getExtensionType() != ISD::EXTLOAD) ||
27802 // FIXME: this discards src value information. This is
27803 // over-conservative. It would be beneficial to be able to remember
27804 // both potential memory locations. Since we are discarding
27805 // src value info, don't do the transformation if the memory
27806 // locations are not in the default address space.
27807 LLD->getPointerInfo().getAddrSpace() != 0 ||
27808 RLD->getPointerInfo().getAddrSpace() != 0 ||
27809 // We can't produce a CMOV of a TargetFrameIndex since we won't
27810 // generate the address generation required.
27811 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27812 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27813 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27814 LLD->getBasePtr().getValueType()))
27815 return false;
27816
27817 // The loads must not depend on one another.
27818 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27819 return false;
27820
27821 // Check that the select condition doesn't reach either load. If so,
27822 // folding this will induce a cycle into the DAG. If not, this is safe to
27823 // xform, so create a select of the addresses.
27824
27825 SmallPtrSet<const SDNode *, 32> Visited;
27826 SmallVector<const SDNode *, 16> Worklist;
27827
27828 // Always fail if LLD and RLD are not independent. TheSelect is a
27829 // predecessor to all Nodes in question so we need not search past it.
27830
27831 Visited.insert(TheSelect);
27832 Worklist.push_back(LLD);
27833 Worklist.push_back(RLD);
27834
27835 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27836 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27837 return false;
27838
27839 SDValue Addr;
27840 if (TheSelect->getOpcode() == ISD::SELECT) {
27841 // We cannot do this optimization if any pair of {RLD, LLD} is a
27842 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27843 // Loads, we only need to check if CondNode is a successor to one of the
27844 // loads. We can further avoid this if there's no use of their chain
27845 // value.
27846 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27847 Worklist.push_back(CondNode);
27848
27849 if ((LLD->hasAnyUseOfValue(1) &&
27850 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27851 (RLD->hasAnyUseOfValue(1) &&
27852 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27853 return false;
27854
27855 Addr = DAG.getSelect(SDLoc(TheSelect),
27856 LLD->getBasePtr().getValueType(),
27857 TheSelect->getOperand(0), LLD->getBasePtr(),
27858 RLD->getBasePtr());
27859 } else { // Otherwise SELECT_CC
27860 // We cannot do this optimization if any pair of {RLD, LLD} is a
27861 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27862 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27863 // one of the loads. We can further avoid this if there's no use of their
27864 // chain value.
27865
27866 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27867 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27868 Worklist.push_back(CondLHS);
27869 Worklist.push_back(CondRHS);
27870
27871 if ((LLD->hasAnyUseOfValue(1) &&
27872 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27873 (RLD->hasAnyUseOfValue(1) &&
27874 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27875 return false;
27876
27877 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27878 LLD->getBasePtr().getValueType(),
27879 TheSelect->getOperand(0),
27880 TheSelect->getOperand(1),
27881 LLD->getBasePtr(), RLD->getBasePtr(),
27882 TheSelect->getOperand(4));
27883 }
27884
27885 SDValue Load;
27886 // It is safe to replace the two loads if they have different alignments,
27887 // but the new load must use the minimum (most restrictive) alignment of the
27888 // inputs.
27889 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27890 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27891 if (!RLD->isInvariant())
27892 MMOFlags &= ~MachineMemOperand::MOInvariant;
27893 if (!RLD->isDereferenceable())
27894 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27895 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27896 // FIXME: Discards pointer and AA info.
27897 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27898 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27899 MMOFlags);
27900 } else {
27901 // FIXME: Discards pointer and AA info.
27902 Load = DAG.getExtLoad(
27903 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27904 : LLD->getExtensionType(),
27905 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27906 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27907 }
27908
27909 // Users of the select now use the result of the load.
27910 CombineTo(TheSelect, Load);
27911
27912 // Users of the old loads now use the new load's chain. We know the
27913 // old-load value is dead now.
27914 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27915 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27916 return true;
27917 }
27918
27919 return false;
27920}
27921
27922/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27923/// bitwise 'and'.
27924SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27925 SDValue N1, SDValue N2, SDValue N3,
27926 ISD::CondCode CC) {
27927 // If this is a select where the false operand is zero and the compare is a
27928 // check of the sign bit, see if we can perform the "gzip trick":
27929 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27930 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
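  // E.g. for i32, the setlt form becomes and (sra X, 31), A: (sra X, 31) is
  // all-ones exactly when X is negative and zero otherwise, so the select
  // collapses to a mask.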
27931 EVT XType = N0.getValueType();
27932 EVT AType = N2.getValueType();
27933 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27934 return SDValue();
27935
27936 // If the comparison is testing for a positive value, we have to invert
27937 // the sign bit mask, so only do that transform if the target has a bitwise
27938 // 'and not' instruction (the invert is free).
27939 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27940 // (X > -1) ? A : 0
27941 // (X > 0) ? X : 0 <-- This is canonical signed max.
27942 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27943 return SDValue();
27944 } else if (CC == ISD::SETLT) {
27945 // (X < 0) ? A : 0
27946 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27947 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27948 return SDValue();
27949 } else {
27950 return SDValue();
27951 }
27952
27953 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27954 // constant.
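  // E.g. for i32 with A == 4 (bit 2): ShCt = 32 - 2 - 1 = 29, so the fold
  // emits and (srl X, 29), 4, which moves the sign bit into bit position 2.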
27955 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27956 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27957 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27958 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27959 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
27960 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27961 AddToWorklist(Shift.getNode());
27962
27963 if (XType.bitsGT(AType)) {
27964 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27965 AddToWorklist(Shift.getNode());
27966 }
27967
27968 if (CC == ISD::SETGT)
27969 Shift = DAG.getNOT(DL, Shift, AType);
27970
27971 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27972 }
27973 }
27974
27975 unsigned ShCt = XType.getSizeInBits() - 1;
27976 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27977 return SDValue();
27978
27979 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
27980 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27981 AddToWorklist(Shift.getNode());
27982
27983 if (XType.bitsGT(AType)) {
27984 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27985 AddToWorklist(Shift.getNode());
27986 }
27987
27988 if (CC == ISD::SETGT)
27989 Shift = DAG.getNOT(DL, Shift, AType);
27990
27991 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27992}
27993
27994// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27995SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27996 SDValue N0 = N->getOperand(0);
27997 SDValue N1 = N->getOperand(1);
27998 SDValue N2 = N->getOperand(2);
27999 SDLoc DL(N);
28000
28001 unsigned BinOpc = N1.getOpcode();
28002 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28003 (N1.getResNo() != N2.getResNo()))
28004 return SDValue();
28005
28006 // The use checks are intentionally on SDNode because we may be dealing
28007 // with opcodes that produce more than one SDValue.
28008 // TODO: Do we really need to check N0 (the condition operand of the select)?
28009 // But removing that clause could cause an infinite loop...
28010 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28011 return SDValue();
28012
28013 // Binops may include opcodes that return multiple values, so all values
28014 // must be created/propagated from the newly created binops below.
28015 SDVTList OpVTs = N1->getVTList();
28016
28017 // Fold select(cond, binop(x, y), binop(z, y))
28018 // --> binop(select(cond, x, z), y)
28019 if (N1.getOperand(1) == N2.getOperand(1)) {
28020 SDValue N10 = N1.getOperand(0);
28021 SDValue N20 = N2.getOperand(0);
28022 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28023 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
28024 NewBinOp->setFlags(N1->getFlags());
28025 NewBinOp->intersectFlagsWith(N2->getFlags());
28026 return SDValue(NewBinOp.getNode(), N1.getResNo());
28027 }
28028
28029 // Fold select(cond, binop(x, y), binop(x, z))
28030 // --> binop(x, select(cond, y, z))
28031 if (N1.getOperand(0) == N2.getOperand(0)) {
28032 SDValue N11 = N1.getOperand(1);
28033 SDValue N21 = N2.getOperand(1);
28034 // Second op VT might be different (e.g. shift amount type)
28035 if (N11.getValueType() == N21.getValueType()) {
28036 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28037 SDValue NewBinOp =
28038 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
28039 NewBinOp->setFlags(N1->getFlags());
28040 NewBinOp->intersectFlagsWith(N2->getFlags());
28041 return SDValue(NewBinOp.getNode(), N1.getResNo());
28042 }
28043 }
28044
28045 // TODO: Handle isCommutativeBinOp patterns as well?
28046 return SDValue();
28047}
28048
28049// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28050SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28051 SDValue N0 = N->getOperand(0);
28052 EVT VT = N->getValueType(0);
28053 bool IsFabs = N->getOpcode() == ISD::FABS;
28054 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28055
28056 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28057 return SDValue();
28058
28059 SDValue Int = N0.getOperand(0);
28060 EVT IntVT = Int.getValueType();
28061
28062 // The operand of the cast should be an integer.
28063 if (!IntVT.isInteger() || IntVT.isVector())
28064 return SDValue();
28065
28066 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28067 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
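  // E.g. for f32 the sign constant is 0x80000000 for fneg (xor) and its
  // inverse 0x7FFFFFFF for fabs (and).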
28068 APInt SignMask;
28069 if (N0.getValueType().isVector()) {
28070 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28071 // 0x7f...) per element and splat it.
28072 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28073 if (IsFabs)
28074 SignMask = ~SignMask;
28075 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28076 } else {
28077 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28078 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28079 if (IsFabs)
28080 SignMask = ~SignMask;
28081 }
28082 SDLoc DL(N0);
28083 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28084 DAG.getConstant(SignMask, DL, IntVT));
28085 AddToWorklist(Int.getNode());
28086 return DAG.getBitcast(VT, Int);
28087}
28088
28089 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28090/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28091/// in it. This may be a win when the constant is not otherwise available
28092/// because it replaces two constant pool loads with one.
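/// The constant pool array is laid out as {FV, TV}, so the setcc result picks
/// byte offset 0 for the false value and the element size for the true value.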
28093SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28094 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28095 ISD::CondCode CC) {
28096 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28097 return SDValue();
28098
28099 // If we are before legalize types, we want the other legalization to happen
28100 // first (for example, to avoid messing with soft float).
28101 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28102 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28103 EVT VT = N2.getValueType();
28104 if (!TV || !FV || !TLI.isTypeLegal(VT))
28105 return SDValue();
28106
28107 // If a constant can be materialized without loads, this does not make sense.
28108 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
28109 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28110 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28111 return SDValue();
28112
28113 // If both constants have multiple uses, then we won't need to do an extra
28114 // load. The values are likely around in registers for other users.
28115 if (!TV->hasOneUse() && !FV->hasOneUse())
28116 return SDValue();
28117
28118 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28119 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28120 Type *FPTy = Elts[0]->getType();
28121 const DataLayout &TD = DAG.getDataLayout();
28122
28123 // Create a ConstantArray of the two constants.
28124 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
28125 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
28126 TD.getPrefTypeAlign(FPTy));
28127 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28128
28129 // Get offsets to the 0 and 1 elements of the array, so we can select between
28130 // them.
28131 SDValue Zero = DAG.getIntPtrConstant(0, DL);
28132 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28133 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
28134 SDValue Cond =
28135 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
28136 AddToWorklist(Cond.getNode());
28137 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
28138 AddToWorklist(CstOffset.getNode());
28139 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
28140 AddToWorklist(CPIdx.getNode());
28141 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
28142 MachinePointerInfo::getConstantPool(
28143 DAG.getMachineFunction()), Alignment);
28144}
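// Illustrative sketch (not part of DAGCombiner): the shape of the code the
// transform above produces, with a plain array standing in for the constant
// pool entry. The pool holds {FV, TV}, so offset 0 selects the false value
// and offset sizeof(float) selects the true value; a single indexed load
// replaces the materialization of two FP constants.
static float selectFPViaLoadOffset(bool Cond) {
  static const float Pool[2] = {2.0f, 1.0f}; // {FV, TV}
  unsigned Offset = Cond ? sizeof(float) : 0u;
  const char *Base = reinterpret_cast<const char *>(Pool);
  return *reinterpret_cast<const float *>(Base + Offset);
}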
28145
28146/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
28147/// where 'cond' is the comparison specified by CC.
28148SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
28149 SDValue N2, SDValue N3, ISD::CondCode CC,
28150 bool NotExtCompare) {
28151 // (x ? y : y) -> y.
28152 if (N2 == N3) return N2;
28153
28154 EVT CmpOpVT = N0.getValueType();
28155 EVT CmpResVT = getSetCCResultType(CmpOpVT);
28156 EVT VT = N2.getValueType();
28157 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
28158 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28159 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
28160
28161 // Determine if the condition we're dealing with is constant.
28162 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
28163 AddToWorklist(SCC.getNode());
28164 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
28165 // fold select_cc true, x, y -> x
28166 // fold select_cc false, x, y -> y
28167 return !(SCCC->isZero()) ? N2 : N3;
28168 }
28169 }
28170
28171 if (SDValue V =
28172 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
28173 return V;
28174
28175 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
28176 return V;
28177
28178 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
28179 // where y has a single bit set.
28180 // In plain terms, we can turn the SELECT_CC into an AND
28181 // when the condition can be materialized as an all-ones register. Any
28182 // single bit-test can be materialized as an all-ones register with
28183 // shift-left and shift-right-arith.
28184 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
28185 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
28186 SDValue AndLHS = N0->getOperand(0);
28187 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
28188 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
28189 // Shift the tested bit over the sign bit.
28190 const APInt &AndMask = ConstAndRHS->getAPIntValue();
28191 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
28192 unsigned ShCt = AndMask.getBitWidth() - 1;
28193 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
28194 SDLoc(AndLHS));
28195 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
28196
28197 // Now arithmetic right shift it all the way over, so the result is
28198 // either all-ones, or zero.
28199 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
28200 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
28201
28202 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
28203 }
28204 }
28205 }
28206
28207 // fold select C, 16, 0 -> shl C, 4
28208 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
28209 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
28210
28211 if ((Fold || Swap) &&
28212 TLI.getBooleanContents(CmpOpVT) ==
28213 TargetLowering::ZeroOrOneBooleanContent &&
28214 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
28215
28216 if (Swap) {
28217 CC = ISD::getSetCCInverse(CC, CmpOpVT);
28218 std::swap(N2C, N3C);
28219 }
28220
28221 // If the caller doesn't want us to simplify this into a zext of a compare,
28222 // don't do it.
28223 if (NotExtCompare && N2C->isOne())
28224 return SDValue();
28225
28226 SDValue Temp, SCC;
28227 // zext (setcc n0, n1)
28228 if (LegalTypes) {
28229 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
28230 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
28231 } else {
28232 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
28233 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
28234 }
28235
28236 AddToWorklist(SCC.getNode());
28237 AddToWorklist(Temp.getNode());
28238
28239 if (N2C->isOne())
28240 return Temp;
28241
28242 unsigned ShCt = N2C->getAPIntValue().logBase2();
28243 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
28244 return SDValue();
28245
28246 // shl setcc result by log2 n2c
28247 return DAG.getNode(
28248 ISD::SHL, DL, N2.getValueType(), Temp,
28249 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
28250 }
28251
28252 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
28253 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
28254 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
28255 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
28256 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
28257 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
28258 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
28259 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
28260 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
28261 SDValue ValueOnZero = N2;
28262 SDValue Count = N3;
28263 // If the condition is NE instead of EQ, swap the operands.
28264 if (CC == ISD::SETNE)
28265 std::swap(ValueOnZero, Count);
28266 // Check if the value on zero is a constant equal to the bits in the type.
28267 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
28268 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
28269 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
28270 // legal, combine to just cttz.
28271 if ((Count.getOpcode() == ISD::CTTZ ||
28272 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
28273 N0 == Count.getOperand(0) &&
28274 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
28275 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
28276 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
28277 // legal, combine to just ctlz.
28278 if ((Count.getOpcode() == ISD::CTLZ ||
28279 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
28280 N0 == Count.getOperand(0) &&
28281 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
28282 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
28283 }
28284 }
28285 }
28286
28287 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
28288 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
28289 if (!NotExtCompare && N1C && N2C && N3C &&
28290 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
28291 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
28292 (N1C->isZero() && CC == ISD::SETLT)) &&
28293 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
28294 SDValue ASR = DAG.getNode(
28295 ISD::SRA, DL, CmpOpVT, N0,
28296 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
28297 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
28298 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
28299 }
28300
28301 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28302 return S;
28303 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28304 return S;
28305 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
28306 return ABD;
28307
28308 return SDValue();
28309}
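// Illustrative sketch (not part of DAGCombiner): the single-bit-test fold
// from SimplifySelectCC above, in plain C++ (assumes arithmetic right shift
// on int32_t, guaranteed since C++20). The tested bit is moved into the sign
// position and then smeared across the word, so the select collapses to an
// AND with an all-ones-or-zero mask.
#include <cstdint>

static int32_t selectBitViaShifts(int32_t X, unsigned Bit, int32_t A) {
  int32_t Shl = static_cast<int32_t>(static_cast<uint32_t>(X) << (31u - Bit));
  int32_t Mask = Shl >> 31; // all-ones if bit 'Bit' of X was set, else zero
  return Mask & A;          // equals ((X & (1 << Bit)) == 0) ? 0 : A
}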
28310
28311/// This is a stub for TargetLowering::SimplifySetCC.
28312SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
28313 ISD::CondCode Cond, const SDLoc &DL,
28314 bool foldBooleans) {
28315 TargetLowering::DAGCombinerInfo
28316 DagCombineInfo(DAG, Level, false, this);
28317 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
28318}
28319
28320/// Given an ISD::SDIV node expressing a divide by constant, return
28321/// a DAG expression to select that will generate the same value by multiplying
28322/// by a magic number.
28323/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
28324SDValue DAGCombiner::BuildSDIV(SDNode *N) {
28325 // when optimising for minimum size, we don't want to expand a div to a mul
28326 // and a shift.
28327 if (DAG.getMachineFunction().getFunction().hasMinSize())
28328 return SDValue();
28329
28330 SmallVector<SDNode *, 8> Built;
28331 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28332 for (SDNode *N : Built)
28333 AddToWorklist(N);
28334 return S;
28335 }
28336
28337 return SDValue();
28338}
28339
28340/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
28341/// DAG expression that will generate the same value by right shifting.
28342SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
28343 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28344 if (!C)
28345 return SDValue();
28346
28347 // Avoid division by zero.
28348 if (C->isZero())
28349 return SDValue();
28350
28351 SmallVector<SDNode *, 8> Built;
28352 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
28353 for (SDNode *N : Built)
28354 AddToWorklist(N);
28355 return S;
28356 }
28357
28358 return SDValue();
28359}
28360
28361/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
28362/// expression that will generate the same value by multiplying by a magic
28363/// number.
28364/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
28365SDValue DAGCombiner::BuildUDIV(SDNode *N) {
28366 // when optimising for minimum size, we don't want to expand a div to a mul
28367 // and a shift.
28368 if (DAG.getMachineFunction().getFunction().hasMinSize())
28369 return SDValue();
28370
28371 SmallVector<SDNode *, 8> Built;
28372 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28373 for (SDNode *N : Built)
28374 AddToWorklist(N);
28375 return S;
28376 }
28377
28378 return SDValue();
28379}
28380
28381/// Given an ISD::SREM node expressing a remainder by constant power of 2,
28382/// return a DAG expression that will generate the same value.
28383SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
28384 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28385 if (!C)
28386 return SDValue();
28387
28388 // Avoid division by zero.
28389 if (C->isZero())
28390 return SDValue();
28391
28392 SmallVector<SDNode *, 8> Built;
28393 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
28394 for (SDNode *N : Built)
28395 AddToWorklist(N);
28396 return S;
28397 }
28398
28399 return SDValue();
28400}
28401
28402// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
28403//
28404// Returns the node that represents `Log2(Op)`. This may create a new node. If
28405// we are unable to compute `Log2(Op)`, this returns `SDValue()`.
28406//
28407// All nodes will be created at `DL` and the output will be of type `VT`.
28408//
28409// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
28410// `AssumeNonZero` if this function should simply assume (rather than require
28411// proving) that `Op` is non-zero.
28412 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
28413 SDValue Op, unsigned Depth,
28414 bool AssumeNonZero) {
28415 assert(VT.isInteger() && "Only integer types are supported!");
28416
28417 auto PeekThroughCastsAndTrunc = [](SDValue V) {
28418 while (true) {
28419 switch (V.getOpcode()) {
28420 case ISD::TRUNCATE:
28421 case ISD::ZERO_EXTEND:
28422 V = V.getOperand(0);
28423 break;
28424 default:
28425 return V;
28426 }
28427 }
28428 };
28429
28430 if (VT.isScalableVector())
28431 return SDValue();
28432
28433 Op = PeekThroughCastsAndTrunc(Op);
28434
28435 // Helper for determining whether a value is a power-2 constant scalar or a
28436 // vector of such elements.
28437 SmallVector<APInt> Pow2Constants;
28438 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
28439 if (C->isZero() || C->isOpaque())
28440 return false;
28441 // TODO: We may also be able to support negative powers of 2 here.
28442 if (C->getAPIntValue().isPowerOf2()) {
28443 Pow2Constants.emplace_back(C->getAPIntValue());
28444 return true;
28445 }
28446 return false;
28447 };
28448
28449 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
28450 if (!VT.isVector())
28451 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
28452 // We need to create a build vector
28453 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
28454 return DAG.getSplat(VT, DL,
28455 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
28456 VT.getScalarType()));
28457 SmallVector<SDValue> Log2Ops;
28458 for (const APInt &Pow2 : Pow2Constants)
28459 Log2Ops.emplace_back(
28460 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
28461 return DAG.getBuildVector(VT, DL, Log2Ops);
28462 }
28463
28464 if (Depth >= DAG.MaxRecursionDepth)
28465 return SDValue();
28466
28467 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
28468 ToCast = PeekThroughCastsAndTrunc(ToCast);
28469 EVT CurVT = ToCast.getValueType();
28470 if (NewVT == CurVT)
28471 return ToCast;
28472
28473 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
28474 return DAG.getBitcast(NewVT, ToCast);
28475
28476 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
28477 };
28478
28479 // log2(X << Y) -> log2(X) + Y
28480 if (Op.getOpcode() == ISD::SHL) {
28481 // 1 << Y and X nuw/nsw << Y are all non-zero.
28482 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
28483 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
28484 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
28485 Depth + 1, AssumeNonZero))
28486 return DAG.getNode(ISD::ADD, DL, VT, LogX,
28487 CastToVT(VT, Op.getOperand(1)));
28488 }
28489
28490 // c ? X : Y -> c ? Log2(X) : Log2(Y)
28491 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
28492 Op.hasOneUse()) {
28493 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
28494 Depth + 1, AssumeNonZero))
28495 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
28496 Depth + 1, AssumeNonZero))
28497 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
28498 }
28499
28500 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
28501 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
28502 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
28503 Op.hasOneUse()) {
28504 // Pass AssumeNonZero as false here. Otherwise we can hit a case where
28505 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
28506 if (SDValue LogX =
28507 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
28508 /*AssumeNonZero*/ false))
28509 if (SDValue LogY =
28510 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
28511 /*AssumeNonZero*/ false))
28512 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
28513 }
28514
28515 return SDValue();
28516}
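// Illustrative sketch (not part of DAGCombiner): the identities exploited by
// takeInexpensiveLog2 above, checked on plain unsigned integers (assumes
// C++20 <bit>). For a power of two P, log2(P) is its trailing-zero count, and
// log2(P << Y) == log2(P) + Y as long as the shift does not wrap.
#include <bit>
#include <cstdint>

static unsigned log2OfPow2(uint32_t P) {
  return static_cast<unsigned>(std::countr_zero(P)); // P: non-zero power of two
}

static unsigned log2OfShl(uint32_t P, unsigned Y) {
  return log2OfPow2(P) + Y; // matches log2(P << Y) when the shift doesn't wrap
}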
28517
28518/// Determines the LogBase2 value for a non-null input value using the
28519/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
28520SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
28521 bool KnownNonZero, bool InexpensiveOnly,
28522 std::optional<EVT> OutVT) {
28523 EVT VT = OutVT ? *OutVT : V.getValueType();
28524 SDValue InexpensiveLogBase2 =
28525 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28526 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
28527 return InexpensiveLogBase2;
28528
28529 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28530 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28531 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28532 return LogBase2;
28533}
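// Illustrative sketch (not part of DAGCombiner): the CTLZ-based formula used
// by BuildLogBase2 above, evaluated on a 32-bit value (assumes C++20 <bit>).
// For a non-zero power of two V, (31 - countl_zero(V)) is exactly log2(V).
#include <bit>
#include <cstdint>

static unsigned logBase2ViaCtlz(uint32_t V) {
  return 31u - static_cast<unsigned>(std::countl_zero(V)); // V must be non-zero
}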
28534
28535/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28536/// For the reciprocal, we need to find the zero of the function:
28537/// F(X) = 1/X - A [which has a zero at X = 1/A]
28538/// =>
28539/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28540/// does not require additional intermediate precision]
28541/// For the last iteration, put numerator N into it to gain more precision:
28542/// Result = N X_i + X_i (N - N A X_i)
28543SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28544 SDNodeFlags Flags) {
28545 if (LegalDAG)
28546 return SDValue();
28547
28548 // TODO: Handle extended types?
28549 EVT VT = Op.getValueType();
28550 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28551 VT.getScalarType() != MVT::f64)
28552 return SDValue();
28553
28554 // If estimates are explicitly disabled for this function, we're done.
28555 MachineFunction &MF = DAG.getMachineFunction();
28556 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28557 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28558 return SDValue();
28559
28560 // Estimates may be explicitly enabled for this type with a custom number of
28561 // refinement steps.
28562 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28563 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28564 AddToWorklist(Est.getNode());
28565
28566 SDLoc DL(Op);
28567 if (Iterations) {
28568 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28569
28570 // Newton iterations: Est = Est + Est (N - Arg * Est)
28571 // If this is the last iteration, also multiply by the numerator.
28572 for (int i = 0; i < Iterations; ++i) {
28573 SDValue MulEst = Est;
28574
28575 if (i == Iterations - 1) {
28576 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28577 AddToWorklist(MulEst.getNode());
28578 }
28579
28580 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28581 AddToWorklist(NewEst.getNode());
28582
28583 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28584 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28585 AddToWorklist(NewEst.getNode());
28586
28587 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28588 AddToWorklist(NewEst.getNode());
28589
28590 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28591 AddToWorklist(Est.getNode());
28592 }
28593 } else {
28594 // If no iterations are available, multiply with N.
28595 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28596 AddToWorklist(Est.getNode());
28597 }
28598
28599 return Est;
28600 }
28601
28602 return SDValue();
28603}
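// Illustrative sketch (not part of DAGCombiner): the Newton-Raphson
// refinement above, run on plain floats. Starting from a rough estimate X of
// 1/A, each step computes X = X + X * (1 - A * X); the final step folds the
// numerator N in, exactly as BuildDivEstimate does.
static float divEstimate(float N, float A, float X /*rough 1/A*/,
                         int Iterations) {
  for (int I = 0; I < Iterations; ++I) {
    float Mul = (I == Iterations - 1) ? N * X : X;
    float Err = (I == Iterations - 1 ? N : 1.0f) - A * Mul;
    X = Mul + X * Err;
  }
  if (Iterations == 0)
    X *= N; // no refinement steps: just fold the numerator in
  return X; // approximates N / A
}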
28604
28605/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28606/// For the reciprocal sqrt, we need to find the zero of the function:
28607/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28608/// =>
28609/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28610/// As a result, we precompute A/2 prior to the iteration loop.
28611SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28612 unsigned Iterations,
28613 SDNodeFlags Flags, bool Reciprocal) {
28614 EVT VT = Arg.getValueType();
28615 SDLoc DL(Arg);
28616 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28617
28618 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28619 // this entire sequence requires only one FP constant.
28620 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28621 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28622
28623 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28624 for (unsigned i = 0; i < Iterations; ++i) {
28625 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28626 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28627 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28628 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28629 }
28630
28631 // If non-reciprocal square root is requested, multiply the result by Arg.
28632 if (!Reciprocal)
28633 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28634
28635 return Est;
28636}
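// Illustrative sketch (not part of DAGCombiner): the one-constant iteration
// above on plain floats. HalfArg is formed as 1.5*A - A so that the whole
// sequence needs only the single FP constant 1.5.
static float rsqrtNROneConst(float A, float Est, unsigned Iterations,
                             bool Reciprocal) {
  const float ThreeHalves = 1.5f;
  float HalfArg = ThreeHalves * A - A; // == 0.5 * A
  for (unsigned I = 0; I < Iterations; ++I)
    Est = Est * (ThreeHalves - HalfArg * Est * Est);
  return Reciprocal ? Est : Est * A; // sqrt(A) == A * rsqrt(A)
}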
28637
28638/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28639/// For the reciprocal sqrt, we need to find the zero of the function:
28640/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28641/// =>
28642/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
28643SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28644 unsigned Iterations,
28645 SDNodeFlags Flags, bool Reciprocal) {
28646 EVT VT = Arg.getValueType();
28647 SDLoc DL(Arg);
28648 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28649 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28650
28651 // This routine must enter the loop below to work correctly
28652 // when (Reciprocal == false).
28653 assert(Iterations > 0);
28654
28655 // Newton iterations for reciprocal square root:
28656 // E = (E * -0.5) * ((A * E) * E + -3.0)
28657 for (unsigned i = 0; i < Iterations; ++i) {
28658 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28659 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28660 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28661
28662 // When calculating a square root at the last iteration build:
28663 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28664 // (notice a common subexpression)
28665 SDValue LHS;
28666 if (Reciprocal || (i + 1) < Iterations) {
28667 // RSQRT: LHS = (E * -0.5)
28668 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28669 } else {
28670 // SQRT: LHS = (A * E) * -0.5
28671 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28672 }
28673
28674 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28675 }
28676
28677 return Est;
28678}
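// Illustrative sketch (not part of DAGCombiner): the two-constant variant
// above on plain floats, including the last-iteration trick that reuses A*E
// to produce sqrt(A) rather than rsqrt(A).
static float rsqrtNRTwoConst(float A, float Est, unsigned Iterations,
                             bool Reciprocal) {
  for (unsigned I = 0; I < Iterations; ++I) {
    float AE = A * Est;
    float RHS = AE * Est + -3.0f;
    float LHS = (Reciprocal || I + 1 < Iterations) ? Est * -0.5f : AE * -0.5f;
    Est = LHS * RHS;
  }
  return Est;
}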
28679
28680/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28681/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28682/// Op can be zero.
28683SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28684 bool Reciprocal) {
28685 if (LegalDAG)
28686 return SDValue();
28687
28688 // TODO: Handle extended types?
28689 EVT VT = Op.getValueType();
28690 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28691 VT.getScalarType() != MVT::f64)
28692 return SDValue();
28693
28694 // If estimates are explicitly disabled for this function, we're done.
28695 MachineFunction &MF = DAG.getMachineFunction();
28696 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28697 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28698 return SDValue();
28699
28700 // Estimates may be explicitly enabled for this type with a custom number of
28701 // refinement steps.
28702 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28703
28704 bool UseOneConstNR = false;
28705 if (SDValue Est =
28706 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28707 Reciprocal)) {
28708 AddToWorklist(Est.getNode());
28709
28710 if (Iterations > 0)
28711 Est = UseOneConstNR
28712 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28713 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28714 if (!Reciprocal) {
28715 SDLoc DL(Op);
28716 // Try the target specific test first.
28717 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28718
28719 // The estimate is now completely wrong if the input was exactly 0.0 or
28720 // possibly a denormal. Force the answer to 0.0 or value provided by
28721 // target for those cases.
28722 Est = DAG.getNode(
28723 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28724 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28725 }
28726 return Est;
28727 }
28728
28729 return SDValue();
28730}
28731
28732SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28733 return buildSqrtEstimateImpl(Op, Flags, true);
28734}
28735
28736SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28737 return buildSqrtEstimateImpl(Op, Flags, false);
28738}
28739
28740/// Return true if there is any possibility that the two addresses overlap.
28741bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28742
28743 struct MemUseCharacteristics {
28744 bool IsVolatile;
28745 bool IsAtomic;
28746 SDValue BasePtr;
28747 int64_t Offset;
28748 LocationSize NumBytes;
28749 MachineMemOperand *MMO;
28750 };
28751
28752 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28753 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28754 int64_t Offset = 0;
28755 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28756 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28757 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28758 ? -1 * C->getSExtValue()
28759 : 0;
28760 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28761 return {LSN->isVolatile(), LSN->isAtomic(),
28762 LSN->getBasePtr(), Offset /*base offset*/,
28763 LocationSize::precise(Size), LSN->getMemOperand()};
28764 }
28765 if (const auto *LN = cast<LifetimeSDNode>(N))
28766 return {false /*isVolatile*/,
28767 /*isAtomic*/ false,
28768 LN->getOperand(1),
28769 (LN->hasOffset()) ? LN->getOffset() : 0,
28770 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28771 : LocationSize::beforeOrAfterPointer(),
28772 (MachineMemOperand *)nullptr};
28773 // Default.
28774 return {false /*isvolatile*/,
28775 /*isAtomic*/ false,
28776 SDValue(),
28777 (int64_t)0 /*offset*/,
28778 LocationSize::beforeOrAfterPointer(),
28779 (MachineMemOperand *)nullptr};
28780 };
28781
28782 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28783 MUC1 = getCharacteristics(Op1);
28784
28785 // If they are to the same address, then they must be aliases.
28786 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28787 MUC0.Offset == MUC1.Offset)
28788 return true;
28789
28790 // If they are both volatile then they cannot be reordered.
28791 if (MUC0.IsVolatile && MUC1.IsVolatile)
28792 return true;
28793
28794 // Be conservative about atomics for the moment
28795 // TODO: This is way overconservative for unordered atomics (see D66309)
28796 if (MUC0.IsAtomic && MUC1.IsAtomic)
28797 return true;
28798
28799 if (MUC0.MMO && MUC1.MMO) {
28800 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28801 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28802 return false;
28803 }
28804
28805 // If NumBytes is scalable and the offset is not 0, conservatively return
28806 // may-alias.
28807 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28808 MUC0.Offset != 0) ||
28809 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28810 MUC1.Offset != 0))
28811 return true;
28812 // Try to prove that there is aliasing, or that there is no aliasing. Either
28813 // way, we can return now. If nothing can be proved, proceed with more tests.
28814 bool IsAlias;
28815 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28816 DAG, IsAlias))
28817 return IsAlias;
28818
28819 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28820 // either are not known.
28821 if (!MUC0.MMO || !MUC1.MMO)
28822 return true;
28823
28824 // If one operation reads from invariant memory, and the other may store, they
28825 // cannot alias. These should really be checking the equivalent of mayWrite,
28826 // but it only matters for memory nodes other than load/store.
28827 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28828 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28829 return false;
28830
28831 // If we know required SrcValue1 and SrcValue2 have relatively large
28832 // alignment compared to the size and offset of the access, we may be able
28833 // to prove they do not alias. This check is conservative for now to catch
28834 // cases created by splitting vector types; it only works when the offsets are
28835 // multiples of the size of the data.
28836 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28837 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28838 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28839 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28840 LocationSize Size0 = MUC0.NumBytes;
28841 LocationSize Size1 = MUC1.NumBytes;
28842
28843 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28844 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28845 !Size1.isScalable() && Size0 == Size1 &&
28846 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28847 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28848 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28849 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28850 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28851
28852 // There is no overlap between these relatively aligned accesses of
28853 // similar size. Return no alias.
28854 if ((OffAlign0 + static_cast<int64_t>(
28855 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28856 (OffAlign1 + static_cast<int64_t>(
28857 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28858 return false;
28859 }
28860
28861 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28862 ? CombinerGlobalAA
28863 : DAG.getSubtarget().useAA();
28864#ifndef NDEBUG
28865 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28866 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28867 UseAA = false;
28868#endif
28869
28870 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28871 Size0.hasValue() && Size1.hasValue() &&
28872 // Can't represent a scalable size + fixed offset in LocationSize
28873 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28874 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28875 // Use alias analysis information.
28876 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28877 int64_t Overlap0 =
28878 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28879 int64_t Overlap1 =
28880 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28881 LocationSize Loc0 =
28882 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28883 LocationSize Loc1 =
28884 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28885 if (AA->isNoAlias(
28886 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28887 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28888 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28889 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28890 return false;
28891 }
28892
28893 // Otherwise we have to assume they alias.
28894 return true;
28895}
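// Illustrative sketch (not part of DAGCombiner): the alignment-based
// disjointness test used in mayAlias above, on plain integers. Two accesses
// of the same (positive) size whose offsets are multiples of that size cannot
// overlap if they fall into different slots of a common alignment window.
#include <cstdint>

static bool provablyDisjoint(int64_t Off0, int64_t Off1, int64_t Size,
                             int64_t Alignment) {
  if (Alignment <= Size || Off0 == Off1 || Off0 % Size || Off1 % Size)
    return false; // cannot conclude anything; treat as may-alias
  int64_t Slot0 = Off0 % Alignment, Slot1 = Off1 % Alignment;
  return Slot0 + Size <= Slot1 || Slot1 + Size <= Slot0;
}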
28896
28897/// Walk up chain skipping non-aliasing memory nodes,
28898/// looking for aliasing nodes and adding them to the Aliases vector.
28899void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28900 SmallVectorImpl<SDValue> &Aliases) {
28901 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28902 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28903
28904 // Get alias information for node.
28905 // TODO: relax aliasing for unordered atomics (see D66309)
28906 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28907
28908 // Starting off.
28909 Chains.push_back(OriginalChain);
28910 unsigned Depth = 0;
28911
28912 // Attempt to improve chain by a single step
28913 auto ImproveChain = [&](SDValue &C) -> bool {
28914 switch (C.getOpcode()) {
28915 case ISD::EntryToken:
28916 // No need to mark EntryToken.
28917 C = SDValue();
28918 return true;
28919 case ISD::LOAD:
28920 case ISD::STORE: {
28921 // Get alias information for C.
28922 // TODO: Relax aliasing for unordered atomics (see D66309)
28923 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28924 cast<LSBaseSDNode>(C.getNode())->isSimple();
28925 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28926 // Look further up the chain.
28927 C = C.getOperand(0);
28928 return true;
28929 }
28930 // Alias, so stop here.
28931 return false;
28932 }
28933
28934 case ISD::CopyFromReg:
28935 // Always forward past CopyFromReg.
28936 C = C.getOperand(0);
28937 return true;
28938
28939 case ISD::LIFETIME_START:
28940 case ISD::LIFETIME_END: {
28941 // We can forward past any lifetime start/end that can be proven not to
28942 // alias the memory access.
28943 if (!mayAlias(N, C.getNode())) {
28944 // Look further up the chain.
28945 C = C.getOperand(0);
28946 return true;
28947 }
28948 return false;
28949 }
28950 default:
28951 return false;
28952 }
28953 };
28954
28955 // Look at each chain and determine if it is an alias. If so, add it to the
28956 // aliases list. If not, then continue up the chain looking for the next
28957 // candidate.
28958 while (!Chains.empty()) {
28959 SDValue Chain = Chains.pop_back_val();
28960
28961 // Don't bother if we've seen Chain before.
28962 if (!Visited.insert(Chain.getNode()).second)
28963 continue;
28964
28965 // For TokenFactor nodes, look at each operand and only continue up the
28966 // chain until we reach the depth limit.
28967 //
28968 // FIXME: The depth check could be made to return the last non-aliasing
28969 // chain we found before we hit a tokenfactor rather than the original
28970 // chain.
28971 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28972 Aliases.clear();
28973 Aliases.push_back(OriginalChain);
28974 return;
28975 }
28976
28977 if (Chain.getOpcode() == ISD::TokenFactor) {
28978 // We have to check each of the operands of the token factor for "small"
28979 // token factors, so we queue them up. Adding the operands to the queue
28980 // (stack) in reverse order maintains the original order and increases the
28981 // likelihood that getNode will find a matching token factor (CSE).
28982 if (Chain.getNumOperands() > 16) {
28983 Aliases.push_back(Chain);
28984 continue;
28985 }
28986 for (unsigned n = Chain.getNumOperands(); n;)
28987 Chains.push_back(Chain.getOperand(--n));
28988 ++Depth;
28989 continue;
28990 }
28991 // Everything else
28992 if (ImproveChain(Chain)) {
28993 // Updated chain found; consider the new chain if one exists.
28994 if (Chain.getNode())
28995 Chains.push_back(Chain);
28996 ++Depth;
28997 continue;
28998 }
28999 // No improved chain possible; treat as an alias.
29000 Aliases.push_back(Chain);
29001 }
29002}
29003
29004/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29005/// (aliasing node.)
29006SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
29007 if (OptLevel == CodeGenOptLevel::None)
29008 return OldChain;
29009
29010 // Ops for replacing token factor.
29011 SmallVector<SDValue, 8> Aliases;
29012
29013 // Accumulate all the aliases to this node.
29014 GatherAllAliases(N, OldChain, Aliases);
29015
29016 // If no operands then chain to entry token.
29017 if (Aliases.empty())
29018 return DAG.getEntryNode();
29019
29020 // If a single operand then chain to it. We don't need to revisit it.
29021 if (Aliases.size() == 1)
29022 return Aliases[0];
29023
29024 // Construct a custom tailored token factor.
29025 return DAG.getTokenFactor(SDLoc(N), Aliases);
29026}
29027
29028// This function tries to collect a bunch of potentially interesting
29029// nodes to improve the chains of, all at once. This might seem
29030// redundant, as this function gets called when visiting every store
29031// node, so why not let the work be done on each store as it's visited?
29032//
29033// I believe this is mainly important because mergeConsecutiveStores
29034// is unable to deal with merging stores of different sizes, so unless
29035// we improve the chains of all the potential candidates up-front
29036// before running mergeConsecutiveStores, it might only see some of
29037// the nodes that will eventually be candidates, and then not be able
29038// to go from a partially-merged state to the desired final
29039// fully-merged state.
29040
29041bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
29042 SmallVector<StoreSDNode *, 8> ChainedStores;
29043 StoreSDNode *STChain = St;
29044 // Intervals records which offsets from BaseIndex have been covered. In
29045 // the common case, every store writes to the immediately preceding address
29046 // range and is thus merged with the previous interval at insertion time.
29047
29048 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
29049 IntervalMapHalfOpenInfo<int64_t>>;
29050 IMap::Allocator A;
29051 IMap Intervals(A);
29052
29053 // This holds the base pointer, index, and the offset in bytes from the base
29054 // pointer.
29055 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29056
29057 // We must have a base and an offset.
29058 if (!BasePtr.getBase().getNode())
29059 return false;
29060
29061 // Do not handle stores to undef base pointers.
29062 if (BasePtr.getBase().isUndef())
29063 return false;
29064
29065 // Do not handle stores to opaque types
29066 if (St->getMemoryVT().isZeroSized())
29067 return false;
29068
29069 // BaseIndexOffset assumes that offsets are fixed-size, which
29070 // is not valid for scalable vectors where the offsets are
29071 // scaled by `vscale`, so bail out early.
29072 if (St->getMemoryVT().isScalableVT())
29073 return false;
29074
29075 // Add ST's interval.
29076 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
29077 std::monostate{});
29078
29079 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
29080 if (Chain->getMemoryVT().isScalableVector())
29081 return false;
29082
29083 // If the chain has more than one use, then we can't reorder the mem ops.
29084 if (!SDValue(Chain, 0)->hasOneUse())
29085 break;
29086 // TODO: Relax for unordered atomics (see D66309)
29087 if (!Chain->isSimple() || Chain->isIndexed())
29088 break;
29089
29090 // Find the base pointer and offset for this memory node.
29091 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
29092 // Check that the base pointer is the same as the original one.
29093 int64_t Offset;
29094 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
29095 break;
29096 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
29097 // Make sure we don't overlap with other intervals by checking the ones to
29098 // the left or right before inserting.
29099 auto I = Intervals.find(Offset);
29100 // If there's a next interval, we should end before it.
29101 if (I != Intervals.end() && I.start() < (Offset + Length))
29102 break;
29103 // If there's a previous interval, we should start after it.
29104 if (I != Intervals.begin() && (--I).stop() <= Offset)
29105 break;
29106 Intervals.insert(Offset, Offset + Length, std::monostate{});
29107
29108 ChainedStores.push_back(Chain);
29109 STChain = Chain;
29110 }
29111
29112 // If we didn't find a chained store, exit.
29113 if (ChainedStores.empty())
29114 return false;
29115
29116 // Improve all chained stores (St and ChainedStores members) starting from
29117 // where the store chain ended and return single TokenFactor.
29118 SDValue NewChain = STChain->getChain();
29119 SmallVector<SDValue, 8> TFOps;
29120 for (unsigned I = ChainedStores.size(); I;) {
29121 StoreSDNode *S = ChainedStores[--I];
29122 SDValue BetterChain = FindBetterChain(S, NewChain);
29123 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
29124 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
29125 TFOps.push_back(SDValue(S, 0));
29126 ChainedStores[I] = S;
29127 }
29128
29129 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
29130 SDValue BetterChain = FindBetterChain(St, NewChain);
29131 SDValue NewST;
29132 if (St->isTruncatingStore())
29133 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
29134 St->getBasePtr(), St->getMemoryVT(),
29135 St->getMemOperand());
29136 else
29137 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
29138 St->getBasePtr(), St->getMemOperand());
29139
29140 TFOps.push_back(NewST);
29141
29142 // If we improved every element of TFOps, then we've lost the dependence on
29143 // NewChain to successors of St and we need to add it back to TFOps. Do so at
29144 // the beginning to keep relative order consistent with FindBetterChains.
29145 auto hasImprovedChain = [&](SDValue ST) -> bool {
29146 return ST->getOperand(0) != NewChain;
29147 };
29148 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
29149 if (AddNewChain)
29150 TFOps.insert(TFOps.begin(), NewChain);
29151
29152 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
29153 CombineTo(St, TF);
29154
29155 // Add TF and its operands to the worklist.
29156 AddToWorklist(TF.getNode());
29157 for (const SDValue &Op : TF->ops())
29158 AddToWorklist(Op.getNode());
29159 AddToWorklist(STChain);
29160 return true;
29161}
29162
29163bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
29164 if (OptLevel == CodeGenOptLevel::None)
29165 return false;
29166
29167 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29168
29169 // We must have a base and an offset.
29170 if (!BasePtr.getBase().getNode())
29171 return false;
29172
29173 // Do not handle stores to undef base pointers.
29174 if (BasePtr.getBase().isUndef())
29175 return false;
29176
29177 // Directly improve a chain of disjoint stores starting at St.
29178 if (parallelizeChainedStores(St))
29179 return true;
29180
29181 // Improve St's chain.
29182 SDValue BetterChain = FindBetterChain(St, St->getChain());
29183 if (St->getChain() != BetterChain) {
29184 replaceStoreChain(St, BetterChain);
29185 return true;
29186 }
29187 return false;
29188}
29189
29190/// This is the entry point for the file.
29191 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
29192 CodeGenOptLevel OptLevel) {
29193 /// This is the main entry point to this class.
29194 DAGCombiner(*this, AA, OptLevel).Run(Level);
29195}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static const LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
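The equivalence being exploited can be shown with plain integer arithmetic. This standalone snippet only illustrates the idea in the one-line description above; it is not the exact set of DAG patterns the routine handles.
#include <cassert>
#include <cstdint>

// Testing whether bit C of X is clear: the shift/logic form and the
// mask-plus-compare form always agree.
static bool bitClearViaShift(uint32_t X, unsigned C) { return ((~X >> C) & 1u) != 0; }
static bool bitClearViaMask(uint32_t X, unsigned C) { return (X & (1u << C)) == 0; }

int main() {
  for (uint32_t X : {0u, 1u, 0x80000000u, 0xdeadbeefu})
    for (unsigned C = 0; C < 32; ++C)
      assert(bitClearViaShift(X, C) == bitClearViaMask(X, C));
}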
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
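The signed bounds involved in that smin/smax clamp can be computed with the APInt helpers listed further down this page. A self-contained sketch, with destination width 8 and source width 32 chosen arbitrarily for illustration:
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // Bounds a signed-saturating truncate to i8 clamps against, widened to the
  // i32 source width so they can be compared against the source value.
  APInt SMin = APInt::getSignedMinValue(8).sext(32); // -128
  APInt SMax = APInt::getSignedMaxValue(8).sext(32); //  127
  assert(SMin.getSExtValue() == -128 && SMax.getSExtValue() == 127);
}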
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
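The 32-bit "packed halfword byteswap" referred to here swaps the two bytes inside each 16-bit half of the value. A small standalone illustration of that value transformation (not of the DAG matching itself):
#include <cassert>
#include <cstdint>

// Swap the bytes within each 16-bit half of a 32-bit value.
static uint32_t halfwordByteswap(uint32_t x) {
  return ((x & 0x000000ff) << 8) | ((x & 0x0000ff00) >> 8) |
         ((x & 0x00ff0000) << 8) | ((x & 0xff000000) >> 8);
}

int main() {
  assert(halfwordByteswap(0x11223344u) == 0x22114433u);
}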
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:500
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:1122
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1210
bool isNegative() const
Definition: APFloat.h:1445
bool isNormal() const
Definition: APFloat.h:1449
bool isDenormal() const
Definition: APFloat.h:1446
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1428
const fltSemantics & getSemantics() const
Definition: APFloat.h:1453
bool isNaN() const
Definition: APFloat.h:1443
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1090
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
bool isLargest() const
Definition: APFloat.h:1461
bool isIEEE() const
Definition: APFloat.h:1463
bool isInfinity() const
Definition: APFloat.h:1442
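Several of the APFloat factories and predicates above appear throughout the floating-point combines. A small standalone sketch of how they behave, with values chosen purely for illustration:
#include "llvm/ADT/APFloat.h"
#include <cassert>
using namespace llvm;

int main() {
  APFloat One = APFloat::getOne(APFloat::IEEEsingle());
  assert(One.isExactlyValue(1.0) && !One.isNegative() && !One.isNaN());

  APFloat NaN = APFloat::getQNaN(APFloat::IEEEsingle());
  assert(NaN.isNaN() && !NaN.isInfinity());

  // bitcastToAPInt exposes the raw encoding, e.g. 0x3f800000 for 1.0f.
  assert(One.bitcastToAPInt().getZExtValue() == 0x3f800000u);
}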
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1945
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1732
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits low bits from this APInt.
Definition: APInt.cpp:617
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set to 1 the bit whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:466
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
int32_t exactLogBase2() const
Definition: APInt.h:1761
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1909
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned countLeadingZeros() const
Definition: APInt.h:1585
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1434
unsigned logBase2() const
Definition: APInt.h:1739
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1934
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
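Most of the integer combines in this file are expressed in terms of the APInt helpers listed above. A standalone sketch exercising a few of them, with arbitrary example values:
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt Mask = APInt::getLowBitsSet(32, 8); // 0x000000FF
  assert(Mask.isMask(8) && Mask.popcount() == 8 && Mask.countr_one() == 8);

  APInt Sign = APInt::getSignMask(32);      // 0x80000000
  assert(Sign.isSignMask() && Sign.isPowerOf2() && Sign.logBase2() == 31);

  // Width changes of the kind used when narrowing or widening constants.
  assert(Mask.trunc(8).isAllOnes());
  assert(Mask.zext(64).getZExtValue() == 0xFFu);
}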
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:207
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:422
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:415
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Helper to insert SDNodeFlags automatically when transforming nodes.
Definition: SelectionDAG.h:371
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:982
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:577
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:499
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static unsigned getHasPredecessorMaxSteps()
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:569
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
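Most folds in this file ultimately rebuild their result through getNode and the constant helpers above. A minimal, hedged fragment of what that looks like inside a combine; it assumes a SelectionDAG DAG, an SDLoc DL, an integer value type VT, and operands X and Y are already in scope, and is not taken from any specific fold in the file.
// Build (and X, (xor Y, -1)), i.e. X & ~Y, as a combine result.
SDValue NotY = DAG.getNOT(DL, Y, VT);
SDValue AndNode = DAG.getNode(ISD::AND, DL, VT, X, NotY);
// Returning this SDValue from a visit function lets the combiner replace
// the original node with it.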
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:504
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
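A hedged fragment showing the typical way a combine consumes this analysis; it assumes an SDValue Op and the DAG are in scope, and the names used are placeholders rather than code from the file.
// Known.Zero has a bit set where Op is known to be 0, Known.One where it is
// known to be 1.
KnownBits Known = DAG.computeKnownBits(Op);
unsigned BitWidth = Op.getScalarValueSizeInBits();
if (Known.Zero.isAllOnes()) {
  // Every bit is known zero, so Op could be replaced by the constant 0.
}
// MaskedValueIsZero below is a convenience wrapper over the same analysis,
// e.g. DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(BitWidth, 16)).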
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:586
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
std::optional< bool > isBoolConstant(SDValue N, bool AllowTruncation=false) const
Check if a value N is a constant using the target's BooleanContent for its type.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:906
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:936
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
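The combiner's worklist relies on this insert/pop_back_val discipline: deduplicated insertion with LIFO removal. A small standalone sketch of the idiom, not of the combiner's actual worklist type:
#include "llvm/ADT/SetVector.h"
#include <cassert>
using namespace llvm;

int main() {
  SmallSetVector<int, 8> Worklist;
  Worklist.insert(1);
  Worklist.insert(2);
  bool Inserted = Worklist.insert(1); // duplicate insertion is a no-op
  assert(!Inserted && Worklist.size() == 2);
  (void)Inserted;

  while (!Worklist.empty()) {
    int Item = Worklist.pop_back_val(); // most recently added comes out first
    (void)Item;                         // process Item here
  }
  assert(Worklist.empty());
}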
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
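A standalone sketch of the SmallVector operations listed above, the container used for most scratch lists in this file; the element type and sizes are arbitrary:
#include "llvm/ADT/SmallVector.h"
#include <cassert>
using namespace llvm;

int main() {
  SmallVector<int, 4> Ops;   // inline storage for 4 elements, heap beyond that
  Ops.push_back(1);
  Ops.push_back(2);
  Ops.emplace_back(3);

  SmallVector<int, 4> More = {4, 5};
  Ops.append(More.begin(), More.end());
  assert(Ops.size() == 5 && Ops.back() == 5);

  Ops.erase(Ops.begin());    // drop the first element
  assert(Ops.size() == 4 && Ops.front() == 2);
}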
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
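A hedged fragment showing the usual legality guard a combine performs before introducing a new opcode; it assumes TLI, a LegalOperations flag, and a value type VT are in scope, as they are throughout this file, and ISD::ABS is only an example opcode.
// Only form an ABS node if the target can handle it, or if we are still
// before operation legalization, when illegal nodes may still be introduced.
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
  // ... build and return the ABS-based replacement here ...
}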
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it is reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function that returns the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
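A minimal sketch of how a combine might drive the BuildSDIV hook listed above; it is not the DAGCombiner's actual visitSDIV code. TLI, LegalOperations and LegalTypes are assumed stand-ins for the TargetLowering reference and legalization flags available in the surrounding combine, and the usual using namespace llvm is assumed.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
// Expand an ISD::SDIV whose divisor is a constant into shifts/multiplies via the
// target hook; Created receives the intermediate nodes built for the expansion.
static SDValue expandSDivByConstant(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI,
                                    bool LegalOperations, bool LegalTypes) {
  SmallVector<SDNode *, 8> Created;
  if (SDValue NewDiv = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Created))
    return NewDiv;
  return SDValue(); // divisor was not a suitable constant
}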
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
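A hedged sketch of the SimplifyDemandedBits entry point listed above, as a target combine might call it; the helper name and the choice of demanding only the low byte are illustrative, and fixed-width vectors are assumed.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"
// Try to simplify Op given that only its low 8 bits are observed, then commit any
// rewrite through the DAGCombinerInfo so the rest of the DAG sees it.
static bool simplifyDemandedLowByte(llvm::SDValue Op, llvm::SelectionDAG &DAG,
                                    const llvm::TargetLowering &TLI,
                                    llvm::TargetLowering::DAGCombinerInfo &DCI) {
  using namespace llvm;
  EVT VT = Op.getValueType();
  unsigned BW = VT.getScalarSizeInBits();
  APInt DemandedBits = APInt::getLowBitsSet(BW, BW < 8 ? BW : 8);
  unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
  APInt DemandedElts = APInt::getAllOnes(NumElts);
  KnownBits Known;
  TargetLowering::TargetLoweringOpt TLO(DAG, /*LegalTypes=*/true,
                                        /*LegalOperations=*/true);
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
    return false;
  DCI.CommitTargetLoweringOpt(TLO);
  return true;
}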
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
int getNumOccurrences() const
Definition: CommandLine.h:399
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2217
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2222
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2227
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2232
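A small worked example of the APInt min/max helpers listed above (they live in the llvm::APIntOps namespace); the concrete values are chosen only to show why the signed and unsigned variants disagree.
#include "llvm/ADT/APInt.h"
using namespace llvm;
APInt A(/*numBits=*/8, 200); // 200 when read unsigned, -56 when read signed
APInt B(/*numBits=*/8, 100);
const APInt &UnsignedMax = APIntOps::umax(A, B); // 200
const APInt &SignedMax = APIntOps::smax(A, B);   // 100, since 200 wraps to -56 as i8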
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:502
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ TRUNCATE_SSAT_U
Definition: ISDOpcodes.h:834
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:788
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1377
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1262
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1078
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Definition: ISDOpcodes.h:1481
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1211
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1083
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that are the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1572
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1682
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1657
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1677
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1494
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1568
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1568
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1639
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1555
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1651
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:903
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or a splat of an integer constant.
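A hedged sketch of the SDPatternMatch helpers listed above; it is not a fold taken from this file. N and DAG are assumed to be the node and SelectionDAG available inside a combine, and m_Value is assumed to be the SDPatternMatch capture helper.
// Recognize (srl (ctlz X), <constant>) and capture X for further rewriting.
using namespace llvm::SDPatternMatch;
SDValue X;
bool Matched = sd_match(N, &DAG, m_Srl(m_Ctlz(m_Value(X)), m_ConstInt()));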
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4771
constexpr double e
Definition: MathExtras.h:48
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:355
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1565
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:518
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:298
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:348
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1503
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
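A hedged sketch of the common isConstOrConstSplat idiom (listed above) for validating a shift amount that may be a scalar constant or a splat-vector constant; N is assumed to be a shift node from the surrounding combine.
// Accept a uniform constant shift amount and check that it is in range.
if (ConstantSDNode *Amt = isConstOrConstSplat(N->getOperand(1))) {
  unsigned BW = N->getValueType(0).getScalarSizeInBits();
  if (Amt->getAPIntValue().ult(BW)) {
    // the shift amount is a uniform, in-range constant; safe to fold further
  }
}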
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
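A small worked example of the mask-scaling helpers listed above, using a concrete mask; widening reports failure through its return value, while narrowing always succeeds.
#include "llvm/ADT/SmallVector.h"
int Mask[] = {0, 1, 4, 5};                    // 4-element shuffle selecting lanes 0, 1, 4, 5
llvm::SmallVector<int, 8> Wide, Narrow;
bool Widened = llvm::widenShuffleMaskElts(2, Mask, Wide); // true; Wide == {0, 2}
llvm::narrowShuffleMaskElts(2, Mask, Narrow);             // Narrow == {0,1,2,3,8,9,10,11}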
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2087
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:384
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:323
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:319
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:318
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:274
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
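A hedged sketch combining several of the EVT helpers listed above: derive the vector type with the same element count as VT but half-width lanes. Ctx is assumed to be the LLVMContext of the current function, and fixed-width vectors are assumed.
EVT VT = MVT::v4i32;
EVT HalfEltVT = EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() / 2);       // i16
EVT NarrowVT = EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorNumElements()); // v4i16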
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:187
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:251
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:263
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:320
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:53
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:240
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:82
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:59
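A hedged sketch of the KnownBits queries listed above, fed from SelectionDAG::computeKnownBits; the helper name is hypothetical and the usual using namespace llvm is assumed.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
// If every bit of Op can be proven, replace it with a plain constant.
static SDValue foldIfFullyKnown(SDValue Op, SelectionDAG &DAG) {
  KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.isConstant())
    return DAG.getConstant(Known.getConstant(), SDLoc(Op), Op.getValueType());
  return SDValue();
}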
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
void setDisjoint(bool b)
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:317
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...