1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/Metadata.h"
60#include "llvm/Support/Debug.h"
68#include <algorithm>
69#include <cassert>
70#include <cstdint>
71#include <functional>
72#include <iterator>
73#include <optional>
74#include <string>
75#include <tuple>
76#include <utility>
77#include <variant>
78
79#include "MatchContext.h"
80
81using namespace llvm;
82using namespace llvm::SDPatternMatch;
83
84#define DEBUG_TYPE "dagcombine"
85
86STATISTIC(NodesCombined , "Number of dag nodes combined");
87STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
88STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
89STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
90STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
91STATISTIC(SlicedLoads, "Number of loads sliced");
92STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
93
94DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
95 "Controls whether a DAG combine is performed for a node");
96
97static cl::opt<bool>
98CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
99 cl::desc("Enable DAG combiner's use of IR alias analysis"));
100
101static cl::opt<bool>
102UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
103 cl::desc("Enable DAG combiner's use of TBAA"));
104
105#ifndef NDEBUG
106static cl::opt<std::string>
107CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
108 cl::desc("Only use DAG-combiner alias analysis in this"
109 " function"));
110#endif
111
112/// Hidden option to stress test load slicing, i.e., when this option
113/// is enabled, load slicing bypasses most of its profitability guards.
114static cl::opt<bool>
115StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
116 cl::desc("Bypass the profitability model of load slicing"),
117 cl::init(false));
118
119static cl::opt<bool>
120 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
121 cl::desc("DAG combiner may split indexing from loads"));
122
123static cl::opt<bool>
124 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
125 cl::desc("DAG combiner enable merging multiple stores "
126 "into a wider store"));
127
128static cl::opt<unsigned> TokenFactorInlineLimit(
129 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
130 cl::desc("Limit the number of operands to inline for Token Factors"));
131
132static cl::opt<unsigned> StoreMergeDependenceLimit(
133 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
134 cl::desc("Limit the number of times for the same StoreNode and RootNode "
135 "to bail out in store merging dependence check"));
136
137static cl::opt<bool> EnableReduceLoadOpStoreWidth(
138 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
139 cl::desc("DAG combiner enable reducing the width of load/op/store "
140 "sequence"));
141static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
142 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
143 cl::Hidden, cl::init(false),
144 cl::desc("DAG combiner force override the narrowing profitable check when "
145 "reducing the width of load/op/store sequences"));
146
147static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
148 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
149 cl::desc("DAG combiner enable load/<replace bytes>/store with "
150 "a narrower store"));
151
153 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
154 cl::desc(
155 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
156namespace {
157
158 class DAGCombiner {
159 SelectionDAG &DAG;
160 const TargetLowering &TLI;
161 const SelectionDAGTargetInfo *STI;
162 CombineLevel Level = BeforeLegalizeTypes;
163 CodeGenOptLevel OptLevel;
164 bool LegalDAG = false;
165 bool LegalOperations = false;
166 bool LegalTypes = false;
167 bool ForCodeSize;
168 bool DisableGenericCombines;
169
170 /// Worklist of all of the nodes that need to be simplified.
171 ///
172 /// This must behave as a stack -- new nodes to process are pushed onto the
173 /// back and when processing we pop off of the back.
174 ///
175 /// The worklist will not contain duplicates but may contain null entries
176 /// due to nodes being deleted from the underlying DAG. For fast lookup and
177 /// deduplication, the index of the node in this vector is stored in the
178 /// node in SDNode::CombinerWorklistIndex.
179 SmallVector<SDNode *, 64> Worklist;
180
181 /// This records all nodes attempted to be added to the worklist since we
182 /// considered a new worklist entry. Since we do not add duplicate nodes
183 /// to the worklist, this is different from the tail of the worklist.
184 SmallSetVector<SDNode *, 32> PruningList;
185
186 /// Map from candidate StoreNode to the pair of RootNode and count.
187 /// The count is used to track how many times we have seen the StoreNode
188 /// with the same RootNode bail out in dependence check. If we have seen
189 /// the bail out for the same pair many times over a limit, we won't
190 /// consider the StoreNode with the same RootNode as store merging
191 /// candidate again.
192 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
193
194 // BatchAA - Used for DAG load/store alias analysis.
195 BatchAAResults *BatchAA;
196
197 /// This caches all chains that have already been processed in
198 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
199 /// stores candidates.
200 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
201
202 /// When an instruction is simplified, add all users of the instruction to
203 /// the work lists because they might get more simplified now.
204 void AddUsersToWorklist(SDNode *N) {
205 for (SDNode *Node : N->users())
206 AddToWorklist(Node);
207 }
208
209 /// Convenient shorthand to add a node and all of its users to the worklist.
210 void AddToWorklistWithUsers(SDNode *N) {
211 AddUsersToWorklist(N);
212 AddToWorklist(N);
213 }
214
215 // Prune potentially dangling nodes. This is called after
216 // any visit to a node, but should also be called during a visit after any
217 // failed combine which may have created a DAG node.
218 void clearAddedDanglingWorklistEntries() {
219 // Check any nodes added to the worklist to see if they are prunable.
220 while (!PruningList.empty()) {
221 auto *N = PruningList.pop_back_val();
222 if (N->use_empty())
223 recursivelyDeleteUnusedNodes(N);
224 }
225 }
226
227 SDNode *getNextWorklistEntry() {
228 // Before we do any work, remove nodes that are not in use.
229 clearAddedDanglingWorklistEntries();
230 SDNode *N = nullptr;
231 // The Worklist holds the SDNodes in order, but it may contain null
232 // entries.
233 while (!N && !Worklist.empty()) {
234 N = Worklist.pop_back_val();
235 }
236
237 if (N) {
238 assert(N->getCombinerWorklistIndex() >= 0 &&
239 "Found a worklist entry without a corresponding map entry!");
240 // Set to -2 to indicate that we combined the node.
241 N->setCombinerWorklistIndex(-2);
242 }
243 return N;
244 }
245
246 /// Call the node-specific routine that folds each particular type of node.
247 SDValue visit(SDNode *N);
248
249 public:
250 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
251 : DAG(D), TLI(D.getTargetLoweringInfo()),
252 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
253 BatchAA(BatchAA) {
254 ForCodeSize = DAG.shouldOptForSize();
255 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
256
257 MaximumLegalStoreInBits = 0;
258 // We use the minimum store size here, since that's all we can guarantee
259 // for the scalable vector types.
260 for (MVT VT : MVT::all_valuetypes())
261 if (EVT(VT).isSimple() && VT != MVT::Other &&
262 TLI.isTypeLegal(EVT(VT)) &&
263 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
264 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
265 }
266
267 void ConsiderForPruning(SDNode *N) {
268 // Mark this for potential pruning.
269 PruningList.insert(N);
270 }
271
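    // A node's CombinerWorklistIndex encodes its worklist state: a value >= 0
    // is its position in Worklist, a negative value means it is not currently
    // queued, and -2 specifically marks a node the combiner has already
    // visited (see SkipIfCombinedBefore below).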
272 /// Add to the worklist making sure its instance is at the back (next to be
273 /// processed.)
274 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
275 bool SkipIfCombinedBefore = false) {
276 assert(N->getOpcode() != ISD::DELETED_NODE &&
277 "Deleted Node added to Worklist");
278
279 // Skip handle nodes as they can't usefully be combined and confuse the
280 // zero-use deletion strategy.
281 if (N->getOpcode() == ISD::HANDLENODE)
282 return;
283
284 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
285 return;
286
287 if (IsCandidateForPruning)
288 ConsiderForPruning(N);
289
290 if (N->getCombinerWorklistIndex() < 0) {
291 N->setCombinerWorklistIndex(Worklist.size());
292 Worklist.push_back(N);
293 }
294 }
295
296 /// Remove all instances of N from the worklist.
297 void removeFromWorklist(SDNode *N) {
298 PruningList.remove(N);
299 StoreRootCountMap.erase(N);
300
301 int WorklistIndex = N->getCombinerWorklistIndex();
302 // If not in the worklist, the index might be -1 or -2 (was combined
303 // before). As the node gets deleted anyway, there's no need to update
304 // the index.
305 if (WorklistIndex < 0)
306 return; // Not in the worklist.
307
308 // Null out the entry rather than erasing it to avoid a linear operation.
309 Worklist[WorklistIndex] = nullptr;
310 N->setCombinerWorklistIndex(-1);
311 }
312
313 void deleteAndRecombine(SDNode *N);
314 bool recursivelyDeleteUnusedNodes(SDNode *N);
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
318 bool AddTo = true);
319
320 /// Replaces all uses of the results of one DAG node with new values.
321 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
322 return CombineTo(N, &Res, 1, AddTo);
323 }
324
325 /// Replaces all uses of the results of one DAG node with new values.
326 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
327 bool AddTo = true) {
328 SDValue To[] = { Res0, Res1 };
329 return CombineTo(N, To, 2, AddTo);
330 }
331
332 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
333
334 private:
335 unsigned MaximumLegalStoreInBits;
336
337 /// Check the specified integer node value to see if it can be simplified or
338 /// if things it uses can be simplified by bit propagation.
339 /// If so, return true.
340 bool SimplifyDemandedBits(SDValue Op) {
341 unsigned BitWidth = Op.getScalarValueSizeInBits();
342 APInt DemandedBits = APInt::getAllOnes(BitWidth);
343 return SimplifyDemandedBits(Op, DemandedBits);
344 }
345
346 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
347 EVT VT = Op.getValueType();
348 APInt DemandedElts = VT.isFixedLengthVector()
349 ? APInt::getAllOnes(VT.getVectorNumElements())
350 : APInt(1, 1);
351 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
352 }
353
354 /// Check the specified vector node value to see if it can be simplified or
355 /// if things it uses can be simplified as it only uses some of the
356 /// elements. If so, return true.
357 bool SimplifyDemandedVectorElts(SDValue Op) {
358 // TODO: For now just pretend it cannot be simplified.
359 if (Op.getValueType().isScalableVector())
360 return false;
361
362 unsigned NumElts = Op.getValueType().getVectorNumElements();
363 APInt DemandedElts = APInt::getAllOnes(NumElts);
364 return SimplifyDemandedVectorElts(Op, DemandedElts);
365 }
366
367 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
368 const APInt &DemandedElts,
369 bool AssumeSingleUse = false);
370 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
371 bool AssumeSingleUse = false);
372
373 bool CombineToPreIndexedLoadStore(SDNode *N);
374 bool CombineToPostIndexedLoadStore(SDNode *N);
375 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
376 bool SliceUpLoad(SDNode *N);
377
378 // Looks up the chain to find a unique (unaliased) store feeding the passed
379 // load. If no such store is found, returns a nullptr.
380 // Note: This will look past a CALLSEQ_START if the load is chained to it
381 // so that it can find stack stores for byval params.
382 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
383 // Scalars have size 0 to distinguish from singleton vectors.
384 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
385 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
386 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
387
388 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
389 /// load.
390 ///
391 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
392 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
393 /// \param EltNo index of the vector element to load.
394 /// \param OriginalLoad load that EVE came from to be replaced.
395 /// \returns EVE on success, SDValue() on failure.
396 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
397 SDValue EltNo,
398 LoadSDNode *OriginalLoad);
399 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
400 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
401 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
402 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
403 SDValue PromoteIntBinOp(SDValue Op);
404 SDValue PromoteIntShiftOp(SDValue Op);
405 SDValue PromoteExtend(SDValue Op);
406 bool PromoteLoad(SDValue Op);
407
408 SDValue foldShiftToAvg(SDNode *N);
409
410 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
411 SDValue RHS, SDValue True, SDValue False,
412 ISD::CondCode CC);
413
414 /// Call the node-specific routine that knows how to fold each
415 /// particular type of node. If that doesn't do anything, try the
416 /// target-specific DAG combines.
417 SDValue combine(SDNode *N);
418
419 // Visitation implementation - Implement dag node combining for different
420 // node types. The semantics are as follows:
421 // Return Value:
422 // SDValue.getNode() == 0 - No change was made
423 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
424 // otherwise - N should be replaced by the returned Operand.
425 //
426 SDValue visitTokenFactor(SDNode *N);
427 SDValue visitMERGE_VALUES(SDNode *N);
428 SDValue visitADD(SDNode *N);
429 SDValue visitADDLike(SDNode *N);
430 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
550 SDValue visitVECTOR_SHUFFLE(SDNode *N);
551 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
552 SDValue visitINSERT_SUBVECTOR(SDNode *N);
553 SDValue visitVECTOR_COMPRESS(SDNode *N);
554 SDValue visitMLOAD(SDNode *N);
555 SDValue visitMSTORE(SDNode *N);
556 SDValue visitMGATHER(SDNode *N);
557 SDValue visitMSCATTER(SDNode *N);
558 SDValue visitMHISTOGRAM(SDNode *N);
559 SDValue visitVPGATHER(SDNode *N);
560 SDValue visitVPSCATTER(SDNode *N);
561 SDValue visitVP_STRIDED_LOAD(SDNode *N);
562 SDValue visitVP_STRIDED_STORE(SDNode *N);
563 SDValue visitFP_TO_FP16(SDNode *N);
564 SDValue visitFP16_TO_FP(SDNode *N);
565 SDValue visitFP_TO_BF16(SDNode *N);
566 SDValue visitBF16_TO_FP(SDNode *N);
567 SDValue visitVECREDUCE(SDNode *N);
568 SDValue visitVPOp(SDNode *N);
569 SDValue visitGET_FPENV_MEM(SDNode *N);
570 SDValue visitSET_FPENV_MEM(SDNode *N);
571
572 template <class MatchContextClass>
573 SDValue visitFADDForFMACombine(SDNode *N);
574 template <class MatchContextClass>
575 SDValue visitFSUBForFMACombine(SDNode *N);
576 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
577
578 SDValue XformToShuffleWithZero(SDNode *N);
579 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
580 const SDLoc &DL,
581 SDNode *N,
582 SDValue N0,
583 SDValue N1);
584 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
585 SDValue N1, SDNodeFlags Flags);
586 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
589 EVT VT, SDValue N0, SDValue N1,
590 SDNodeFlags Flags = SDNodeFlags());
591
592 SDValue visitShiftByConstant(SDNode *N);
593
594 SDValue foldSelectOfConstants(SDNode *N);
595 SDValue foldVSelectOfConstants(SDNode *N);
596 SDValue foldBinOpIntoSelect(SDNode *BO);
597 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
598 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
599 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
600 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
601 SDValue N2, SDValue N3, ISD::CondCode CC,
602 bool NotExtCompare = false);
603 SDValue convertSelectOfFPConstantsToLoadOffset(
604 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
605 ISD::CondCode CC);
606 SDValue foldSignChangeInBitcast(SDNode *N);
607 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
608 SDValue N2, SDValue N3, ISD::CondCode CC);
609 SDValue foldSelectOfBinops(SDNode *N);
610 SDValue foldSextSetcc(SDNode *N);
611 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
612 const SDLoc &DL);
613 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
614 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
615 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
616 SDValue False, ISD::CondCode CC, const SDLoc &DL);
617 SDValue unfoldMaskedMerge(SDNode *N);
618 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
619 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
620 const SDLoc &DL, bool foldBooleans);
621 SDValue rebuildSetCC(SDValue N);
622
623 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
624 SDValue &CC, bool MatchStrict = false) const;
625 bool isOneUseSetCC(SDValue N) const;
626
627 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
628 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
629
630 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
631 unsigned HiOp);
632 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
633 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
634 const TargetLowering &TLI);
635
636 SDValue CombineExtLoad(SDNode *N);
637 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
638 SDValue combineRepeatedFPDivisors(SDNode *N);
639 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
640 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
641 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
642 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
643 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
644 SDValue BuildSDIV(SDNode *N);
645 SDValue BuildSDIVPow2(SDNode *N);
646 SDValue BuildUDIV(SDNode *N);
647 SDValue BuildSREMPow2(SDNode *N);
648 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
649 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
650 bool KnownNeverZero = false,
651 bool InexpensiveOnly = false,
652 std::optional<EVT> OutVT = std::nullopt);
653 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
654 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
655 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
656 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
657 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
658 SDNodeFlags Flags, bool Reciprocal);
659 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
660 SDNodeFlags Flags, bool Reciprocal);
661 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
662 bool DemandHighBits = true);
663 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
664 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
665 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
666 unsigned PosOpcode, unsigned NegOpcode,
667 const SDLoc &DL);
668 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
669 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
670 unsigned PosOpcode, unsigned NegOpcode,
671 const SDLoc &DL);
672 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
673 SDValue MatchLoadCombine(SDNode *N);
674 SDValue mergeTruncStores(StoreSDNode *N);
675 SDValue reduceLoadWidth(SDNode *N);
676 SDValue ReduceLoadOpStoreWidth(SDNode *N);
678 SDValue TransformFPLoadStorePair(SDNode *N);
679 SDValue convertBuildVecZextToZext(SDNode *N);
680 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
681 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
682 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
683 SDValue reduceBuildVecToShuffle(SDNode *N);
684 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
685 ArrayRef<int> VectorMask, SDValue VecIn1,
686 SDValue VecIn2, unsigned LeftIdx,
687 bool DidSplitVec);
688 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
689
690 /// Walk up chain skipping non-aliasing memory nodes,
691 /// looking for aliasing nodes and adding them to the Aliases vector.
692 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
693 SmallVectorImpl<SDValue> &Aliases);
694
695 /// Return true if there is any possibility that the two addresses overlap.
696 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
697
698 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
699 /// chain (aliasing node.)
700 SDValue FindBetterChain(SDNode *N, SDValue Chain);
701
702 /// Try to replace a store and any possibly adjacent stores on
703 /// consecutive chains with better chains. Return true only if St is
704 /// replaced.
705 ///
706 /// Notice that other chains may still be replaced even if the function
707 /// returns false.
708 bool findBetterNeighborChains(StoreSDNode *St);
709
710 // Helper for findBetterNeighborChains. Walk up store chain add additional
711 // chained stores that do not overlap and can be parallelized.
712 bool parallelizeChainedStores(StoreSDNode *St);
713
714 /// Holds a pointer to an LSBaseSDNode as well as information on where it
715 /// is located in a sequence of memory operations connected by a chain.
716 struct MemOpLink {
717 // Ptr to the mem node.
718 LSBaseSDNode *MemNode;
719
720 // Offset from the base ptr.
721 int64_t OffsetFromBase;
722
723 MemOpLink(LSBaseSDNode *N, int64_t Offset)
724 : MemNode(N), OffsetFromBase(Offset) {}
725 };
726
727 // Classify the origin of a stored value.
728 enum class StoreSource { Unknown, Constant, Extract, Load };
729 StoreSource getStoreSource(SDValue StoreVal) {
730 switch (StoreVal.getOpcode()) {
731 case ISD::Constant:
732 case ISD::ConstantFP:
733 return StoreSource::Constant;
734 case ISD::BUILD_VECTOR:
735 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
736 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
737 return StoreSource::Constant;
738 return StoreSource::Unknown;
739 case ISD::EXTRACT_VECTOR_ELT:
740 case ISD::EXTRACT_SUBVECTOR:
741 return StoreSource::Extract;
742 case ISD::LOAD:
743 return StoreSource::Load;
744 default:
745 return StoreSource::Unknown;
746 }
747 }
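    // This classification drives mergeConsecutiveStores: Constant, Extract
    // and Load sources are handled by tryStoreMergeOfConstants,
    // tryStoreMergeOfExtracts and tryStoreMergeOfLoads respectively.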
748
749 /// This is a helper function for visitMUL to check the profitability
750 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
751 /// MulNode is the original multiply, AddNode is (add x, c1),
752 /// and ConstNode is c2.
753 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
754 SDValue ConstNode);
755
756 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
757 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
758 /// the type of the loaded value to be extended.
759 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
760 EVT LoadResultTy, EVT &ExtVT);
761
762 /// Helper function to calculate whether the given Load/Store can have its
763 /// width reduced to ExtVT.
764 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
765 EVT &MemVT, unsigned ShAmt = 0);
766
767 /// Used by BackwardsPropagateMask to find suitable loads.
768 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
769 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
770 ConstantSDNode *Mask, SDNode *&NodeToMask);
771 /// Attempt to propagate a given AND node back to load leaves so that they
772 /// can be combined into narrow loads.
773 bool BackwardsPropagateMask(SDNode *N);
774
775 /// Helper function for mergeConsecutiveStores which merges the component
776 /// store chains.
777 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
778 unsigned NumStores);
779
780 /// Helper function for mergeConsecutiveStores which checks if all the store
781 /// nodes have the same underlying object. We can still reuse the first
782 /// store's pointer info if all the stores are from the same object.
783 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
784
785 /// This is a helper function for mergeConsecutiveStores. When the source
786 /// elements of the consecutive stores are all constants or all extracted
787 /// vector elements, try to merge them into one larger store introducing
788 /// bitcasts if necessary. \return True if a merged store was created.
789 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
790 EVT MemVT, unsigned NumStores,
791 bool IsConstantSrc, bool UseVector,
792 bool UseTrunc);
793
794 /// This is a helper function for mergeConsecutiveStores. Stores that
795 /// potentially may be merged with St are placed in StoreNodes. On success,
796 /// returns a chain predecessor to all store candidates.
797 SDNode *getStoreMergeCandidates(StoreSDNode *St,
798 SmallVectorImpl<MemOpLink> &StoreNodes);
799
800 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
801 /// have indirect dependency through their operands. RootNode is the
802 /// predecessor to all stores calculated by getStoreMergeCandidates and is
803 /// used to prune the dependency check. \return True if safe to merge.
804 bool checkMergeStoreCandidatesForDependencies(
805 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
806 SDNode *RootNode);
807
808 /// This is a helper function for mergeConsecutiveStores. Given a list of
809 /// store candidates, find the first N that are consecutive in memory.
810 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
811 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
812 int64_t ElementSizeBytes) const;
813
814 /// This is a helper function for mergeConsecutiveStores. It is used for
815 /// store chains that are composed entirely of constant values.
816 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
817 unsigned NumConsecutiveStores,
818 EVT MemVT, SDNode *Root, bool AllowVectors);
819
820 /// This is a helper function for mergeConsecutiveStores. It is used for
821 /// store chains that are composed entirely of extracted vector elements.
822 /// When extracting multiple vector elements, try to store them in one
823 /// vector store rather than a sequence of scalar stores.
824 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
825 unsigned NumConsecutiveStores, EVT MemVT,
826 SDNode *Root);
827
828 /// This is a helper function for mergeConsecutiveStores. It is used for
829 /// store chains that are composed entirely of loaded values.
830 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
831 unsigned NumConsecutiveStores, EVT MemVT,
832 SDNode *Root, bool AllowVectors,
833 bool IsNonTemporalStore, bool IsNonTemporalLoad);
834
835 /// Merge consecutive store operations into a wide store.
836 /// This optimization uses wide integers or vectors when possible.
837 /// \return true if stores were merged.
838 bool mergeConsecutiveStores(StoreSDNode *St);
839
840 /// Try to transform a truncation where C is a constant:
841 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
842 ///
843 /// \p N needs to be a truncation and its first operand an AND. Other
844 /// requirements are checked by the function (e.g. that trunc is
845 /// single-use) and if missed an empty SDValue is returned.
846 SDValue distributeTruncateThroughAnd(SDNode *N);
847
848 /// Helper function to determine whether the target supports operation
849 /// given by \p Opcode for type \p VT, that is, whether the operation
850 /// is legal or custom before legalizing operations, and whether is
851 /// legal (but not custom) after legalization.
852 bool hasOperation(unsigned Opcode, EVT VT) {
853 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
854 }
855
856 public:
857 /// Runs the dag combiner on all nodes in the work list
858 void Run(CombineLevel AtLevel);
859
860 SelectionDAG &getDAG() const { return DAG; }
861
862 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
863 EVT getShiftAmountTy(EVT LHSTy) {
864 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
865 }
866
867 /// This method returns true if we are running before type legalization or
868 /// if the specified VT is legal.
869 bool isTypeLegal(const EVT &VT) {
870 if (!LegalTypes) return true;
871 return TLI.isTypeLegal(VT);
872 }
873
874 /// Convenience wrapper around TargetLowering::getSetCCResultType
875 EVT getSetCCResultType(EVT VT) const {
876 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
877 }
878
879 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
880 SDValue OrigLoad, SDValue ExtLoad,
881 ISD::NodeType ExtType);
882 };
883
884/// This class is a DAGUpdateListener that removes any deleted
885/// nodes from the worklist.
886class WorklistRemover : public SelectionDAG::DAGUpdateListener {
887 DAGCombiner &DC;
888
889public:
890 explicit WorklistRemover(DAGCombiner &dc)
891 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
892
893 void NodeDeleted(SDNode *N, SDNode *E) override {
894 DC.removeFromWorklist(N);
895 }
896};
897
898class WorklistInserter : public SelectionDAG::DAGUpdateListener {
899 DAGCombiner &DC;
900
901public:
902 explicit WorklistInserter(DAGCombiner &dc)
903 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
904
905 // FIXME: Ideally we could add N to the worklist, but this causes exponential
906 // compile time costs in large DAGs, e.g. Halide.
907 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
908};
909
910} // end anonymous namespace
911
912//===----------------------------------------------------------------------===//
913// TargetLowering::DAGCombinerInfo implementation
914//===----------------------------------------------------------------------===//
915
916void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
917 ((DAGCombiner*)DC)->AddToWorklist(N);
918}
919
920SDValue TargetLowering::DAGCombinerInfo::
921CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
922 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
923}
924
925SDValue TargetLowering::DAGCombinerInfo::
926CombineTo(SDNode *N, SDValue Res, bool AddTo) {
927 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
928}
929
930SDValue TargetLowering::DAGCombinerInfo::
931CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
932 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
933}
934
935bool TargetLowering::DAGCombinerInfo::
936recursivelyDeleteUnusedNodes(SDNode *N) {
937 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
938}
939
940void TargetLowering::DAGCombinerInfo::
941CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
942 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
943}
944
945//===----------------------------------------------------------------------===//
946// Helper Functions
947//===----------------------------------------------------------------------===//
948
949void DAGCombiner::deleteAndRecombine(SDNode *N) {
950 removeFromWorklist(N);
951
952 // If the operands of this node are only used by the node, they will now be
953 // dead. Make sure to re-visit them and recursively delete dead nodes.
954 for (const SDValue &Op : N->ops())
955 // For an operand generating multiple values, one of the values may
956 // become dead allowing further simplification (e.g. split index
957 // arithmetic from an indexed load).
958 if (Op->hasOneUse() || Op->getNumValues() > 1)
959 AddToWorklist(Op.getNode());
960
961 DAG.DeleteNode(N);
962}
963
964// APInts must be the same size for most operations, this helper
965// function zero extends the shorter of the pair so that they match.
966// We provide an Offset so that we can create bitwidths that won't overflow.
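// For example, an 8-bit LHS and a 16-bit RHS with Offset == 1 are both
// zero-extended to 17 bits, leaving one spare bit of headroom.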
967static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
968 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
969 LHS = LHS.zext(Bits);
970 RHS = RHS.zext(Bits);
971}
972
973// Return true if this node is a setcc, or is a select_cc
974// that selects between the target values used for true and false, making it
975// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
976// the appropriate nodes based on the type of node we are checking. This
977// simplifies life a bit for the callers.
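// For example, (select_cc lhs, rhs, TrueVal, FalseVal, cc), where TrueVal and
// FalseVal are the target's canonical boolean constants, is treated like
// (setcc lhs, rhs, cc), with LHS/RHS/CC taken from operands 0, 1 and 4.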
978bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
979 SDValue &CC, bool MatchStrict) const {
980 if (N.getOpcode() == ISD::SETCC) {
981 LHS = N.getOperand(0);
982 RHS = N.getOperand(1);
983 CC = N.getOperand(2);
984 return true;
985 }
986
987 if (MatchStrict &&
988 (N.getOpcode() == ISD::STRICT_FSETCC ||
989 N.getOpcode() == ISD::STRICT_FSETCCS)) {
990 LHS = N.getOperand(1);
991 RHS = N.getOperand(2);
992 CC = N.getOperand(3);
993 return true;
994 }
995
996 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
997 !TLI.isConstFalseVal(N.getOperand(3)))
998 return false;
999
1000 if (TLI.getBooleanContents(N.getValueType()) ==
1001 TargetLowering::UndefinedBooleanContent)
1002 return false;
1003
1004 LHS = N.getOperand(0);
1005 RHS = N.getOperand(1);
1006 CC = N.getOperand(4);
1007 return true;
1008}
1009
1010/// Return true if this is a SetCC-equivalent operation with only one use.
1011/// If this is true, it allows the users to invert the operation for free when
1012/// it is profitable to do so.
1013bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1014 SDValue N0, N1, N2;
1015 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1016 return true;
1017 return false;
1018}
1019
1021 if (!ScalarTy.isSimple())
1022 return false;
1023
1024 uint64_t MaskForTy = 0ULL;
1025 switch (ScalarTy.getSimpleVT().SimpleTy) {
1026 case MVT::i8:
1027 MaskForTy = 0xFFULL;
1028 break;
1029 case MVT::i16:
1030 MaskForTy = 0xFFFFULL;
1031 break;
1032 case MVT::i32:
1033 MaskForTy = 0xFFFFFFFFULL;
1034 break;
1035 default:
1036 return false;
1037 break;
1038 }
1039
1040 APInt Val;
1041 if (ISD::isConstantSplatVector(N, Val))
1042 return Val.getLimitedValue() == MaskForTy;
1043
1044 return false;
1045}
1046
1047// Determines if it is a constant integer or a splat/build vector of constant
1048// integers (and undefs).
1049// Do not permit build vector implicit truncation.
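// For example, (build_vector 0, 1, undef, 3) of i32 elements qualifies, while
// a BUILD_VECTOR whose constant operands are wider than the element type
// (implicit truncation) does not.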
1050static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1051 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1052 return !(Const->isOpaque() && NoOpaques);
1053 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1054 return false;
1055 unsigned BitWidth = N.getScalarValueSizeInBits();
1056 for (const SDValue &Op : N->op_values()) {
1057 if (Op.isUndef())
1058 continue;
1059 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1060 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1061 (Const->isOpaque() && NoOpaques))
1062 return false;
1063 }
1064 return true;
1065}
1066
1067// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1068// undef's.
1069static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1070 if (V.getOpcode() != ISD::BUILD_VECTOR)
1071 return false;
1072 return isConstantOrConstantVector(V, NoOpaques) ||
1073 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1074}
1075
1076// Determine if this is an indexed load with an opaque target constant index.
1077static bool canSplitIdx(LoadSDNode *LD) {
1078 return MaySplitLoadIndex &&
1079 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1080 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1081}
1082
1083bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1084 const SDLoc &DL,
1085 SDNode *N,
1086 SDValue N0,
1087 SDValue N1) {
1088 // Currently this only tries to ensure we don't undo the GEP splits done by
1089 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1090 // we check if the following transformation would be problematic:
1091 // (load/store (add, (add, x, offset1), offset2)) ->
1092 // (load/store (add, x, offset1+offset2)).
1093
1094 // (load/store (add, (add, x, y), offset2)) ->
1095 // (load/store (add, (add, x, offset2), y)).
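 // Reassociating is only a problem when the folded offset no longer fits the
 // addressing modes of the dependent loads/stores; the isLegalAddressingMode
 // queries below check exactly that.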
1096
1097 if (N0.getOpcode() != ISD::ADD)
1098 return false;
1099
1100 // Check for vscale addressing modes.
1101 // (load/store (add/sub (add x, y), vscale))
1102 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1103 // (load/store (add/sub (add x, y), (mul vscale, C)))
1104 if ((N1.getOpcode() == ISD::VSCALE ||
1105 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1106 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1107 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1108 N1.getValueType().getFixedSizeInBits() <= 64) {
1109 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1110 ? N1.getConstantOperandVal(0)
1111 : (N1.getOperand(0).getConstantOperandVal(0) *
1112 (N1.getOpcode() == ISD::SHL
1113 ? (1LL << N1.getConstantOperandVal(1))
1114 : N1.getConstantOperandVal(1)));
1115 if (Opc == ISD::SUB)
1116 ScalableOffset = -ScalableOffset;
1117 if (all_of(N->users(), [&](SDNode *Node) {
1118 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1119 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1120 TargetLoweringBase::AddrMode AM;
1121 AM.HasBaseReg = true;
1122 AM.ScalableOffset = ScalableOffset;
1123 EVT VT = LoadStore->getMemoryVT();
1124 unsigned AS = LoadStore->getAddressSpace();
1125 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1126 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1127 AS);
1128 }
1129 return false;
1130 }))
1131 return true;
1132 }
1133
1134 if (Opc != ISD::ADD)
1135 return false;
1136
1137 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1138 if (!C2)
1139 return false;
1140
1141 const APInt &C2APIntVal = C2->getAPIntValue();
1142 if (C2APIntVal.getSignificantBits() > 64)
1143 return false;
1144
1145 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1146 if (N0.hasOneUse())
1147 return false;
1148
1149 const APInt &C1APIntVal = C1->getAPIntValue();
1150 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1151 if (CombinedValueIntVal.getSignificantBits() > 64)
1152 return false;
1153 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1154
1155 for (SDNode *Node : N->users()) {
1156 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1157 // Is x[offset2] already not a legal addressing mode? If so then
1158 // reassociating the constants breaks nothing (we test offset2 because
1159 // that's the one we hope to fold into the load or store).
1160 TargetLoweringBase::AddrMode AM;
1161 AM.HasBaseReg = true;
1162 AM.BaseOffs = C2APIntVal.getSExtValue();
1163 EVT VT = LoadStore->getMemoryVT();
1164 unsigned AS = LoadStore->getAddressSpace();
1165 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1166 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1167 continue;
1168
1169 // Would x[offset1+offset2] still be a legal addressing mode?
1170 AM.BaseOffs = CombinedValue;
1171 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1172 return true;
1173 }
1174 }
1175 } else {
1176 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1177 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1178 return false;
1179
1180 for (SDNode *Node : N->users()) {
1181 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1182 if (!LoadStore)
1183 return false;
1184
1185 // Is x[offset2] a legal addressing mode? If so then
1186 // reassociating the constants breaks the address pattern.
1187 TargetLoweringBase::AddrMode AM;
1188 AM.HasBaseReg = true;
1189 AM.BaseOffs = C2APIntVal.getSExtValue();
1190 EVT VT = LoadStore->getMemoryVT();
1191 unsigned AS = LoadStore->getAddressSpace();
1192 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1193 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1194 return false;
1195 }
1196 return true;
1197 }
1198
1199 return false;
1200}
1201
1202/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1203/// \p N0 is the same kind of operation as \p Opc.
1204SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1205 SDValue N0, SDValue N1,
1206 SDNodeFlags Flags) {
1207 EVT VT = N0.getValueType();
1208
1209 if (N0.getOpcode() != Opc)
1210 return SDValue();
1211
1212 SDValue N00 = N0.getOperand(0);
1213 SDValue N01 = N0.getOperand(1);
1214
1215 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1216 SDNodeFlags NewFlags;
1217 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1218 Flags.hasNoUnsignedWrap())
1219 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1220
1221 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1222 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1223 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1224 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1225 N0->getFlags().hasDisjoint());
1226 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1227 }
1228 return SDValue();
1229 }
1230 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1231 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1232 // iff (op x, c1) has one use
1233 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1234 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1235 }
1236 }
1237
1238 // Check for repeated operand logic simplifications.
1239 if (Opc == ISD::AND || Opc == ISD::OR) {
1240 // (N00 & N01) & N00 --> N00 & N01
1241 // (N00 & N01) & N01 --> N00 & N01
1242 // (N00 | N01) | N00 --> N00 | N01
1243 // (N00 | N01) | N01 --> N00 | N01
1244 if (N1 == N00 || N1 == N01)
1245 return N0;
1246 }
1247 if (Opc == ISD::XOR) {
1248 // (N00 ^ N01) ^ N00 --> N01
1249 if (N1 == N00)
1250 return N01;
1251 // (N00 ^ N01) ^ N01 --> N00
1252 if (N1 == N01)
1253 return N00;
1254 }
1255
1256 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1257 if (N1 != N01) {
1258 // Reassociate if (op N00, N1) already exists
1259 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1260 // If (Op (Op N00, N1), N01) already exists, stop reassociating here
1261 // to avoid an infinite loop.
1262 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1263 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1264 }
1265 }
1266
1267 if (N1 != N00) {
1268 // Reassociate if (op N01, N1) already exists
1269 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1270 // If (Op (Op N01, N1), N00) already exists, stop reassociating here
1271 // to avoid an infinite loop.
1272 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1273 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1274 }
1275 }
1276
1277 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1278 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1279 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1280 // comparisons with the same predicate. This enables optimizations as the
1281 // following one:
1282 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1283 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1284 if (Opc == ISD::AND || Opc == ISD::OR) {
1285 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1286 N01->getOpcode() == ISD::SETCC) {
1287 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1288 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1289 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1290 if (CC1 == CC00 && CC1 != CC01) {
1291 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1292 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1293 }
1294 if (CC1 == CC01 && CC1 != CC00) {
1295 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1296 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1297 }
1298 }
1299 }
1300 }
1301
1302 return SDValue();
1303}
1304
1305/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1306/// same kind of operation as \p Opc.
1307SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1308 SDValue N1, SDNodeFlags Flags) {
1309 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1310
1311 // Floating-point reassociation is not allowed without loose FP math.
1312 if (N0.getValueType().isFloatingPoint() ||
1313 N1.getValueType().isFloatingPoint())
1314 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1315 return SDValue();
1316
1317 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1318 return Combined;
1319 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1320 return Combined;
1321 return SDValue();
1322}
1323
1324// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1325// Note that we only expect Flags to be passed from FP operations. For integer
1326// operations they need to be dropped.
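// For example: (fadd (vecreduce_fadd x), (vecreduce_fadd y))
//           -> (vecreduce_fadd (fadd x, y))
// when both reductions have a single use and the target reports the
// reassociation as profitable.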
1327SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1328 const SDLoc &DL, EVT VT, SDValue N0,
1329 SDValue N1, SDNodeFlags Flags) {
1330 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1331 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1332 N0->hasOneUse() && N1->hasOneUse() &&
1334 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1335 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1336 return DAG.getNode(RedOpc, DL, VT,
1337 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1338 N0.getOperand(0), N1.getOperand(0)));
1339 }
1340 return SDValue();
1341}
1342
1343SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1344 bool AddTo) {
1345 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1346 ++NodesCombined;
1347 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1348 To[0].dump(&DAG);
1349 dbgs() << " and " << NumTo - 1 << " other values\n");
1350 for (unsigned i = 0, e = NumTo; i != e; ++i)
1351 assert((!To[i].getNode() ||
1352 N->getValueType(i) == To[i].getValueType()) &&
1353 "Cannot combine value to value of different type!");
1354
1355 WorklistRemover DeadNodes(*this);
1356 DAG.ReplaceAllUsesWith(N, To);
1357 if (AddTo) {
1358 // Push the new nodes and any users onto the worklist
1359 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1360 if (To[i].getNode())
1361 AddToWorklistWithUsers(To[i].getNode());
1362 }
1363 }
1364
1365 // Finally, if the node is now dead, remove it from the graph. The node
1366 // may not be dead if the replacement process recursively simplified to
1367 // something else needing this node.
1368 if (N->use_empty())
1369 deleteAndRecombine(N);
1370 return SDValue(N, 0);
1371}
1372
1373void DAGCombiner::
1374CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1375 // Replace the old value with the new one.
1376 ++NodesCombined;
1377 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1378 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1379
1380 // Replace all uses.
1381 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1382
1383 // Push the new node and any (possibly new) users onto the worklist.
1384 AddToWorklistWithUsers(TLO.New.getNode());
1385
1386 // Finally, if the node is now dead, remove it from the graph.
1387 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1388}
1389
1390/// Check the specified integer node value to see if it can be simplified or if
1391/// things it uses can be simplified by bit propagation. If so, return true.
1392bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1393 const APInt &DemandedElts,
1394 bool AssumeSingleUse) {
1395 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1396 KnownBits Known;
1397 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1398 AssumeSingleUse))
1399 return false;
1400
1401 // Revisit the node.
1402 AddToWorklist(Op.getNode());
1403
1404 CommitTargetLoweringOpt(TLO);
1405 return true;
1406}
1407
1408/// Check the specified vector node value to see if it can be simplified or
1409/// if things it uses can be simplified as it only uses some of the elements.
1410/// If so, return true.
1411bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1412 const APInt &DemandedElts,
1413 bool AssumeSingleUse) {
1414 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1415 APInt KnownUndef, KnownZero;
1416 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1417 TLO, 0, AssumeSingleUse))
1418 return false;
1419
1420 // Revisit the node.
1421 AddToWorklist(Op.getNode());
1422
1423 CommitTargetLoweringOpt(TLO);
1424 return true;
1425}
1426
1427void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1428 SDLoc DL(Load);
1429 EVT VT = Load->getValueType(0);
1430 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1431
1432 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1433 Trunc.dump(&DAG); dbgs() << '\n');
1434
1435 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1436 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1437
1438 AddToWorklist(Trunc.getNode());
1439 recursivelyDeleteUnusedNodes(Load);
1440}
1441
1442SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1443 Replace = false;
1444 SDLoc DL(Op);
1445 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1446 LoadSDNode *LD = cast<LoadSDNode>(Op);
1447 EVT MemVT = LD->getMemoryVT();
1448 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1449 : LD->getExtensionType();
1450 Replace = true;
1451 return DAG.getExtLoad(ExtType, DL, PVT,
1452 LD->getChain(), LD->getBasePtr(),
1453 MemVT, LD->getMemOperand());
1454 }
1455
1456 unsigned Opc = Op.getOpcode();
1457 switch (Opc) {
1458 default: break;
1459 case ISD::AssertSext:
1460 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1461 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1462 break;
1463 case ISD::AssertZext:
1464 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1465 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1466 break;
1467 case ISD::Constant: {
1468 unsigned ExtOpc =
1469 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1470 return DAG.getNode(ExtOpc, DL, PVT, Op);
1471 }
1472 }
1473
1474 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1475 return SDValue();
1476 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1477}
1478
1479SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1480 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1481 return SDValue();
1482 EVT OldVT = Op.getValueType();
1483 SDLoc DL(Op);
1484 bool Replace = false;
1485 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1486 if (!NewOp.getNode())
1487 return SDValue();
1488 AddToWorklist(NewOp.getNode());
1489
1490 if (Replace)
1491 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1492 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1493 DAG.getValueType(OldVT));
1494}
1495
1496SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1497 EVT OldVT = Op.getValueType();
1498 SDLoc DL(Op);
1499 bool Replace = false;
1500 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1501 if (!NewOp.getNode())
1502 return SDValue();
1503 AddToWorklist(NewOp.getNode());
1504
1505 if (Replace)
1506 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1507 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1508}
1509
1510/// Promote the specified integer binary operation if the target indicates it is
1511/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1512/// i32 since i16 instructions are longer.
1513SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1514 if (!LegalOperations)
1515 return SDValue();
1516
1517 EVT VT = Op.getValueType();
1518 if (VT.isVector() || !VT.isInteger())
1519 return SDValue();
1520
1521 // If operation type is 'undesirable', e.g. i16 on x86, consider
1522 // promoting it.
1523 unsigned Opc = Op.getOpcode();
1524 if (TLI.isTypeDesirableForOp(Opc, VT))
1525 return SDValue();
1526
1527 EVT PVT = VT;
1528 // Consult target whether it is a good idea to promote this operation and
1529 // what's the right type to promote it to.
1530 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1531 assert(PVT != VT && "Don't know what type to promote to!");
1532
1533 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1534
1535 bool Replace0 = false;
1536 SDValue N0 = Op.getOperand(0);
1537 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1538
1539 bool Replace1 = false;
1540 SDValue N1 = Op.getOperand(1);
1541 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1542 SDLoc DL(Op);
1543
1544 SDValue RV =
1545 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1546
1547 // We are always replacing N0/N1's use in N and only need additional
1548 // replacements if there are additional uses.
1549 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1550 // (SDValue) here because the node may reference multiple values
1551 // (for example, the chain value of a load node).
1552 Replace0 &= !N0->hasOneUse();
1553 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1554
1555 // Combine Op here so it is preserved past replacements.
1556 CombineTo(Op.getNode(), RV);
1557
1558 // If operands have a use ordering, make sure we deal with
1559 // predecessor first.
1560 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1561 std::swap(N0, N1);
1562 std::swap(NN0, NN1);
1563 }
1564
1565 if (Replace0) {
1566 AddToWorklist(NN0.getNode());
1567 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1568 }
1569 if (Replace1) {
1570 AddToWorklist(NN1.getNode());
1571 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1572 }
1573 return Op;
1574 }
1575 return SDValue();
1576}
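// Illustrative example (assumes a target such as x86 where i16 is marked
// undesirable and i32 is the promoted type): a node like
//   (i16 add %a, %b)
// is rewritten to roughly
//   (i16 truncate (i32 add (any_extend %a), (any_extend %b)))
// and, when %a or %b is an unindexed load, the extension is folded into the
// load itself via ReplaceLoadWithPromotedLoad above.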
1577
1578/// Promote the specified integer shift operation if the target indicates it is
1579/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1580/// i32 since i16 instructions are longer.
1581SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1582 if (!LegalOperations)
1583 return SDValue();
1584
1585 EVT VT = Op.getValueType();
1586 if (VT.isVector() || !VT.isInteger())
1587 return SDValue();
1588
1589 // If operation type is 'undesirable', e.g. i16 on x86, consider
1590 // promoting it.
1591 unsigned Opc = Op.getOpcode();
1592 if (TLI.isTypeDesirableForOp(Opc, VT))
1593 return SDValue();
1594
1595 EVT PVT = VT;
1596 // Consult target whether it is a good idea to promote this operation and
1597 // what's the right type to promote it to.
1598 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1599 assert(PVT != VT && "Don't know what type to promote to!");
1600
1601 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1602
1603 bool Replace = false;
1604 SDValue N0 = Op.getOperand(0);
1605 if (Opc == ISD::SRA)
1606 N0 = SExtPromoteOperand(N0, PVT);
1607 else if (Opc == ISD::SRL)
1608 N0 = ZExtPromoteOperand(N0, PVT);
1609 else
1610 N0 = PromoteOperand(N0, PVT, Replace);
1611
1612 if (!N0.getNode())
1613 return SDValue();
1614
1615 SDLoc DL(Op);
1616 SDValue N1 = Op.getOperand(1);
1617 SDValue RV =
1618 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1619
1620 if (Replace)
1621 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1622
1623 // Deal with Op being deleted.
1624 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1625 return RV;
1626 }
1627 return SDValue();
1628}
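// Illustrative example: for right shifts the promoted operand must carry the
// correct high bits, so SRA sign-extends the operand in-register and SRL
// zero-extends it, e.g. roughly
//   (i16 srl %x, %c) -> (truncate (i32 srl (zero-extend-in-reg %x), %c))
// while SHL can use a plain any-extend, because the final truncate discards
// whatever lands in the extra high bits.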
1629
1630SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1631 if (!LegalOperations)
1632 return SDValue();
1633
1634 EVT VT = Op.getValueType();
1635 if (VT.isVector() || !VT.isInteger())
1636 return SDValue();
1637
1638 // If operation type is 'undesirable', e.g. i16 on x86, consider
1639 // promoting it.
1640 unsigned Opc = Op.getOpcode();
1641 if (TLI.isTypeDesirableForOp(Opc, VT))
1642 return SDValue();
1643
1644 EVT PVT = VT;
1645 // Consult target whether it is a good idea to promote this operation and
1646 // what's the right type to promote it to.
1647 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1648 assert(PVT != VT && "Don't know what type to promote to!");
1649 // fold (aext (aext x)) -> (aext x)
1650 // fold (aext (zext x)) -> (zext x)
1651 // fold (aext (sext x)) -> (sext x)
1652 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1653 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1654 }
1655 return SDValue();
1656}
1657
1658bool DAGCombiner::PromoteLoad(SDValue Op) {
1659 if (!LegalOperations)
1660 return false;
1661
1662 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1663 return false;
1664
1665 EVT VT = Op.getValueType();
1666 if (VT.isVector() || !VT.isInteger())
1667 return false;
1668
1669 // If operation type is 'undesirable', e.g. i16 on x86, consider
1670 // promoting it.
1671 unsigned Opc = Op.getOpcode();
1672 if (TLI.isTypeDesirableForOp(Opc, VT))
1673 return false;
1674
1675 EVT PVT = VT;
1676 // Consult target whether it is a good idea to promote this operation and
1677 // what's the right type to promote it to.
1678 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1679 assert(PVT != VT && "Don't know what type to promote to!");
1680
1681 SDLoc DL(Op);
1682 SDNode *N = Op.getNode();
1683 LoadSDNode *LD = cast<LoadSDNode>(N);
1684 EVT MemVT = LD->getMemoryVT();
1685 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1686 : LD->getExtensionType();
1687 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1688 LD->getChain(), LD->getBasePtr(),
1689 MemVT, LD->getMemOperand());
1690 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1691
1692 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1693 Result.dump(&DAG); dbgs() << '\n');
1694
1695 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1696 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1697
1698 AddToWorklist(Result.getNode());
1699 recursivelyDeleteUnusedNodes(N);
1700 return true;
1701 }
1702
1703 return false;
1704}
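// Illustrative example (hypothetical target where i16 is undesirable):
//   (i16 load %p)
// becomes an extending i32 load plus a truncate,
//   (i16 truncate (i32 extload %p))
// so later combines operate on the wider, cheaper type; the original load's
// value and chain users are redirected to the truncate and the new load's
// chain, respectively.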
1705
1706/// Recursively delete a node which has no uses and any operands for
1707/// which it is the only use.
1708///
1709/// Note that this both deletes the nodes and removes them from the worklist.
1710/// It also adds any nodes that have had a user deleted to the worklist, as they
1711/// may now have only one use and be subject to other combines.
1712bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1713 if (!N->use_empty())
1714 return false;
1715
1716 SmallSetVector<SDNode *, 16> Nodes;
1717 Nodes.insert(N);
1718 do {
1719 N = Nodes.pop_back_val();
1720 if (!N)
1721 continue;
1722
1723 if (N->use_empty()) {
1724 for (const SDValue &ChildN : N->op_values())
1725 Nodes.insert(ChildN.getNode());
1726
1727 removeFromWorklist(N);
1728 DAG.DeleteNode(N);
1729 } else {
1730 AddToWorklist(N);
1731 }
1732 } while (!Nodes.empty());
1733 return true;
1734}
1735
1736//===----------------------------------------------------------------------===//
1737// Main DAG Combiner implementation
1738//===----------------------------------------------------------------------===//
1739
1740void DAGCombiner::Run(CombineLevel AtLevel) {
1741 // set the instance variables, so that the various visit routines may use it.
1742 Level = AtLevel;
1743 LegalDAG = Level >= AfterLegalizeDAG;
1744 LegalOperations = Level >= AfterLegalizeVectorOps;
1745 LegalTypes = Level >= AfterLegalizeTypes;
1746
1747 WorklistInserter AddNodes(*this);
1748
1749 // Add all the dag nodes to the worklist.
1750 //
1751 // Note: Not all nodes are added to the PruningList here, because the only
1752 // nodes which can be deleted are those which have no uses, and all other nodes
1753 // which would otherwise be added to the worklist by the first call to
1754 // getNextWorklistEntry are already present in it.
1755 for (SDNode &Node : DAG.allnodes())
1756 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1757
1758 // Create a dummy node (which is not added to allnodes), that adds a reference
1759 // to the root node, preventing it from being deleted, and tracking any
1760 // changes of the root.
1761 HandleSDNode Dummy(DAG.getRoot());
1762
1763 // While we have a valid worklist entry node, try to combine it.
1764 while (SDNode *N = getNextWorklistEntry()) {
1765 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1766 // N is deleted from the DAG, since they too may now be dead or may have a
1767 // reduced number of uses, allowing other xforms.
1768 if (recursivelyDeleteUnusedNodes(N))
1769 continue;
1770
1771 WorklistRemover DeadNodes(*this);
1772
1773 // If this combine is running after legalizing the DAG, re-legalize any
1774 // nodes pulled off the worklist.
1775 if (LegalDAG) {
1776 SmallSetVector<SDNode *, 16> UpdatedNodes;
1777 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1778
1779 for (SDNode *LN : UpdatedNodes)
1780 AddToWorklistWithUsers(LN);
1781
1782 if (!NIsValid)
1783 continue;
1784 }
1785
1786 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1787
1788 // Add any operands of the new node which have not yet been combined to the
1789 // worklist as well. getNextWorklistEntry flags nodes that have been
1790 // combined before. Because the worklist uniques things already, this won't
1791 // repeatedly process the same operand.
1792 for (const SDValue &ChildN : N->op_values())
1793 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1794 /*SkipIfCombinedBefore=*/true);
1795
1796 SDValue RV = combine(N);
1797
1798 if (!RV.getNode())
1799 continue;
1800
1801 ++NodesCombined;
1802
1803 // Invalidate cached info.
1804 ChainsWithoutMergeableStores.clear();
1805
1806 // If we get back the same node we passed in, rather than a new node or
1807 // zero, we know that the node must have defined multiple values and
1808 // CombineTo was used. Since CombineTo takes care of the worklist
1809 // mechanics for us, we have no work to do in this case.
1810 if (RV.getNode() == N)
1811 continue;
1812
1813 assert(N->getOpcode() != ISD::DELETED_NODE &&
1814 RV.getOpcode() != ISD::DELETED_NODE &&
1815 "Node was deleted but visit returned new node!");
1816
1817 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1818
1819 if (N->getNumValues() == RV->getNumValues())
1820 DAG.ReplaceAllUsesWith(N, RV.getNode());
1821 else {
1822 assert(N->getValueType(0) == RV.getValueType() &&
1823 N->getNumValues() == 1 && "Type mismatch");
1824 DAG.ReplaceAllUsesWith(N, &RV);
1825 }
1826
1827 // Push the new node and any users onto the worklist. Omit this if the
1828 // new node is the EntryToken (e.g. if a store managed to get optimized
1829 // out), because re-visiting the EntryToken and its users will not uncover
1830 // any additional opportunities, but there may be a large number of such
1831 // users, potentially causing compile time explosion.
1832 if (RV.getOpcode() != ISD::EntryToken)
1833 AddToWorklistWithUsers(RV.getNode());
1834
1835 // Finally, if the node is now dead, remove it from the graph. The node
1836 // may not be dead if the replacement process recursively simplified to
1837 // something else needing this node. This will also take care of adding any
1838 // operands which have lost a user to the worklist.
1839 recursivelyDeleteUnusedNodes(N);
1840 }
1841
1842 // If the root changed (e.g. it was a dead load), update the root.
1843 DAG.setRoot(Dummy.getValue());
1844 DAG.RemoveDeadNodes();
1845}
1846
1847SDValue DAGCombiner::visit(SDNode *N) {
1848 // clang-format off
1849 switch (N->getOpcode()) {
1850 default: break;
1851 case ISD::TokenFactor: return visitTokenFactor(N);
1852 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1853 case ISD::ADD: return visitADD(N);
1854 case ISD::SUB: return visitSUB(N);
1855 case ISD::SADDSAT:
1856 case ISD::UADDSAT: return visitADDSAT(N);
1857 case ISD::SSUBSAT:
1858 case ISD::USUBSAT: return visitSUBSAT(N);
1859 case ISD::ADDC: return visitADDC(N);
1860 case ISD::SADDO:
1861 case ISD::UADDO: return visitADDO(N);
1862 case ISD::SUBC: return visitSUBC(N);
1863 case ISD::SSUBO:
1864 case ISD::USUBO: return visitSUBO(N);
1865 case ISD::ADDE: return visitADDE(N);
1866 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1867 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1868 case ISD::SUBE: return visitSUBE(N);
1869 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1870 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1871 case ISD::SMULFIX:
1872 case ISD::SMULFIXSAT:
1873 case ISD::UMULFIX:
1874 case ISD::UMULFIXSAT: return visitMULFIX(N);
1875 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1876 case ISD::SDIV: return visitSDIV(N);
1877 case ISD::UDIV: return visitUDIV(N);
1878 case ISD::SREM:
1879 case ISD::UREM: return visitREM(N);
1880 case ISD::MULHU: return visitMULHU(N);
1881 case ISD::MULHS: return visitMULHS(N);
1882 case ISD::AVGFLOORS:
1883 case ISD::AVGFLOORU:
1884 case ISD::AVGCEILS:
1885 case ISD::AVGCEILU: return visitAVG(N);
1886 case ISD::ABDS:
1887 case ISD::ABDU: return visitABD(N);
1888 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1889 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1890 case ISD::SMULO:
1891 case ISD::UMULO: return visitMULO(N);
1892 case ISD::SMIN:
1893 case ISD::SMAX:
1894 case ISD::UMIN:
1895 case ISD::UMAX: return visitIMINMAX(N);
1896 case ISD::AND: return visitAND(N);
1897 case ISD::OR: return visitOR(N);
1898 case ISD::XOR: return visitXOR(N);
1899 case ISD::SHL: return visitSHL(N);
1900 case ISD::SRA: return visitSRA(N);
1901 case ISD::SRL: return visitSRL(N);
1902 case ISD::ROTR:
1903 case ISD::ROTL: return visitRotate(N);
1904 case ISD::FSHL:
1905 case ISD::FSHR: return visitFunnelShift(N);
1906 case ISD::SSHLSAT:
1907 case ISD::USHLSAT: return visitSHLSAT(N);
1908 case ISD::ABS: return visitABS(N);
1909 case ISD::BSWAP: return visitBSWAP(N);
1910 case ISD::BITREVERSE: return visitBITREVERSE(N);
1911 case ISD::CTLZ: return visitCTLZ(N);
1912 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1913 case ISD::CTTZ: return visitCTTZ(N);
1914 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1915 case ISD::CTPOP: return visitCTPOP(N);
1916 case ISD::SELECT: return visitSELECT(N);
1917 case ISD::VSELECT: return visitVSELECT(N);
1918 case ISD::SELECT_CC: return visitSELECT_CC(N);
1919 case ISD::SETCC: return visitSETCC(N);
1920 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1921 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1922 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1923 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1924 case ISD::AssertSext:
1925 case ISD::AssertZext: return visitAssertExt(N);
1926 case ISD::AssertAlign: return visitAssertAlign(N);
1927 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1928 case ISD::SIGN_EXTEND_VECTOR_INREG:
1929 case ISD::ZERO_EXTEND_VECTOR_INREG:
1930 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1931 case ISD::TRUNCATE: return visitTRUNCATE(N);
1932 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1933 case ISD::BITCAST: return visitBITCAST(N);
1934 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1935 case ISD::FADD: return visitFADD(N);
1936 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1937 case ISD::FSUB: return visitFSUB(N);
1938 case ISD::FMUL: return visitFMUL(N);
1939 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1940 case ISD::FMAD: return visitFMAD(N);
1941 case ISD::FDIV: return visitFDIV(N);
1942 case ISD::FREM: return visitFREM(N);
1943 case ISD::FSQRT: return visitFSQRT(N);
1944 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1945 case ISD::FPOW: return visitFPOW(N);
1946 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1947 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1948 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1949 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1950 case ISD::LROUND:
1951 case ISD::LLROUND:
1952 case ISD::LRINT:
1953 case ISD::LLRINT: return visitXROUND(N);
1954 case ISD::FP_ROUND: return visitFP_ROUND(N);
1955 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1956 case ISD::FNEG: return visitFNEG(N);
1957 case ISD::FABS: return visitFABS(N);
1958 case ISD::FFLOOR: return visitFFLOOR(N);
1959 case ISD::FMINNUM:
1960 case ISD::FMAXNUM:
1961 case ISD::FMINIMUM:
1962 case ISD::FMAXIMUM:
1963 case ISD::FMINIMUMNUM:
1964 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
1965 case ISD::FCEIL: return visitFCEIL(N);
1966 case ISD::FTRUNC: return visitFTRUNC(N);
1967 case ISD::FFREXP: return visitFFREXP(N);
1968 case ISD::BRCOND: return visitBRCOND(N);
1969 case ISD::BR_CC: return visitBR_CC(N);
1970 case ISD::LOAD: return visitLOAD(N);
1971 case ISD::STORE: return visitSTORE(N);
1972 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1973 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1974 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1975 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1976 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1977 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1978 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1979 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1980 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1981 case ISD::MGATHER: return visitMGATHER(N);
1982 case ISD::MLOAD: return visitMLOAD(N);
1983 case ISD::MSCATTER: return visitMSCATTER(N);
1984 case ISD::MSTORE: return visitMSTORE(N);
1985 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
1986 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
1987 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1988 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1989 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1990 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1991 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1992 case ISD::FREEZE: return visitFREEZE(N);
1993 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1994 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1995 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
1996 case ISD::VECREDUCE_FADD:
1997 case ISD::VECREDUCE_FMUL:
1998 case ISD::VECREDUCE_ADD:
1999 case ISD::VECREDUCE_MUL:
2000 case ISD::VECREDUCE_AND:
2001 case ISD::VECREDUCE_OR:
2002 case ISD::VECREDUCE_XOR:
2003 case ISD::VECREDUCE_SMAX:
2004 case ISD::VECREDUCE_SMIN:
2005 case ISD::VECREDUCE_UMAX:
2006 case ISD::VECREDUCE_UMIN:
2007 case ISD::VECREDUCE_FMAX:
2008 case ISD::VECREDUCE_FMIN:
2009 case ISD::VECREDUCE_FMAXIMUM:
2010 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2011#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2012#include "llvm/IR/VPIntrinsics.def"
2013 return visitVPOp(N);
2014 }
2015 // clang-format on
2016 return SDValue();
2017}
2018
2019SDValue DAGCombiner::combine(SDNode *N) {
2020 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2021 return SDValue();
2022
2023 SDValue RV;
2024 if (!DisableGenericCombines)
2025 RV = visit(N);
2026
2027 // If nothing happened, try a target-specific DAG combine.
2028 if (!RV.getNode()) {
2029 assert(N->getOpcode() != ISD::DELETED_NODE &&
2030 "Node was deleted but visit returned NULL!");
2031
2032 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2033 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2034
2035 // Expose the DAG combiner to the target combiner impls.
2036 TargetLowering::DAGCombinerInfo
2037 DagCombineInfo(DAG, Level, false, this);
2038
2039 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2040 }
2041 }
2042
2043 // If nothing happened still, try promoting the operation.
2044 if (!RV.getNode()) {
2045 switch (N->getOpcode()) {
2046 default: break;
2047 case ISD::ADD:
2048 case ISD::SUB:
2049 case ISD::MUL:
2050 case ISD::AND:
2051 case ISD::OR:
2052 case ISD::XOR:
2053 RV = PromoteIntBinOp(SDValue(N, 0));
2054 break;
2055 case ISD::SHL:
2056 case ISD::SRA:
2057 case ISD::SRL:
2058 RV = PromoteIntShiftOp(SDValue(N, 0));
2059 break;
2060 case ISD::SIGN_EXTEND:
2061 case ISD::ZERO_EXTEND:
2062 case ISD::ANY_EXTEND:
2063 RV = PromoteExtend(SDValue(N, 0));
2064 break;
2065 case ISD::LOAD:
2066 if (PromoteLoad(SDValue(N, 0)))
2067 RV = SDValue(N, 0);
2068 break;
2069 }
2070 }
2071
2072 // If N is a commutative binary node, try to eliminate it if the commuted
2073 // version is already present in the DAG.
2074 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2075 SDValue N0 = N->getOperand(0);
2076 SDValue N1 = N->getOperand(1);
2077
2078 // Constant operands are canonicalized to RHS.
2079 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2080 SDValue Ops[] = {N1, N0};
2081 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2082 N->getFlags());
2083 if (CSENode)
2084 return SDValue(CSENode, 0);
2085 }
2086 }
2087
2088 return RV;
2089}
2090
2091/// Given a node, return its input chain if it has one, otherwise return a null
2092/// sd operand.
2093static SDValue getInputChainForNode(SDNode *N) {
2094 if (unsigned NumOps = N->getNumOperands()) {
2095 if (N->getOperand(0).getValueType() == MVT::Other)
2096 return N->getOperand(0);
2097 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2098 return N->getOperand(NumOps-1);
2099 for (unsigned i = 1; i < NumOps-1; ++i)
2100 if (N->getOperand(i).getValueType() == MVT::Other)
2101 return N->getOperand(i);
2102 }
2103 return SDValue();
2104}
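// Illustrative note: chain values have type MVT::Other, so this checks the
// common positions first (operand 0 for loads/stores, the last operand for
// nodes that append their chain) and only then scans the remaining operands.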
2105
2106SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2107 SDValue Operand = N->getOperand(0);
2108 EVT VT = Operand.getValueType();
2109 SDLoc dl(N);
2110
2111 // Canonicalize undef to quiet NaN.
2112 if (Operand.isUndef()) {
2113 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2114 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2115 }
2116 return SDValue();
2117}
2118
2119SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2120 // If N has two operands, where one has an input chain equal to the other,
2121 // the 'other' chain is redundant.
2122 if (N->getNumOperands() == 2) {
2123 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2124 return N->getOperand(0);
2125 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2126 return N->getOperand(1);
2127 }
2128
2129 // Don't simplify token factors if optnone.
2130 if (OptLevel == CodeGenOptLevel::None)
2131 return SDValue();
2132
2133 // Don't simplify the token factor if the node itself has too many operands.
2134 if (N->getNumOperands() > TokenFactorInlineLimit)
2135 return SDValue();
2136
2137 // If the sole user is a token factor, we should make sure we have a
2138 // chance to merge them together. This prevents TF chains from inhibiting
2139 // optimizations.
2140 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2141 AddToWorklist(*(N->user_begin()));
2142
2143 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2144 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2145 SmallPtrSet<SDNode *, 16> SeenOps;
2146 bool Changed = false; // If we should replace this token factor.
2147
2148 // Start out with this token factor.
2149 TFs.push_back(N);
2150
2151 // Iterate through token factors. The TFs grows when new token factors are
2152 // encountered.
2153 for (unsigned i = 0; i < TFs.size(); ++i) {
2154 // Limit number of nodes to inline, to avoid quadratic compile times.
2155 // We have to add the outstanding Token Factors to Ops, otherwise we might
2156 // drop Ops from the resulting Token Factors.
2157 if (Ops.size() > TokenFactorInlineLimit) {
2158 for (unsigned j = i; j < TFs.size(); j++)
2159 Ops.emplace_back(TFs[j], 0);
2160 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2161 // combiner worklist later.
2162 TFs.resize(i);
2163 break;
2164 }
2165
2166 SDNode *TF = TFs[i];
2167 // Check each of the operands.
2168 for (const SDValue &Op : TF->op_values()) {
2169 switch (Op.getOpcode()) {
2170 case ISD::EntryToken:
2171 // Entry tokens don't need to be added to the list. They are
2172 // redundant.
2173 Changed = true;
2174 break;
2175
2176 case ISD::TokenFactor:
2177 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2178 // Queue up for processing.
2179 TFs.push_back(Op.getNode());
2180 Changed = true;
2181 break;
2182 }
2183 [[fallthrough]];
2184
2185 default:
2186 // Only add if it isn't already in the list.
2187 if (SeenOps.insert(Op.getNode()).second)
2188 Ops.push_back(Op);
2189 else
2190 Changed = true;
2191 break;
2192 }
2193 }
2194 }
2195
2196 // Re-visit inlined Token Factors, to clean them up in case they have been
2197 // removed. Skip the first Token Factor, as this is the current node.
2198 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2199 AddToWorklist(TFs[i]);
2200
2201 // Remove Nodes that are chained to another node in the list. Do so
2202 // by walking up chains breadth-first, stopping when we've seen
2203 // another operand. In general we must climb to the EntryNode, but we can exit
2204 // early if we find all remaining work is associated with just one operand as
2205 // no further pruning is possible.
2206
2207 // List of nodes to search through and original Ops from which they originate.
2208 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2209 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2210 SmallPtrSet<SDNode *, 16> SeenChains;
2211 bool DidPruneOps = false;
2212
2213 unsigned NumLeftToConsider = 0;
2214 for (const SDValue &Op : Ops) {
2215 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2216 OpWorkCount.push_back(1);
2217 }
2218
2219 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2220 // If this is an Op, we can remove the op from the list. Re-mark any
2221 // search associated with it as from the current OpNumber.
2222 if (SeenOps.contains(Op)) {
2223 Changed = true;
2224 DidPruneOps = true;
2225 unsigned OrigOpNumber = 0;
2226 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2227 OrigOpNumber++;
2228 assert((OrigOpNumber != Ops.size()) &&
2229 "expected to find TokenFactor Operand");
2230 // Re-mark worklist from OrigOpNumber to OpNumber
2231 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2232 if (Worklist[i].second == OrigOpNumber) {
2233 Worklist[i].second = OpNumber;
2234 }
2235 }
2236 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2237 OpWorkCount[OrigOpNumber] = 0;
2238 NumLeftToConsider--;
2239 }
2240 // Add if it's a new chain
2241 if (SeenChains.insert(Op).second) {
2242 OpWorkCount[OpNumber]++;
2243 Worklist.push_back(std::make_pair(Op, OpNumber));
2244 }
2245 };
2246
2247 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2248 // We need to consider at least 2 Ops to prune.
2249 if (NumLeftToConsider <= 1)
2250 break;
2251 auto CurNode = Worklist[i].first;
2252 auto CurOpNumber = Worklist[i].second;
2253 assert((OpWorkCount[CurOpNumber] > 0) &&
2254 "Node should not appear in worklist");
2255 switch (CurNode->getOpcode()) {
2256 case ISD::EntryToken:
2257 // Hitting EntryToken is the only way for the search to terminate without
2258 // hitting another operand's search. Prevent us from marking this operand
2259 // considered.
2261 NumLeftToConsider++;
2262 break;
2263 case ISD::TokenFactor:
2264 for (const SDValue &Op : CurNode->op_values())
2265 AddToWorklist(i, Op.getNode(), CurOpNumber);
2266 break;
2267 case ISD::LIFETIME_START:
2268 case ISD::LIFETIME_END:
2269 case ISD::CopyFromReg:
2270 case ISD::CopyToReg:
2271 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2272 break;
2273 default:
2274 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2275 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2276 break;
2277 }
2278 OpWorkCount[CurOpNumber]--;
2279 if (OpWorkCount[CurOpNumber] == 0)
2280 NumLeftToConsider--;
2281 }
2282
2283 // If we've changed things around then replace token factor.
2284 if (Changed) {
2285 SDValue Result;
2286 if (Ops.empty()) {
2287 // The entry token is the only possible outcome.
2288 Result = DAG.getEntryNode();
2289 } else {
2290 if (DidPruneOps) {
2291 SmallVector<SDValue, 8> PrunedOps;
2292 //
2293 for (const SDValue &Op : Ops) {
2294 if (SeenChains.count(Op.getNode()) == 0)
2295 PrunedOps.push_back(Op);
2296 }
2297 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2298 } else {
2299 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2300 }
2301 }
2302 return Result;
2303 }
2304 return SDValue();
2305}
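// Illustrative example: a chain such as
//   TokenFactor (TokenFactor a, b), c
// where the inner TokenFactor has a single use is flattened into
//   TokenFactor a, b, c
// and, if b is already reachable by walking up a's chain, the pruning pass
// above drops b as redundant before the replacement TokenFactor is built.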
2306
2307/// MERGE_VALUES can always be eliminated.
2308SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2309 WorklistRemover DeadNodes(*this);
2310 // Replacing results may cause a different MERGE_VALUES to suddenly
2311 // be CSE'd with N, and carry its uses with it. Iterate until no
2312 // uses remain, to ensure that the node can be safely deleted.
2313 // First add the users of this node to the work list so that they
2314 // can be tried again once they have new operands.
2315 AddUsersToWorklist(N);
2316 do {
2317 // Do as a single replacement to avoid rewalking use lists.
2318 SmallVector<SDValue, 8> Ops(N->ops());
2319 DAG.ReplaceAllUsesWith(N, Ops.data());
2320 } while (!N->use_empty());
2321 deleteAndRecombine(N);
2322 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2323}
2324
2325/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2326/// ConstantSDNode pointer else nullptr.
2327static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2328 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2329 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2330}
2331
2332// isTruncateOf - If N is a truncate of some other value, return true, record
2333// the value being truncated in Op and which of Op's bits are zero/one in Known.
2334// This function computes KnownBits to avoid a duplicated call to
2335// computeKnownBits in the caller.
2336static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2337 KnownBits &Known) {
2338 if (N->getOpcode() == ISD::TRUNCATE) {
2339 Op = N->getOperand(0);
2340 Known = DAG.computeKnownBits(Op);
2341 if (N->getFlags().hasNoUnsignedWrap())
2342 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2343 return true;
2344 }
2345
2346 if (N.getValueType().getScalarType() != MVT::i1 ||
2347 !sd_match(
2349 return false;
2350
2351 Known = DAG.computeKnownBits(Op);
2352 return (Known.Zero | 1).isAllOnes();
2353}
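// Illustrative example: for (truncate nuw i32 %x to i8) this returns %x in Op
// together with a KnownBits in which bit 8 and above are known zero, letting
// the caller peek through the truncate without recomputing known bits.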
2354
2355/// Return true if 'Use' is a load or a store that uses N as its base pointer
2356/// and that N may be folded in the load / store addressing mode.
2357static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2358 const TargetLowering &TLI) {
2359 EVT VT;
2360 unsigned AS;
2361
2362 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2363 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2364 return false;
2365 VT = LD->getMemoryVT();
2366 AS = LD->getAddressSpace();
2367 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2368 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2369 return false;
2370 VT = ST->getMemoryVT();
2371 AS = ST->getAddressSpace();
2372 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2373 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2374 return false;
2375 VT = LD->getMemoryVT();
2376 AS = LD->getAddressSpace();
2377 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2378 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2379 return false;
2380 VT = ST->getMemoryVT();
2381 AS = ST->getAddressSpace();
2382 } else {
2383 return false;
2384 }
2385
2386 TargetLowering::AddrMode AM;
2387 if (N->getOpcode() == ISD::ADD) {
2388 AM.HasBaseReg = true;
2389 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2390 if (Offset)
2391 // [reg +/- imm]
2392 AM.BaseOffs = Offset->getSExtValue();
2393 else
2394 // [reg +/- reg]
2395 AM.Scale = 1;
2396 } else if (N->getOpcode() == ISD::SUB) {
2397 AM.HasBaseReg = true;
2398 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2399 if (Offset)
2400 // [reg +/- imm]
2401 AM.BaseOffs = -Offset->getSExtValue();
2402 else
2403 // [reg +/- reg]
2404 AM.Scale = 1;
2405 } else {
2406 return false;
2407 }
2408
2409 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2410 VT.getTypeForEVT(*DAG.getContext()), AS);
2411}
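// Illustrative example: if N is (add %base, 16) and Use is an i32 load whose
// base pointer is N, this asks the target whether a [reg + 16] addressing mode
// for an i32 access is legal in that address space, i.e. whether the add could
// be folded into the memory operation rather than kept as a separate node.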
2412
2413/// This inverts a canonicalization in IR that replaces a variable select arm
2414/// with an identity constant. Codegen improves if we re-use the variable
2415/// operand rather than load a constant. This can also be converted into a
2416/// masked vector operation if the target supports it.
2417static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2418 bool ShouldCommuteOperands) {
2419 // Match a select as operand 1. The identity constant that we are looking for
2420 // is only valid as operand 1 of a non-commutative binop.
2421 SDValue N0 = N->getOperand(0);
2422 SDValue N1 = N->getOperand(1);
2423 if (ShouldCommuteOperands)
2424 std::swap(N0, N1);
2425
2426 // TODO: Should this apply to scalar select too?
2427 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2428 return SDValue();
2429
2430 // We can't hoist all instructions because of immediate UB (not speculatable).
2431 // For example div/rem by zero.
2432 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2433 return SDValue();
2434
2435 unsigned Opcode = N->getOpcode();
2436 EVT VT = N->getValueType(0);
2437 SDValue Cond = N1.getOperand(0);
2438 SDValue TVal = N1.getOperand(1);
2439 SDValue FVal = N1.getOperand(2);
2440
2441 // This transform increases uses of N0, so freeze it to be safe.
2442 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2443 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2444 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2445 SDValue F0 = DAG.getFreeze(N0);
2446 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2447 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2448 }
2449 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2450 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2451 SDValue F0 = DAG.getFreeze(N0);
2452 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2453 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2454 }
2455
2456 return SDValue();
2457}
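// Illustrative example: with add's identity constant 0,
//   (add X, (vselect Cond, 0, V)) -> (vselect Cond, freeze(X), (add freeze(X), V))
// so the variable operand X is reused on the true arm instead of materializing
// the identity constant; the freeze keeps the extra use of X well-defined.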
2458
2459SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2460 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2461 "Unexpected binary operator");
2462
2463 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2464 auto BinOpcode = BO->getOpcode();
2465 EVT VT = BO->getValueType(0);
2466 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2467 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2468 return Sel;
2469
2470 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2471 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2472 return Sel;
2473 }
2474
2475 // Don't do this unless the old select is going away. We want to eliminate the
2476 // binary operator, not replace a binop with a select.
2477 // TODO: Handle ISD::SELECT_CC.
2478 unsigned SelOpNo = 0;
2479 SDValue Sel = BO->getOperand(0);
2480 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2481 SelOpNo = 1;
2482 Sel = BO->getOperand(1);
2483
2484 // Peek through trunc to shift amount type.
2485 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2486 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2487 // This is valid when the truncated bits of x are already zero.
2488 SDValue Op;
2489 KnownBits Known;
2490 if (isTruncateOf(DAG, Sel, Op, Known) &&
2491 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2492 Sel = Op;
2493 }
2494 }
2495
2496 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2497 return SDValue();
2498
2499 SDValue CT = Sel.getOperand(1);
2500 if (!isConstantOrConstantVector(CT, true) &&
2501 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2502 return SDValue();
2503
2504 SDValue CF = Sel.getOperand(2);
2505 if (!isConstantOrConstantVector(CF, true) &&
2506 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2507 return SDValue();
2508
2509 // Bail out if any constants are opaque because we can't constant fold those.
2510 // The exception is "and" and "or" with either 0 or -1 in which case we can
2511 // propagate non constant operands into select. I.e.:
2512 // and (select Cond, 0, -1), X --> select Cond, 0, X
2513 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2514 bool CanFoldNonConst =
2515 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2516 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2517 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2518
2519 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2520 if (!CanFoldNonConst &&
2521 !isConstantOrConstantVector(CBO, true) &&
2522 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2523 return SDValue();
2524
2525 SDLoc DL(Sel);
2526 SDValue NewCT, NewCF;
2527
2528 if (CanFoldNonConst) {
2529 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2530 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2531 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2532 NewCT = CT;
2533 else
2534 NewCT = CBO;
2535
2536 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2537 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2538 NewCF = CF;
2539 else
2540 NewCF = CBO;
2541 } else {
2542 // We have a select-of-constants followed by a binary operator with a
2543 // constant. Eliminate the binop by pulling the constant math into the
2544 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2545 // CBO, CF + CBO
2546 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2547 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2548 if (!NewCT)
2549 return SDValue();
2550
2551 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2552 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2553 if (!NewCF)
2554 return SDValue();
2555 }
2556
2557 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2558 SelectOp->setFlags(BO->getFlags());
2559 return SelectOp;
2560}
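// Illustrative example of the constant-folding path:
//   (add (select Cond, 3, 7), 5) -> (select Cond, 8, 12)
// the binop disappears because both select arms fold with the constant
// operand; the non-constant path only fires for and/or with 0/-1 arms, e.g.
//   (and (select Cond, 0, -1), X) -> (select Cond, 0, X).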
2561
2562static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2563 SelectionDAG &DAG) {
2564 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2565 "Expecting add or sub");
2566
2567 // Match a constant operand and a zext operand for the math instruction:
2568 // add Z, C
2569 // sub C, Z
2570 bool IsAdd = N->getOpcode() == ISD::ADD;
2571 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2572 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2573 auto *CN = dyn_cast<ConstantSDNode>(C);
2574 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2575 return SDValue();
2576
2577 // Match the zext operand as a setcc of a boolean.
2578 if (Z.getOperand(0).getValueType() != MVT::i1)
2579 return SDValue();
2580
2581 // Match the compare as: setcc (X & 1), 0, eq.
2582 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2583 m_SpecificCondCode(ISD::SETEQ))))
2584 return SDValue();
2585
2586 // We are adding/subtracting a constant and an inverted low bit. Turn that
2587 // into a subtract/add of the low bit with incremented/decremented constant:
2588 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2589 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2590 EVT VT = C.getValueType();
2591 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2592 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2593 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2594 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2595}
2596
2597// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2598SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2599 SDValue N0 = N->getOperand(0);
2600 EVT VT = N0.getValueType();
2601 SDValue A, B;
2602
2603 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2604 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2605 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2606 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2607 }
2608 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2609 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2610 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2611 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2612 }
2613 return SDValue();
2614}
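// Illustrative identity behind the fold: A | B = (A & B) + (A ^ B) and
// A + B = 2*(A & B) + (A ^ B), so
//   (A | B) - ((A ^ B) >> 1) = (A & B) + ceil((A ^ B) / 2) = ceil((A + B) / 2)
// without ever forming the possibly-overflowing A + B, which is exactly the
// rounding-up average that AVGCEIL provides.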
2615
2616/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2617/// a shift and add with a different constant.
2618static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2619 SelectionDAG &DAG) {
2620 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2621 "Expecting add or sub");
2622
2623 // We need a constant operand for the add/sub, and the other operand is a
2624 // logical shift right: add (srl), C or sub C, (srl).
2625 bool IsAdd = N->getOpcode() == ISD::ADD;
2626 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2627 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2628 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2629 ShiftOp.getOpcode() != ISD::SRL)
2630 return SDValue();
2631
2632 // The shift must be of a 'not' value.
2633 SDValue Not = ShiftOp.getOperand(0);
2634 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2635 return SDValue();
2636
2637 // The shift must be moving the sign bit to the least-significant-bit.
2638 EVT VT = ShiftOp.getValueType();
2639 SDValue ShAmt = ShiftOp.getOperand(1);
2640 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2641 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2642 return SDValue();
2643
2644 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2645 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2646 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2647 if (SDValue NewC = DAG.FoldConstantArithmetic(
2648 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2649 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2650 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2651 Not.getOperand(0), ShAmt);
2652 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2653 }
2654
2655 return SDValue();
2656}
2657
2658static bool
2659areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2660 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2661 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2662}
2663
2664/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2665/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2666/// are no common bits set in the operands).
2667SDValue DAGCombiner::visitADDLike(SDNode *N) {
2668 SDValue N0 = N->getOperand(0);
2669 SDValue N1 = N->getOperand(1);
2670 EVT VT = N0.getValueType();
2671 SDLoc DL(N);
2672
2673 // fold (add x, undef) -> undef
2674 if (N0.isUndef())
2675 return N0;
2676 if (N1.isUndef())
2677 return N1;
2678
2679 // fold (add c1, c2) -> c1+c2
2680 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2681 return C;
2682
2683 // canonicalize constant to RHS
2684 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2685 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2686 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2687
2688 if (areBitwiseNotOfEachother(N0, N1))
2689 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2690
2691 // fold vector ops
2692 if (VT.isVector()) {
2693 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2694 return FoldedVOp;
2695
2696 // fold (add x, 0) -> x, vector edition
2697 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2698 return N0;
2699 }
2700
2701 // fold (add x, 0) -> x
2702 if (isNullConstant(N1))
2703 return N0;
2704
2705 if (N0.getOpcode() == ISD::SUB) {
2706 SDValue N00 = N0.getOperand(0);
2707 SDValue N01 = N0.getOperand(1);
2708
2709 // fold ((A-c1)+c2) -> (A+(c2-c1))
2710 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2711 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2712
2713 // fold ((c1-A)+c2) -> (c1+c2)-A
2714 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2715 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2716 }
2717
2718 // add (sext i1 X), 1 -> zext (not i1 X)
2719 // We don't transform this pattern:
2720 // add (zext i1 X), -1 -> sext (not i1 X)
2721 // because most (?) targets generate better code for the zext form.
2722 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2723 isOneOrOneSplat(N1)) {
2724 SDValue X = N0.getOperand(0);
2725 if ((!LegalOperations ||
2726 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2727 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2728 X.getScalarValueSizeInBits() == 1) {
2729 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2730 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2731 }
2732 }
2733
2734 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2735 // iff (or x, c0) is equivalent to (add x, c0).
2736 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2737 // iff (xor x, c0) is equivalent to (add x, c0).
2738 if (DAG.isADDLike(N0)) {
2739 SDValue N01 = N0.getOperand(1);
2740 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2741 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2742 }
2743
2744 if (SDValue NewSel = foldBinOpIntoSelect(N))
2745 return NewSel;
2746
2747 // reassociate add
2748 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2749 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2750 return RADD;
2751
2752 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2753 // equivalent to (add x, c).
2754 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2755 // equivalent to (add x, c).
2756 // Do this optimization only when adding c does not introduce instructions
2757 // for adding carries.
2758 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2759 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2760 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2761 // If N0's type does not split or is a sign mask, it does not introduce
2762 // add carry.
2763 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2764 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2765 TyActn == TargetLoweringBase::TypePromoteInteger ||
2766 isMinSignedConstant(N0.getOperand(1));
2767 if (NoAddCarry)
2768 return DAG.getNode(
2769 ISD::ADD, DL, VT,
2770 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2771 N0.getOperand(1));
2772 }
2773 return SDValue();
2774 };
2775 if (SDValue Add = ReassociateAddOr(N0, N1))
2776 return Add;
2777 if (SDValue Add = ReassociateAddOr(N1, N0))
2778 return Add;
2779
2780 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2781 if (SDValue SD =
2782 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2783 return SD;
2784 }
2785
2786 SDValue A, B, C, D;
2787
2788 // fold ((0-A) + B) -> B-A
2789 if (sd_match(N0, m_Neg(m_Value(A))))
2790 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2791
2792 // fold (A + (0-B)) -> A-B
2793 if (sd_match(N1, m_Neg(m_Value(B))))
2794 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2795
2796 // fold (A+(B-A)) -> B
2797 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2798 return B;
2799
2800 // fold ((B-A)+A) -> B
2801 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2802 return B;
2803
2804 // fold ((A-B)+(C-A)) -> (C-B)
2805 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2806 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2807 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2808
2809 // fold ((A-B)+(B-C)) -> (A-C)
2810 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2811 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2812 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2813
2814 // fold (A+(B-(A+C))) to (B-C)
2815 // fold (A+(B-(C+A))) to (B-C)
2816 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2817 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2818
2819 // fold (A+((B-A)+or-C)) to (B+or-C)
2820 if (sd_match(N1,
2821 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2822 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2823 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2824
2825 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2826 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2827 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2828 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2829 return DAG.getNode(ISD::SUB, DL, VT,
2830 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2831 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2832
2833 // fold (add (umax X, C), -C) --> (usubsat X, C)
2834 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2835 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2836 return (!Max && !Op) ||
2837 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2838 };
2839 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2840 /*AllowUndefs*/ true))
2841 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2842 N0.getOperand(1));
2843 }
2844
2845 if (SimplifyDemandedBits(SDValue(N, 0)))
2846 return SDValue(N, 0);
2847
2848 if (isOneOrOneSplat(N1)) {
2849 // fold (add (xor a, -1), 1) -> (sub 0, a)
2850 if (isBitwiseNot(N0))
2851 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2852 N0.getOperand(0));
2853
2854 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2855 if (N0.getOpcode() == ISD::ADD) {
2856 SDValue A, Xor;
2857
2858 if (isBitwiseNot(N0.getOperand(0))) {
2859 A = N0.getOperand(1);
2860 Xor = N0.getOperand(0);
2861 } else if (isBitwiseNot(N0.getOperand(1))) {
2862 A = N0.getOperand(0);
2863 Xor = N0.getOperand(1);
2864 }
2865
2866 if (Xor)
2867 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2868 }
2869
2870 // Look for:
2871 // add (add x, y), 1
2872 // And if the target does not like this form then turn into:
2873 // sub y, (xor x, -1)
2874 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2875 N0.hasOneUse() &&
2876 // Limit this to after legalization if the add has wrap flags
2877 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2878 !N->getFlags().hasNoSignedWrap()))) {
2879 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2880 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2881 }
2882 }
2883
2884 // (x - y) + -1 -> add (xor y, -1), x
2885 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2886 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2887 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2888 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2889 }
2890
2891 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2892 // This can help if the inner add has multiple uses.
2893 APInt CM, CA;
2894 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2895 if (VT.getScalarSizeInBits() <= 64) {
2896 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2897 m_ConstInt(CM)))) &&
2898 TLI.isLegalAddImmediate(
2899 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2900 SDNodeFlags Flags;
2901 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2902 // are _also_ nsw the outputs can be too.
2903 if (N->getFlags().hasNoUnsignedWrap() &&
2904 N0->getFlags().hasNoUnsignedWrap() &&
2905 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2906 Flags.setNoUnsignedWrap(true);
2907 if (N->getFlags().hasNoSignedWrap() &&
2908 N0->getFlags().hasNoSignedWrap() &&
2909 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2910 Flags.setNoSignedWrap(true);
2911 }
2912 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2913 DAG.getConstant(CM, DL, VT), Flags);
2914 return DAG.getNode(
2915 ISD::ADD, DL, VT, Mul,
2916 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2917 }
2918 // Also look in case there is an intermediate add.
2919 if (sd_match(N0, m_OneUse(m_Add(
2920 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2921 m_ConstInt(CM))),
2922 m_Value(B)))) &&
2923 TLI.isLegalAddImmediate(
2924 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2925 SDNodeFlags Flags;
2926 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2927 // are _also_ nsw the outputs can be too.
2928 SDValue OMul =
2929 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2930 if (N->getFlags().hasNoUnsignedWrap() &&
2931 N0->getFlags().hasNoUnsignedWrap() &&
2932 OMul->getFlags().hasNoUnsignedWrap() &&
2933 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2934 Flags.setNoUnsignedWrap(true);
2935 if (N->getFlags().hasNoSignedWrap() &&
2936 N0->getFlags().hasNoSignedWrap() &&
2937 OMul->getFlags().hasNoSignedWrap() &&
2938 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2939 Flags.setNoSignedWrap(true);
2940 }
2941 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2942 DAG.getConstant(CM, DL, VT), Flags);
2943 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2944 return DAG.getNode(
2945 ISD::ADD, DL, VT, Add,
2946 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2947 }
2948 }
2949 }
2950
2951 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2952 return Combined;
2953
2954 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2955 return Combined;
2956
2957 return SDValue();
2958}
2959
2960// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2961SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2962 SDValue N0 = N->getOperand(0);
2963 EVT VT = N0.getValueType();
2964 SDValue A, B;
2965
2966 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2967 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2968 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2969 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2970 }
2971 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2972 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2973 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2974 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2975 }
2976
2977 return SDValue();
2978}
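// Illustrative identity behind the fold: A + B = 2*(A & B) + (A ^ B), so
//   (A & B) + ((A ^ B) >> 1) = floor((A + B) / 2)
// computed without an overflowing A + B, matching the rounding-down average
// that AVGFLOOR provides.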
2979
2980SDValue DAGCombiner::visitADD(SDNode *N) {
2981 SDValue N0 = N->getOperand(0);
2982 SDValue N1 = N->getOperand(1);
2983 EVT VT = N0.getValueType();
2984 SDLoc DL(N);
2985
2986 if (SDValue Combined = visitADDLike(N))
2987 return Combined;
2988
2989 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2990 return V;
2991
2992 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2993 return V;
2994
2995 // Try to match AVGFLOOR fixedwidth pattern
2996 if (SDValue V = foldAddToAvg(N, DL))
2997 return V;
2998
2999 // fold (a+b) -> (a|b) iff a and b share no bits.
3000 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3001 DAG.haveNoCommonBitsSet(N0, N1))
3002 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3003
3004 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3005 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3006 const APInt &C0 = N0->getConstantOperandAPInt(0);
3007 const APInt &C1 = N1->getConstantOperandAPInt(0);
3008 return DAG.getVScale(DL, VT, C0 + C1);
3009 }
3010
3011 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3012 if (N0.getOpcode() == ISD::ADD &&
3013 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3014 N1.getOpcode() == ISD::VSCALE) {
3015 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3016 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3017 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3018 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3019 }
3020
3021 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3022 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3023 N1.getOpcode() == ISD::STEP_VECTOR) {
3024 const APInt &C0 = N0->getConstantOperandAPInt(0);
3025 const APInt &C1 = N1->getConstantOperandAPInt(0);
3026 APInt NewStep = C0 + C1;
3027 return DAG.getStepVector(DL, VT, NewStep);
3028 }
3029
3030 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3031 if (N0.getOpcode() == ISD::ADD &&
3032 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3033 N1.getOpcode() == ISD::STEP_VECTOR) {
3034 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3035 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3036 APInt NewStep = SV0 + SV1;
3037 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3038 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3039 }
3040
3041 return SDValue();
3042}
3043
3044SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3045 unsigned Opcode = N->getOpcode();
3046 SDValue N0 = N->getOperand(0);
3047 SDValue N1 = N->getOperand(1);
3048 EVT VT = N0.getValueType();
3049 bool IsSigned = Opcode == ISD::SADDSAT;
3050 SDLoc DL(N);
3051
3052 // fold (add_sat x, undef) -> -1
3053 if (N0.isUndef() || N1.isUndef())
3054 return DAG.getAllOnesConstant(DL, VT);
3055
3056 // fold (add_sat c1, c2) -> c3
3057 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3058 return C;
3059
3060 // canonicalize constant to RHS
3063 return DAG.getNode(Opcode, DL, VT, N1, N0);
3064
3065 // fold vector ops
3066 if (VT.isVector()) {
3067 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3068 return FoldedVOp;
3069
3070 // fold (add_sat x, 0) -> x, vector edition
3071 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3072 return N0;
3073 }
3074
3075 // fold (add_sat x, 0) -> x
3076 if (isNullConstant(N1))
3077 return N0;
3078
3079 // If it cannot overflow, transform into an add.
3080 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3081 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3082
3083 return SDValue();
3084}
3085
3086static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3087 bool ForceCarryReconstruction = false) {
3088 bool Masked = false;
3089
3090 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3091 while (true) {
3092 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3093 V = V.getOperand(0);
3094 continue;
3095 }
3096
3097 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3098 if (ForceCarryReconstruction)
3099 return V;
3100
3101 Masked = true;
3102 V = V.getOperand(0);
3103 continue;
3104 }
3105
3106 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3107 return V;
3108
3109 break;
3110 }
3111
3112 // If this is not a carry, return.
3113 if (V.getResNo() != 1)
3114 return SDValue();
3115
3116 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3117 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3118 return SDValue();
3119
3120 EVT VT = V->getValueType(0);
3121 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3122 return SDValue();
3123
3124 // If the result is masked, then no matter what kind of bool it is we can
3125 // return. If it isn't, then we need to make sure the bool type is either 0 or
3126 // 1 and not other values.
3127 if (Masked ||
3128 TLI.getBooleanContents(V.getValueType()) ==
3129 TargetLowering::ZeroOrOneBooleanContent)
3130 return V;
3131
3132 return SDValue();
3133}
3134
3135/// Given the operands of an add/sub operation, see if the 2nd operand is a
3136/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3137/// the opcode and bypass the mask operation.
3138static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3139 SelectionDAG &DAG, const SDLoc &DL) {
3140 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3141 N1 = N1.getOperand(0);
3142
3143 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3144 return SDValue();
3145
3146 EVT VT = N0.getValueType();
3147 SDValue N10 = N1.getOperand(0);
3148 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3149 N10 = N10.getOperand(0);
3150
3151 if (N10.getValueType() != VT)
3152 return SDValue();
3153
3154 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3155 return SDValue();
3156
3157 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3158 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3159 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3160}
3161
3162/// Helper for doing combines based on N0 and N1 being added to each other.
3163SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3164 SDNode *LocReference) {
3165 EVT VT = N0.getValueType();
3166 SDLoc DL(LocReference);
3167
3168 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3169 SDValue Y, N;
3170 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3171 return DAG.getNode(ISD::SUB, DL, VT, N0,
3172 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3173
3174 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3175 return V;
3176
3177 // Look for:
3178 // add (add x, 1), y
3179 // And if the target does not like this form then turn into:
3180 // sub y, (xor x, -1)
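  // (xor x, -1 equals -x - 1, so sub y, (xor x, -1) computes y + x + 1, the
  // same value as add (add x, 1), y.)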
3181 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3182 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3183 // Limit this to after legalization if the add has wrap flags
3184 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3185 !N0->getFlags().hasNoSignedWrap()))) {
3186 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3187 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3188 }
3189
3190 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3191 // Hoist one-use subtraction by non-opaque constant:
3192 // (x - C) + y -> (x + y) - C
3193 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3194 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3195 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3196 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3197 }
3198 // Hoist one-use subtraction from non-opaque constant:
3199 // (C - x) + y -> (y - x) + C
3200 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3201 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3202 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3203 }
3204 }
3205
3206 // add (mul x, C), x -> mul x, C+1
3207 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3208 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3209 N0.hasOneUse()) {
3210 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3211 DAG.getConstant(1, DL, VT));
3212 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3213 }
3214
3215 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3216 // rather than 'add 0/-1' (the zext should get folded).
3217 // add (sext i1 Y), X --> sub X, (zext i1 Y)
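  // (sext of an i1 is 0 or -1 and zext of an i1 is 0 or 1, so adding the
  // former is the same as subtracting the latter.)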
3218 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3219 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3220      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3221    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3222 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3223 }
3224
3225 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3226 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3227 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3228 if (TN->getVT() == MVT::i1) {
3229 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3230 DAG.getConstant(1, DL, VT));
3231 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3232 }
3233 }
3234
3235 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3236 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3237 N1.getResNo() == 0)
3238 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3239 N0, N1.getOperand(0), N1.getOperand(2));
3240
3241 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3242  if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3243    if (SDValue Carry = getAsCarry(TLI, N1))
3244 return DAG.getNode(ISD::UADDO_CARRY, DL,
3245 DAG.getVTList(VT, Carry.getValueType()), N0,
3246 DAG.getConstant(0, DL, VT), Carry);
3247
3248 return SDValue();
3249}
3250
3251SDValue DAGCombiner::visitADDC(SDNode *N) {
3252 SDValue N0 = N->getOperand(0);
3253 SDValue N1 = N->getOperand(1);
3254 EVT VT = N0.getValueType();
3255 SDLoc DL(N);
3256
3257 // If the flag result is dead, turn this into an ADD.
3258 if (!N->hasAnyUseOfValue(1))
3259 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3260 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3261
3262 // canonicalize constant to RHS.
3263 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3264 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3265 if (N0C && !N1C)
3266 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3267
3268 // fold (addc x, 0) -> x + no carry out
3269 if (isNullConstant(N1))
3270 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3271 DL, MVT::Glue));
3272
3273 // If it cannot overflow, transform into an add.
3274  if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3275    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3276 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3277
3278 return SDValue();
3279}
3280
3281/**
3282 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3283 * then the flip also occurs if computing the inverse is the same cost.
3284 * This function returns an empty SDValue in case it cannot flip the boolean
3285 * without increasing the cost of the computation. If you want to flip a boolean
3286 * no matter what, use DAG.getLogicalNOT.
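 * For example, if V is (xor X, C) where C is the target's boolean "true"
 * constant, the flipped value is simply X and no new node is created.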
3287 */
3288static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3289                                  const TargetLowering &TLI,
3290 bool Force) {
3291 if (Force && isa<ConstantSDNode>(V))
3292 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3293
3294 if (V.getOpcode() != ISD::XOR)
3295 return SDValue();
3296
3297 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3298 return V.getOperand(0);
3299 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3300 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3301 return SDValue();
3302}
3303
3304SDValue DAGCombiner::visitADDO(SDNode *N) {
3305 SDValue N0 = N->getOperand(0);
3306 SDValue N1 = N->getOperand(1);
3307 EVT VT = N0.getValueType();
3308 bool IsSigned = (ISD::SADDO == N->getOpcode());
3309
3310 EVT CarryVT = N->getValueType(1);
3311 SDLoc DL(N);
3312
3313 // If the flag result is dead, turn this into an ADD.
3314 if (!N->hasAnyUseOfValue(1))
3315 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3316 DAG.getUNDEF(CarryVT));
3317
3318 // canonicalize constant to RHS.
3319  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3320      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3321    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3322
3323 // fold (addo x, 0) -> x + no carry out
3324 if (isNullOrNullSplat(N1))
3325 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3326
3327 // If it cannot overflow, transform into an add.
3328 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3329 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3330 DAG.getConstant(0, DL, CarryVT));
3331
3332 if (IsSigned) {
3333 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3334 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3335 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3336 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3337 } else {
3338 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3339 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3340 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3341 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3342 return CombineTo(
3343 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3344 }
3345
3346 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3347 return Combined;
3348
3349 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3350 return Combined;
3351 }
3352
3353 return SDValue();
3354}
3355
3356SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3357 EVT VT = N0.getValueType();
3358 if (VT.isVector())
3359 return SDValue();
3360
3361 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3362 // If Y + 1 cannot overflow.
3363 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3364 SDValue Y = N1.getOperand(0);
3365 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3366    if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3367      return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3368 N1.getOperand(2));
3369 }
3370
3371 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3372  if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3373    if (SDValue Carry = getAsCarry(TLI, N1))
3374 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3375 DAG.getConstant(0, SDLoc(N), VT), Carry);
3376
3377 return SDValue();
3378}
3379
3380SDValue DAGCombiner::visitADDE(SDNode *N) {
3381 SDValue N0 = N->getOperand(0);
3382 SDValue N1 = N->getOperand(1);
3383 SDValue CarryIn = N->getOperand(2);
3384
3385 // canonicalize constant to RHS
3386 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3387 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3388 if (N0C && !N1C)
3389 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3390 N1, N0, CarryIn);
3391
3392 // fold (adde x, y, false) -> (addc x, y)
3393 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3394 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3395
3396 return SDValue();
3397}
3398
3399SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3400 SDValue N0 = N->getOperand(0);
3401 SDValue N1 = N->getOperand(1);
3402 SDValue CarryIn = N->getOperand(2);
3403 SDLoc DL(N);
3404
3405 // canonicalize constant to RHS
3406 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408 if (N0C && !N1C)
3409 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3410
3411 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3412 if (isNullConstant(CarryIn)) {
3413 if (!LegalOperations ||
3414 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3416 }
3417
3418 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3419 if (isNullConstant(N0) && isNullConstant(N1)) {
3420 EVT VT = N0.getValueType();
3421 EVT CarryVT = CarryIn.getValueType();
3422 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3423 AddToWorklist(CarryExt.getNode());
3424 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3425 DAG.getConstant(1, DL, VT)),
3426 DAG.getConstant(0, DL, CarryVT));
3427 }
3428
3429 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3430 return Combined;
3431
3432 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3433 return Combined;
3434
3435 // We want to avoid useless duplication.
3436 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3437  // not a binary operation, it is not really possible to leverage this
3438  // existing mechanism for it. However, if more operations require the same
3439  // deduplication logic, then it may be worth generalizing.
3440 SDValue Ops[] = {N1, N0, CarryIn};
3441 SDNode *CSENode =
3442 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3443 if (CSENode)
3444 return SDValue(CSENode, 0);
3445
3446 return SDValue();
3447}
3448
3449/**
3450 * If we are facing some sort of diamond carry propagation pattern try to
3451 * break it up to generate something like:
3452 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3453 *
3454 * The end result is usually an increase in the number of operations required, but because the
3455 * carry is now linearized, other transforms can kick in and optimize the DAG.
3456 *
3457 * Patterns typically look something like
3458 * (uaddo A, B)
3459 * / \
3460 * Carry Sum
3461 * | \
3462 * | (uaddo_carry *, 0, Z)
3463 * | /
3464 * \ Carry
3465 * | /
3466 * (uaddo_carry X, *, *)
3467 *
3468 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3469 * produce a combine with a single path for carry propagation.
3470 */
3471static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3472                                          SelectionDAG &DAG, SDValue X,
3473 SDValue Carry0, SDValue Carry1,
3474 SDNode *N) {
3475 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3476 return SDValue();
3477 if (Carry1.getOpcode() != ISD::UADDO)
3478 return SDValue();
3479
3480 SDValue Z;
3481
3482 /**
3483 * First look for a suitable Z. It will present itself in the form of
3484 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3485 */
3486 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3487 isNullConstant(Carry0.getOperand(1))) {
3488 Z = Carry0.getOperand(2);
3489 } else if (Carry0.getOpcode() == ISD::UADDO &&
3490 isOneConstant(Carry0.getOperand(1))) {
3491 EVT VT = Carry0->getValueType(1);
3492 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3493 } else {
3494 // We couldn't find a suitable Z.
3495 return SDValue();
3496 }
3497
3498
3499 auto cancelDiamond = [&](SDValue A,SDValue B) {
3500 SDLoc DL(N);
3501 SDValue NewY =
3502 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3503 Combiner.AddToWorklist(NewY.getNode());
3504 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3505 DAG.getConstant(0, DL, X.getValueType()),
3506 NewY.getValue(1));
3507 };
3508
3509 /**
3510 * (uaddo A, B)
3511 * |
3512 * Sum
3513 * |
3514 * (uaddo_carry *, 0, Z)
3515 */
3516 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3517 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3518 }
3519
3520 /**
3521 * (uaddo_carry A, 0, Z)
3522 * |
3523 * Sum
3524 * |
3525 * (uaddo *, B)
3526 */
3527 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3528 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3529 }
3530
3531 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3532 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3533 }
3534
3535 return SDValue();
3536}
3537
3538// If we are facing some sort of diamond carry/borrow in/out pattern try to
3539// match patterns like:
3540//
3541// (uaddo A, B) CarryIn
3542// | \ |
3543// | \ |
3544// PartialSum PartialCarryOutX /
3545// | | /
3546// | ____|____________/
3547// | / |
3548// (uaddo *, *) \________
3549// | \ \
3550// | \ |
3551// | PartialCarryOutY |
3552// | \ |
3553// | \ /
3554// AddCarrySum | ______/
3555// | /
3556// CarryOut = (or *, *)
3557//
3558// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3559//
3560// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3561//
3562// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3563// with a single path for carry/borrow out propagation.
3564static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3565                                   SDValue N0, SDValue N1, SDNode *N) {
3566 SDValue Carry0 = getAsCarry(TLI, N0);
3567 if (!Carry0)
3568 return SDValue();
3569 SDValue Carry1 = getAsCarry(TLI, N1);
3570 if (!Carry1)
3571 return SDValue();
3572
3573 unsigned Opcode = Carry0.getOpcode();
3574 if (Opcode != Carry1.getOpcode())
3575 return SDValue();
3576 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3577 return SDValue();
3578 // Guarantee identical type of CarryOut
3579 EVT CarryOutType = N->getValueType(0);
3580 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3581 CarryOutType != Carry1.getValue(1).getValueType())
3582 return SDValue();
3583
3584 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3585 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3586 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3587 std::swap(Carry0, Carry1);
3588
3589 // Check if nodes are connected in expected way.
3590 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3591 Carry1.getOperand(1) != Carry0.getValue(0))
3592 return SDValue();
3593
3594 // The carry in value must be on the righthand side for subtraction.
3595 unsigned CarryInOperandNum =
3596 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3597 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3598 return SDValue();
3599 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3600
3601 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3602 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3603 return SDValue();
3604
3605 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3606 CarryIn = getAsCarry(TLI, CarryIn, true);
3607 if (!CarryIn)
3608 return SDValue();
3609
3610 SDLoc DL(N);
3611 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3612 Carry1->getValueType(0));
3613 SDValue Merged =
3614 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3615 Carry0.getOperand(1), CarryIn);
3616
3617  // Please note that because the result of the UADDO/USUBO of A and B feeds
3618  // into the UADDO/USUBO that consumes the carry/borrow in, if the first
3619  // UADDO/USUBO overflows, the second cannot. For example, consider 8-bit
3620  // numbers where 0xFF is the
3621 // maximum value.
3622 //
3623 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3624 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3625 //
3626 // This is important because it means that OR and XOR can be used to merge
3627 // carry flags; and that AND can return a constant zero.
3628 //
3629 // TODO: match other operations that can merge flags (ADD, etc)
3630 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3631 if (N->getOpcode() == ISD::AND)
3632 return DAG.getConstant(0, DL, CarryOutType);
3633 return Merged.getValue(1);
3634}
3635
3636SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3637 SDValue CarryIn, SDNode *N) {
3638 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3639 // carry.
3640 if (isBitwiseNot(N0))
3641 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3642 SDLoc DL(N);
3643 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3644 N0.getOperand(0), NotC);
3645 return CombineTo(
3646 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3647 }
3648
3649 // Iff the flag result is dead:
3650 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3651 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3652 // or the dependency between the instructions.
3653 if ((N0.getOpcode() == ISD::ADD ||
3654 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3655 N0.getValue(1) != CarryIn)) &&
3656 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3657 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3658 N0.getOperand(0), N0.getOperand(1), CarryIn);
3659
3660 /**
3661 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3662 * a diamond carry propagation, in which case we try to transform the DAG
3663 * to ensure linear carry propagation if that is possible.
3664 */
3665 if (auto Y = getAsCarry(TLI, N1)) {
3666 // Because both are carries, Y and Z can be swapped.
3667 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3668 return R;
3669 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3670 return R;
3671 }
3672
3673 return SDValue();
3674}
3675
3676SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3677 SDValue CarryIn, SDNode *N) {
3678 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3679 if (isBitwiseNot(N0)) {
3680 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3681 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3682 N0.getOperand(0), NotC);
3683 }
3684
3685 return SDValue();
3686}
3687
3688SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3689 SDValue N0 = N->getOperand(0);
3690 SDValue N1 = N->getOperand(1);
3691 SDValue CarryIn = N->getOperand(2);
3692 SDLoc DL(N);
3693
3694 // canonicalize constant to RHS
3695 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3696 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3697 if (N0C && !N1C)
3698 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3699
3700 // fold (saddo_carry x, y, false) -> (saddo x, y)
3701 if (isNullConstant(CarryIn)) {
3702 if (!LegalOperations ||
3703 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3704 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3705 }
3706
3707 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3708 return Combined;
3709
3710 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3711 return Combined;
3712
3713 return SDValue();
3714}
3715
3716// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3717// clamp/truncation if necessary.
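// For example, with DstVT == i8 and SrcVT == i16, when the i16 LHS is known to
// fit in 8 bits the i16 RHS is clamped to 255 with UMIN, both operands are
// truncated to i8, and a single i8 USUBSAT is emitted.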
3718static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3719 SDValue RHS, SelectionDAG &DAG,
3720 const SDLoc &DL) {
3721 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3722 "Illegal truncation");
3723
3724 if (DstVT == SrcVT)
3725 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3726
3727 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3728 // clamping RHS.
3729  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3730                                          DstVT.getScalarSizeInBits());
3731 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3732 return SDValue();
3733
3734 SDValue SatLimit =
3735      DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3736                                           DstVT.getScalarSizeInBits()),
3737 DL, SrcVT);
3738 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3739 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3740 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3741 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3742}
3743
3744// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3745// usubsat(a,b), optionally as a truncated type.
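// (umax(a,b) - b is a - b when a >= b and 0 otherwise, and a - umin(a,b)
// behaves the same way, which is exactly usubsat(a,b).)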
3746SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3747 if (N->getOpcode() != ISD::SUB ||
3748 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3749 return SDValue();
3750
3751 EVT SubVT = N->getValueType(0);
3752 SDValue Op0 = N->getOperand(0);
3753 SDValue Op1 = N->getOperand(1);
3754
3755 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3756  // that may be converted to usubsat(a,b).
3757 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3758 SDValue MaxLHS = Op0.getOperand(0);
3759 SDValue MaxRHS = Op0.getOperand(1);
3760 if (MaxLHS == Op1)
3761 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3762 if (MaxRHS == Op1)
3763 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3764 }
3765
3766 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3767 SDValue MinLHS = Op1.getOperand(0);
3768 SDValue MinRHS = Op1.getOperand(1);
3769 if (MinLHS == Op0)
3770 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3771 if (MinRHS == Op0)
3772 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3773 }
3774
3775 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3776 if (Op1.getOpcode() == ISD::TRUNCATE &&
3777 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3778 Op1.getOperand(0).hasOneUse()) {
3779 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3780 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3781 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3782 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3783 DAG, DL);
3784 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3785 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3786 DAG, DL);
3787 }
3788
3789 return SDValue();
3790}
3791
3792// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3793// counting leading ones. Broadly, it replaces the subtraction with a left
3794// shift.
3795//
3796// * DAG Legalisation Pattern:
3797//
3798// (sub (ctlz (zeroextend (not Src)))
3799// BitWidthDiff)
3800//
3801// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3802// -->
3803//
3804// (ctlz_zero_undef (not (shl (anyextend Src)
3805// BitWidthDiff)))
3806//
3807// * Type Legalisation Pattern:
3808//
3809// (sub (ctlz (and (xor Src XorMask)
3810// AndMask))
3811// BitWidthDiff)
3812//
3813// if AndMask has only trailing ones
3814// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3815// and XorMask has more trailing ones than AndMask
3816// -->
3817//
3818// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
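//
// For example, counting the leading ones of an i8 Src inside an i32 node
// (BitWidthDiff == 24): ctlz (zext (not Src)) - 24 and
// ctlz_zero_undef (not (shl (anyext Src), 24)) both yield the number of
// leading ones of Src, but the rewritten form needs no subtraction because
// the shift places Src in the most significant bits.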
3819template <class MatchContextClass>
3820static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3821  const SDLoc DL(N);
3822 SDValue N0 = N->getOperand(0);
3823 EVT VT = N0.getValueType();
3824 unsigned BitWidth = VT.getScalarSizeInBits();
3825
3826 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3827
3828 APInt AndMask;
3829 APInt XorMask;
3830 APInt BitWidthDiff;
3831
3832 SDValue CtlzOp;
3833 SDValue Src;
3834
3835 if (!sd_context_match(
3836 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3837 return SDValue();
3838
3839 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3840 // DAG Legalisation Pattern:
3841 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3842 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3843 return SDValue();
3844
3845 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3846 } else if (sd_context_match(CtlzOp, Matcher,
3847 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3848 m_ConstInt(AndMask)))) {
3849 // Type Legalisation Pattern:
3850 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3851 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3852 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3853 return SDValue();
3854 } else
3855 return SDValue();
3856
3857 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
3858 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
3859 SDValue Not =
3860 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
3861
3862 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
3863}
3864
3865// Since it may not be valid to emit a fold to zero for vector initializers,
3866// check if we can before folding.
3867static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3868 SelectionDAG &DAG, bool LegalOperations) {
3869 if (!VT.isVector())
3870 return DAG.getConstant(0, DL, VT);
3871 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3872 return DAG.getConstant(0, DL, VT);
3873 return SDValue();
3874}
3875
3876SDValue DAGCombiner::visitSUB(SDNode *N) {
3877 SDValue N0 = N->getOperand(0);
3878 SDValue N1 = N->getOperand(1);
3879 EVT VT = N0.getValueType();
3880 unsigned BitWidth = VT.getScalarSizeInBits();
3881 SDLoc DL(N);
3882
3883 auto PeekThroughFreeze = [](SDValue N) {
3884 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3885 return N->getOperand(0);
3886 return N;
3887 };
3888
3889 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
3890 return V;
3891
3892 // fold (sub x, x) -> 0
3893 // FIXME: Refactor this and xor and other similar operations together.
3894 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3895 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3896
3897 // fold (sub c1, c2) -> c3
3898 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3899 return C;
3900
3901 // fold vector ops
3902 if (VT.isVector()) {
3903 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3904 return FoldedVOp;
3905
3906 // fold (sub x, 0) -> x, vector edition
3907    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3908      return N0;
3909 }
3910
3911 if (SDValue NewSel = foldBinOpIntoSelect(N))
3912 return NewSel;
3913
3914 // fold (sub x, c) -> (add x, -c)
3915  if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3916    return DAG.getNode(ISD::ADD, DL, VT, N0,
3917 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3918
3919 if (isNullOrNullSplat(N0)) {
3920 // Right-shifting everything out but the sign bit followed by negation is
3921 // the same as flipping arithmetic/logical shift type without the negation:
3922 // -(X >>u 31) -> (X >>s 31)
3923 // -(X >>s 31) -> (X >>u 31)
3924 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3925      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3926      if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3927 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3928 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3929 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3930 }
3931 }
3932
3933 // 0 - X --> 0 if the sub is NUW.
3934 if (N->getFlags().hasNoUnsignedWrap())
3935 return N0;
3936
3937    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3938      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3939 // N1 must be 0 because negating the minimum signed value is undefined.
3940 if (N->getFlags().hasNoSignedWrap())
3941 return N0;
3942
3943 // 0 - X --> X if X is 0 or the minimum signed value.
3944 return N1;
3945 }
3946
3947 // Convert 0 - abs(x).
3948 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3949        !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3950      if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3951 return Result;
3952
3953 // Similar to the previous rule, but this time targeting an expanded abs.
3954 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3955 // as well as
3956 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3957 // Note that these two are applicable to both signed and unsigned min/max.
3958 SDValue X;
3959 SDValue S0;
3960 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
3961 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
3962 m_UMax(m_Value(X), NegPat),
3963 m_SMin(m_Value(X), NegPat),
3964 m_UMin(m_Value(X), NegPat))))) {
3965 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3966 if (hasOperation(NewOpc, VT))
3967 return DAG.getNode(NewOpc, DL, VT, X, S0);
3968 }
3969
3970    // Fold neg(splat(neg(x))) -> splat(x)
3971 if (VT.isVector()) {
3972 SDValue N1S = DAG.getSplatValue(N1, true);
3973 if (N1S && N1S.getOpcode() == ISD::SUB &&
3974 isNullConstant(N1S.getOperand(0)))
3975 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3976 }
3977 }
3978
3979 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3980  if (isAllOnesOrAllOnesSplat(N0))
3981    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3982
3983 // fold (A - (0-B)) -> A+B
3984 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3985 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3986
3987 // fold A-(A-B) -> B
3988 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3989 return N1.getOperand(1);
3990
3991 // fold (A+B)-A -> B
3992 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3993 return N0.getOperand(1);
3994
3995 // fold (A+B)-B -> A
3996 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3997 return N0.getOperand(0);
3998
3999 // fold (A+C1)-C2 -> A+(C1-C2)
4000 if (N0.getOpcode() == ISD::ADD) {
4001 SDValue N01 = N0.getOperand(1);
4002 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4003 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4004 }
4005
4006 // fold C2-(A+C1) -> (C2-C1)-A
4007 if (N1.getOpcode() == ISD::ADD) {
4008 SDValue N11 = N1.getOperand(1);
4009 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4010 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4011 }
4012
4013 // fold (A-C1)-C2 -> A-(C1+C2)
4014 if (N0.getOpcode() == ISD::SUB) {
4015 SDValue N01 = N0.getOperand(1);
4016 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4017 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4018 }
4019
4020 // fold (c1-A)-c2 -> (c1-c2)-A
4021 if (N0.getOpcode() == ISD::SUB) {
4022 SDValue N00 = N0.getOperand(0);
4023 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4024 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4025 }
4026
4027 SDValue A, B, C;
4028
4029 // fold ((A+(B+C))-B) -> A+C
4030 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4031 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4032
4033 // fold ((A+(B-C))-B) -> A-C
4034 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4035 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4036
4037 // fold ((A-(B-C))-C) -> A-B
4038 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4039 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4040
4041 // fold (A-(B-C)) -> A+(C-B)
4042 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4043 return DAG.getNode(ISD::ADD, DL, VT, N0,
4044 DAG.getNode(ISD::SUB, DL, VT, C, B));
4045
4046 // A - (A & B) -> A & (~B)
4047 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4048 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4049 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4050
4051 // fold (A - (-B * C)) -> (A + (B * C))
4052 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4053 return DAG.getNode(ISD::ADD, DL, VT, N0,
4054 DAG.getNode(ISD::MUL, DL, VT, B, C));
4055
4056 // If either operand of a sub is undef, the result is undef
4057 if (N0.isUndef())
4058 return N0;
4059 if (N1.isUndef())
4060 return N1;
4061
4062 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4063 return V;
4064
4065 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4066 return V;
4067
4068 // Try to match AVGCEIL fixedwidth pattern
4069 if (SDValue V = foldSubToAvg(N, DL))
4070 return V;
4071
4072 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4073 return V;
4074
4075 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4076 return V;
4077
4078 // (A - B) - 1 -> add (xor B, -1), A
4080 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4081
4082 // Look for:
4083 // sub y, (xor x, -1)
4084 // And if the target does not like this form then turn into:
4085 // add (add x, y), 1
4086 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4087 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4088 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4089 }
4090
4091 // Hoist one-use addition by non-opaque constant:
4092 // (x + C) - y -> (x - y) + C
4093 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4094 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4095 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4096 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4097 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4098 }
4099 // y - (x + C) -> (y - x) - C
4100 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4101 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4102 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4103 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4104 }
4105 // (x - C) - y -> (x - y) - C
4106 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4107 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4108 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4109 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4110 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4111 }
4112 // (C - x) - y -> C - (x + y)
4113 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4114 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4115 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4116 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4117 }
4118
4119 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4120 // rather than 'sub 0/1' (the sext should get folded).
4121 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4122 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4123 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4124 TLI.getBooleanContents(VT) ==
4125          TargetLowering::ZeroOrNegativeOneBooleanContent) {
4126    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4127 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4128 }
4129
4130 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
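  // (B is 0 for non-negative A and -1 for negative A, so (A ^ B) - B leaves A
  // unchanged in the first case and computes ~A + 1 == -A in the second; this
  // is the classic branchless absolute-value sequence.)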
4131 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4133 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4134 return DAG.getNode(ISD::ABS, DL, VT, A);
4135
4136 // If the relocation model supports it, consider symbol offsets.
4137 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4138 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4139 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4140 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4141 if (GA->getGlobal() == GB->getGlobal())
4142 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4143 DL, VT);
4144 }
4145
4146 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4147 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4148 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4149 if (TN->getVT() == MVT::i1) {
4150 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4151 DAG.getConstant(1, DL, VT));
4152 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4153 }
4154 }
4155
4156 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4157 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4158 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4159 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4160 }
4161
4162 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4163 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4164 APInt NewStep = -N1.getConstantOperandAPInt(0);
4165 return DAG.getNode(ISD::ADD, DL, VT, N0,
4166 DAG.getStepVector(DL, VT, NewStep));
4167 }
4168
4169 // Prefer an add for more folding potential and possibly better codegen:
4170 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
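  // (lshr by width-1 produces 0 or 1 while ashr produces 0 or -1, so
  // subtracting the former is equivalent to adding the latter.)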
4171 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4172 SDValue ShAmt = N1.getOperand(1);
4173 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4174 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4175 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4176 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4177 }
4178 }
4179
4180 // As with the previous fold, prefer add for more folding potential.
4181 // Subtracting SMIN/0 is the same as adding SMIN/0:
4182 // N0 - (X << BW-1) --> N0 + (X << BW-1)
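  // (X << BW-1 is either 0 or the sign-bit-only value SMIN, and SMIN is its
  // own two's-complement negation, so adding and subtracting it agree.)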
4183 if (N1.getOpcode() == ISD::SHL) {
4184    ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4185    if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4186 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4187 }
4188
4189 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4190 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4191 N0.getResNo() == 0 && N0.hasOneUse())
4192 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4193 N0.getOperand(0), N1, N0.getOperand(2));
4194
4195  if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4196    // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4197 if (SDValue Carry = getAsCarry(TLI, N0)) {
4198 SDValue X = N1;
4199 SDValue Zero = DAG.getConstant(0, DL, VT);
4200 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4201 return DAG.getNode(ISD::UADDO_CARRY, DL,
4202 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4203 Carry);
4204 }
4205 }
4206
4207 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4208 // sub C0, X --> xor X, C0
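  // (For example, if C0 is 0b1100 and X can only have bits set within 0b1100,
  // the subtraction never borrows and simply clears X's bits, so C0 - X equals
  // C0 ^ X; the known-bits check below verifies exactly that condition.)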
4209 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4210 if (!C0->isOpaque()) {
4211 const APInt &C0Val = C0->getAPIntValue();
4212 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4213 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4214 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4215 }
4216 }
4217
4218 // smax(a,b) - smin(a,b) --> abds(a,b)
4219 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4220 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4221      sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4222    return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4223
4224 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4225 if (hasOperation(ISD::ABDS, VT) &&
4226 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4227      sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4228    return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4229
4230 // umax(a,b) - umin(a,b) --> abdu(a,b)
4231 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4232 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4233      sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4234    return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4235
4236 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4237 if (hasOperation(ISD::ABDU, VT) &&
4238 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4239      sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4240    return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4241
4242 return SDValue();
4243}
4244
4245SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4246 unsigned Opcode = N->getOpcode();
4247 SDValue N0 = N->getOperand(0);
4248 SDValue N1 = N->getOperand(1);
4249 EVT VT = N0.getValueType();
4250 bool IsSigned = Opcode == ISD::SSUBSAT;
4251 SDLoc DL(N);
4252
4253 // fold (sub_sat x, undef) -> 0
4254 if (N0.isUndef() || N1.isUndef())
4255 return DAG.getConstant(0, DL, VT);
4256
4257 // fold (sub_sat x, x) -> 0
4258 if (N0 == N1)
4259 return DAG.getConstant(0, DL, VT);
4260
4261 // fold (sub_sat c1, c2) -> c3
4262 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4263 return C;
4264
4265 // fold vector ops
4266 if (VT.isVector()) {
4267 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4268 return FoldedVOp;
4269
4270 // fold (sub_sat x, 0) -> x, vector edition
4271    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4272      return N0;
4273 }
4274
4275 // fold (sub_sat x, 0) -> x
4276 if (isNullConstant(N1))
4277 return N0;
4278
4279  // If it cannot overflow, transform into a sub.
4280 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4281 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4282
4283 return SDValue();
4284}
4285
4286SDValue DAGCombiner::visitSUBC(SDNode *N) {
4287 SDValue N0 = N->getOperand(0);
4288 SDValue N1 = N->getOperand(1);
4289 EVT VT = N0.getValueType();
4290 SDLoc DL(N);
4291
4292 // If the flag result is dead, turn this into an SUB.
4293 if (!N->hasAnyUseOfValue(1))
4294 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4295 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4296
4297 // fold (subc x, x) -> 0 + no borrow
4298 if (N0 == N1)
4299 return CombineTo(N, DAG.getConstant(0, DL, VT),
4300 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4301
4302 // fold (subc x, 0) -> x + no borrow
4303 if (isNullConstant(N1))
4304 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4305
4306 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4307 if (isAllOnesConstant(N0))
4308 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4309 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4310
4311 return SDValue();
4312}
4313
4314SDValue DAGCombiner::visitSUBO(SDNode *N) {
4315 SDValue N0 = N->getOperand(0);
4316 SDValue N1 = N->getOperand(1);
4317 EVT VT = N0.getValueType();
4318 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4319
4320 EVT CarryVT = N->getValueType(1);
4321 SDLoc DL(N);
4322
4323 // If the flag result is dead, turn this into an SUB.
4324 if (!N->hasAnyUseOfValue(1))
4325 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4326 DAG.getUNDEF(CarryVT));
4327
4328 // fold (subo x, x) -> 0 + no borrow
4329 if (N0 == N1)
4330 return CombineTo(N, DAG.getConstant(0, DL, VT),
4331 DAG.getConstant(0, DL, CarryVT));
4332
4333  // fold (subo x, c) -> (addo x, -c)
4334  if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4335    if (IsSigned && !N1C->isMinSignedValue())
4336 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4337 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4338
4339 // fold (subo x, 0) -> x + no borrow
4340 if (isNullOrNullSplat(N1))
4341 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4342
4343  // If it cannot overflow, transform into a sub.
4344 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4345 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4346 DAG.getConstant(0, DL, CarryVT));
4347
4348 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4349 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4350 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4351 DAG.getConstant(0, DL, CarryVT));
4352
4353 return SDValue();
4354}
4355
4356SDValue DAGCombiner::visitSUBE(SDNode *N) {
4357 SDValue N0 = N->getOperand(0);
4358 SDValue N1 = N->getOperand(1);
4359 SDValue CarryIn = N->getOperand(2);
4360
4361 // fold (sube x, y, false) -> (subc x, y)
4362 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4363 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4364
4365 return SDValue();
4366}
4367
4368SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4369 SDValue N0 = N->getOperand(0);
4370 SDValue N1 = N->getOperand(1);
4371 SDValue CarryIn = N->getOperand(2);
4372
4373 // fold (usubo_carry x, y, false) -> (usubo x, y)
4374 if (isNullConstant(CarryIn)) {
4375 if (!LegalOperations ||
4376 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4377 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4378 }
4379
4380 return SDValue();
4381}
4382
4383SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4384 SDValue N0 = N->getOperand(0);
4385 SDValue N1 = N->getOperand(1);
4386 SDValue CarryIn = N->getOperand(2);
4387
4388 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4389 if (isNullConstant(CarryIn)) {
4390 if (!LegalOperations ||
4391 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4392 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4393 }
4394
4395 return SDValue();
4396}
4397
4398// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4399// UMULFIXSAT here.
4400SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4401 SDValue N0 = N->getOperand(0);
4402 SDValue N1 = N->getOperand(1);
4403 SDValue Scale = N->getOperand(2);
4404 EVT VT = N0.getValueType();
4405
4406 // fold (mulfix x, undef, scale) -> 0
4407 if (N0.isUndef() || N1.isUndef())
4408 return DAG.getConstant(0, SDLoc(N), VT);
4409
4410 // Canonicalize constant to RHS (vector doesn't have to splat)
4411  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4412      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4413    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4414
4415 // fold (mulfix x, 0, scale) -> 0
4416 if (isNullConstant(N1))
4417 return DAG.getConstant(0, SDLoc(N), VT);
4418
4419 return SDValue();
4420}
4421
4422template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4423 SDValue N0 = N->getOperand(0);
4424 SDValue N1 = N->getOperand(1);
4425 EVT VT = N0.getValueType();
4426 unsigned BitWidth = VT.getScalarSizeInBits();
4427 SDLoc DL(N);
4428 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4429 MatchContextClass Matcher(DAG, TLI, N);
4430
4431 // fold (mul x, undef) -> 0
4432 if (N0.isUndef() || N1.isUndef())
4433 return DAG.getConstant(0, DL, VT);
4434
4435 // fold (mul c1, c2) -> c1*c2
4436 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4437 return C;
4438
4439 // canonicalize constant to RHS (vector doesn't have to splat)
4440  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4441      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4442    return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4443
4444 bool N1IsConst = false;
4445 bool N1IsOpaqueConst = false;
4446 APInt ConstValue1;
4447
4448 // fold vector ops
4449 if (VT.isVector()) {
4450 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4451 if (!UseVP)
4452 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4453 return FoldedVOp;
4454
4455 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4456 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4457 "Splat APInt should be element width");
4458 } else {
4459 N1IsConst = isa<ConstantSDNode>(N1);
4460 if (N1IsConst) {
4461 ConstValue1 = N1->getAsAPIntVal();
4462 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4463 }
4464 }
4465
4466 // fold (mul x, 0) -> 0
4467 if (N1IsConst && ConstValue1.isZero())
4468 return N1;
4469
4470 // fold (mul x, 1) -> x
4471 if (N1IsConst && ConstValue1.isOne())
4472 return N0;
4473
4474 if (!UseVP)
4475 if (SDValue NewSel = foldBinOpIntoSelect(N))
4476 return NewSel;
4477
4478 // fold (mul x, -1) -> 0-x
4479 if (N1IsConst && ConstValue1.isAllOnes())
4480 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4481
4482 // fold (mul x, (1 << c)) -> x << c
4483 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4484 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4485 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4486 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4487 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4488 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4489 }
4490 }
4491
4492 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4493 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4494 unsigned Log2Val = (-ConstValue1).logBase2();
4495
4496 // FIXME: If the input is something that is easily negated (e.g. a
4497 // single-use add), we should put the negate there.
4498 return Matcher.getNode(
4499 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4500 Matcher.getNode(ISD::SHL, DL, VT, N0,
4501 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4502 }
4503
4504 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4505 // hi result is in use in case we hit this mid-legalization.
4506 if (!UseVP) {
4507 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4508 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4509 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4510 // TODO: Can we match commutable operands with getNodeIfExists?
4511 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4512 if (LoHi->hasAnyUseOfValue(1))
4513 return SDValue(LoHi, 0);
4514 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4515 if (LoHi->hasAnyUseOfValue(1))
4516 return SDValue(LoHi, 0);
4517 }
4518 }
4519 }
4520
4521 // Try to transform:
4522 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4523 // mul x, (2^N + 1) --> add (shl x, N), x
4524 // mul x, (2^N - 1) --> sub (shl x, N), x
4525 // Examples: x * 33 --> (x << 5) + x
4526 // x * 15 --> (x << 4) - x
4527 // x * -33 --> -((x << 5) + x)
4528 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4529 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4530 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4531 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4532 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4533 // x * 0xf800 --> (x << 16) - (x << 11)
4534 // x * -0x8800 --> -((x << 15) + (x << 11))
4535 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4536 if (!UseVP && N1IsConst &&
4537 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4538 // TODO: We could handle more general decomposition of any constant by
4539 // having the target set a limit on number of ops and making a
4540 // callback to determine that sequence (similar to sqrt expansion).
4541 unsigned MathOp = ISD::DELETED_NODE;
4542 APInt MulC = ConstValue1.abs();
4543 // The constant `2` should be treated as (2^0 + 1).
4544 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4545 MulC.lshrInPlace(TZeros);
4546 if ((MulC - 1).isPowerOf2())
4547 MathOp = ISD::ADD;
4548 else if ((MulC + 1).isPowerOf2())
4549 MathOp = ISD::SUB;
4550
4551 if (MathOp != ISD::DELETED_NODE) {
4552 unsigned ShAmt =
4553 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4554 ShAmt += TZeros;
4555 assert(ShAmt < BitWidth &&
4556 "multiply-by-constant generated out of bounds shift");
4557 SDValue Shl =
4558 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4559 SDValue R =
4560 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4561 DAG.getNode(ISD::SHL, DL, VT, N0,
4562 DAG.getConstant(TZeros, DL, VT)))
4563 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4564 if (ConstValue1.isNegative())
4565 R = DAG.getNegative(R, DL, VT);
4566 return R;
4567 }
4568 }
4569
4570 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4571 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4572 SDValue N01 = N0.getOperand(1);
4573 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4574 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4575 }
4576
4577 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4578 // use.
4579 {
4580 SDValue Sh, Y;
4581
4582 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4583 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4585 Sh = N0; Y = N1;
4586 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4588 Sh = N1; Y = N0;
4589 }
4590
4591 if (Sh.getNode()) {
4592 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4593 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4594 }
4595 }
4596
4597 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4598 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4602 return Matcher.getNode(
4603 ISD::ADD, DL, VT,
4604 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4605 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4606
4607 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4608  ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4609  if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4610 const APInt &C0 = N0.getConstantOperandAPInt(0);
4611 const APInt &C1 = NC1->getAPIntValue();
4612 return DAG.getVScale(DL, VT, C0 * C1);
4613 }
4614
4615 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4616 APInt MulVal;
4617 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4618 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4619 const APInt &C0 = N0.getConstantOperandAPInt(0);
4620 APInt NewStep = C0 * MulVal;
4621 return DAG.getStepVector(DL, VT, NewStep);
4622 }
4623
4624 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4625 SDValue X;
4626 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4628 N, Matcher,
4630 m_Deferred(X)))) {
4631 return Matcher.getNode(ISD::ABS, DL, VT, X);
4632 }
4633
4634  // Fold (mul x, 0/undef) -> 0 and
4635  //      (mul x, 1) -> x
4636  // into and(x, mask).
4637 // We can replace vectors with '0' and '1' factors with a clearing mask.
4638 if (VT.isFixedLengthVector()) {
4639 unsigned NumElts = VT.getVectorNumElements();
4640 SmallBitVector ClearMask;
4641 ClearMask.reserve(NumElts);
4642 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4643 if (!V || V->isZero()) {
4644 ClearMask.push_back(true);
4645 return true;
4646 }
4647 ClearMask.push_back(false);
4648 return V->isOne();
4649 };
4650 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4651 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4652 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4653 EVT LegalSVT = N1.getOperand(0).getValueType();
4654 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4655 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4656      SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4657      for (unsigned I = 0; I != NumElts; ++I)
4658 if (ClearMask[I])
4659 Mask[I] = Zero;
4660 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4661 }
4662 }
4663
4664 // reassociate mul
4665 // TODO: Change reassociateOps to support vp ops.
4666 if (!UseVP)
4667 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4668 return RMUL;
4669
4670 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4671 // TODO: Change reassociateReduction to support vp ops.
4672 if (!UseVP)
4673 if (SDValue SD =
4674 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4675 return SD;
4676
4677 // Simplify the operands using demanded-bits information.
4678  if (SimplifyDemandedBits(SDValue(N, 0)))
4679    return SDValue(N, 0);
4680
4681 return SDValue();
4682}
4683
4684/// Return true if divmod libcall is available.
4685static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4686                                     const TargetLowering &TLI) {
4687 RTLIB::Libcall LC;
4688 EVT NodeType = Node->getValueType(0);
4689 if (!NodeType.isSimple())
4690 return false;
4691 switch (NodeType.getSimpleVT().SimpleTy) {
4692 default: return false; // No libcall for vector types.
4693 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4694 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4695 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4696 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4697 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4698 }
4699
4700 return TLI.getLibcallName(LC) != nullptr;
4701}
4702
4703/// Issue divrem if both quotient and remainder are needed.
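/// For example, when both X/Y and X%Y are live, a single (sdivrem X, Y) node
/// can produce both results; matching div/rem users of the same operands are
/// rewired to that one node.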
4704SDValue DAGCombiner::useDivRem(SDNode *Node) {
4705 if (Node->use_empty())
4706 return SDValue(); // This is a dead node, leave it alone.
4707
4708 unsigned Opcode = Node->getOpcode();
4709 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4710 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4711
4712 // DivMod lib calls can still work on non-legal types if using lib-calls.
4713 EVT VT = Node->getValueType(0);
4714 if (VT.isVector() || !VT.isInteger())
4715 return SDValue();
4716
4717 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4718 return SDValue();
4719
4720 // If DIVREM is going to get expanded into a libcall,
4721 // but there is no libcall available, then don't combine.
4722 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4723      !isDivRemLibcallAvailable(Node, isSigned, TLI))
4724    return SDValue();
4725
4726 // If div is legal, it's better to do the normal expansion
4727 unsigned OtherOpcode = 0;
4728 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4729 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4730 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4731 return SDValue();
4732 } else {
4733 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4734 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4735 return SDValue();
4736 }
4737
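// Scan the other users of the dividend: any matching div/rem (or existing
// DIVREM) over the same operands is rewritten to share a single DIVREM node,
// so the quotient and remainder come from one computation.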
4738 SDValue Op0 = Node->getOperand(0);
4739 SDValue Op1 = Node->getOperand(1);
4740 SDValue combined;
4741 for (SDNode *User : Op0->users()) {
4742 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4743 User->use_empty())
4744 continue;
4745 // Convert the other matching node(s), too;
4746 // otherwise, the DIVREM may get target-legalized into something
4747 // target-specific that we won't be able to recognize.
4748 unsigned UserOpc = User->getOpcode();
4749 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4750 User->getOperand(0) == Op0 &&
4751 User->getOperand(1) == Op1) {
4752 if (!combined) {
4753 if (UserOpc == OtherOpcode) {
4754 SDVTList VTs = DAG.getVTList(VT, VT);
4755 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4756 } else if (UserOpc == DivRemOpc) {
4757 combined = SDValue(User, 0);
4758 } else {
4759 assert(UserOpc == Opcode);
4760 continue;
4761 }
4762 }
4763 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4764 CombineTo(User, combined);
4765 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4766 CombineTo(User, combined.getValue(1));
4767 }
4768 }
4769 return combined;
4770}
4771
4772 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4773 SDValue N0 = N->getOperand(0);
4774 SDValue N1 = N->getOperand(1);
4775 EVT VT = N->getValueType(0);
4776 SDLoc DL(N);
4777
4778 unsigned Opc = N->getOpcode();
4779 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4780 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4781
4782 // X / undef -> undef
4783 // X % undef -> undef
4784 // X / 0 -> undef
4785 // X % 0 -> undef
4786 // NOTE: This includes vectors where any divisor element is zero/undef.
4787 if (DAG.isUndef(Opc, {N0, N1}))
4788 return DAG.getUNDEF(VT);
4789
4790 // undef / X -> 0
4791 // undef % X -> 0
4792 if (N0.isUndef())
4793 return DAG.getConstant(0, DL, VT);
4794
4795 // 0 / X -> 0
4796 // 0 % X -> 0
4797 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4798 if (N0C && N0C->isZero())
4799 return N0;
4800
4801 // X / X -> 1
4802 // X % X -> 0
4803 if (N0 == N1)
4804 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4805
4806 // X / 1 -> X
4807 // X % 1 -> 0
4808 // If this is a boolean op (single-bit element type), we can't have
4809 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4810 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4811 // it's a 1.
4812 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4813 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4814
4815 return SDValue();
4816}
4817
4818SDValue DAGCombiner::visitSDIV(SDNode *N) {
4819 SDValue N0 = N->getOperand(0);
4820 SDValue N1 = N->getOperand(1);
4821 EVT VT = N->getValueType(0);
4822 EVT CCVT = getSetCCResultType(VT);
4823 SDLoc DL(N);
4824
4825 // fold (sdiv c1, c2) -> c1/c2
4826 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4827 return C;
4828
4829 // fold vector ops
4830 if (VT.isVector())
4831 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4832 return FoldedVOp;
4833
4834 // fold (sdiv X, -1) -> 0-X
4835 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4836 if (N1C && N1C->isAllOnes())
4837 return DAG.getNegative(N0, DL, VT);
4838
4839 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4840 if (N1C && N1C->isMinSignedValue())
4841 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4842 DAG.getConstant(1, DL, VT),
4843 DAG.getConstant(0, DL, VT));
4844
4845 if (SDValue V = simplifyDivRem(N, DAG))
4846 return V;
4847
4848 if (SDValue NewSel = foldBinOpIntoSelect(N))
4849 return NewSel;
4850
4851 // If we know the sign bits of both operands are zero, strength reduce to a
4852 // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
4853 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4854 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4855
4856 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4857 // If the corresponding remainder node exists, update its users with
4858 // (Dividend - (Quotient * Divisor)).
4859 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4860 { N0, N1 })) {
4861 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4862 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4863 AddToWorklist(Mul.getNode());
4864 AddToWorklist(Sub.getNode());
4865 CombineTo(RemNode, Sub);
4866 }
4867 return V;
4868 }
4869
4870 // sdiv, srem -> sdivrem
4871 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4872 // true. Otherwise, we break the simplification logic in visitREM().
4873 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4874 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4875 if (SDValue DivRem = useDivRem(N))
4876 return DivRem;
4877
4878 return SDValue();
4879}
4880
4881static bool isDivisorPowerOfTwo(SDValue Divisor) {
4882 // Helper for determining whether a value is a power-2 constant scalar or a
4883 // vector of such elements.
4884 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4885 if (C->isZero() || C->isOpaque())
4886 return false;
4887 if (C->getAPIntValue().isPowerOf2())
4888 return true;
4889 if (C->getAPIntValue().isNegatedPowerOf2())
4890 return true;
4891 return false;
4892 };
4893
4894 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4895}
4896
4897SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4898 SDLoc DL(N);
4899 EVT VT = N->getValueType(0);
4900 EVT CCVT = getSetCCResultType(VT);
4901 unsigned BitWidth = VT.getScalarSizeInBits();
4902
4903 // fold (sdiv X, pow2) -> simple ops after legalize
4904 // FIXME: We check for the exact bit here because the generic lowering gives
4905 // better results in that case. The target-specific lowering should learn how
4906 // to handle exact sdivs efficiently.
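// The generic expansion below rounds the dividend toward zero before shifting,
// e.g. for (sdiv i32 X, 8): (X + ((X >> 31) >>u 29)) >> 3, with an extra
// select for divisors of +/-1 and a final negation for negative divisors.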
4907 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4908 // Target-specific implementation of sdiv x, pow2.
4909 if (SDValue Res = BuildSDIVPow2(N))
4910 return Res;
4911
4912 // Create constants that are functions of the shift amount value.
4913 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4914 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4915 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4916 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4917 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4918 if (!isConstantOrConstantVector(Inexact))
4919 return SDValue();
4920
4921 // Splat the sign bit into the register
4922 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4923 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4924 AddToWorklist(Sign.getNode());
4925
4926 // Add (N0 < 0) ? abs2 - 1 : 0;
4927 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4928 AddToWorklist(Srl.getNode());
4929 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4930 AddToWorklist(Add.getNode());
4931 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4932 AddToWorklist(Sra.getNode());
4933
4934 // Special case: (sdiv X, 1) -> X
4935 // Special Case: (sdiv X, -1) -> 0-X
4936 SDValue One = DAG.getConstant(1, DL, VT);
4937 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4938 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4939 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4940 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4941 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4942
4943 // If dividing by a positive value, we're done. Otherwise, the result must
4944 // be negated.
4945 SDValue Zero = DAG.getConstant(0, DL, VT);
4946 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4947
4948 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4949 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4950 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4951 return Res;
4952 }
4953
4954 // If integer divide is expensive and we satisfy the requirements, emit an
4955 // alternate sequence. Targets may check function attributes for size/speed
4956 // trade-offs.
4957 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4958 if (isConstantOrConstantVector(N1) &&
4959 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4960 if (SDValue Op = BuildSDIV(N))
4961 return Op;
4962
4963 return SDValue();
4964}
4965
4966SDValue DAGCombiner::visitUDIV(SDNode *N) {
4967 SDValue N0 = N->getOperand(0);
4968 SDValue N1 = N->getOperand(1);
4969 EVT VT = N->getValueType(0);
4970 EVT CCVT = getSetCCResultType(VT);
4971 SDLoc DL(N);
4972
4973 // fold (udiv c1, c2) -> c1/c2
4974 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4975 return C;
4976
4977 // fold vector ops
4978 if (VT.isVector())
4979 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4980 return FoldedVOp;
4981
4982 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4983 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4984 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4985 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4986 DAG.getConstant(1, DL, VT),
4987 DAG.getConstant(0, DL, VT));
4988 }
4989
4990 if (SDValue V = simplifyDivRem(N, DAG))
4991 return V;
4992
4993 if (SDValue NewSel = foldBinOpIntoSelect(N))
4994 return NewSel;
4995
4996 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4997 // If the corresponding remainder node exists, update its users with
4998 // (Dividend - (Quotient * Divisor)).
4999 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5000 { N0, N1 })) {
5001 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5002 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5003 AddToWorklist(Mul.getNode());
5004 AddToWorklist(Sub.getNode());
5005 CombineTo(RemNode, Sub);
5006 }
5007 return V;
5008 }
5009
5010 // udiv, urem -> udivrem
5011 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5012 // true. Otherwise, we break the simplification logic in visitREM().
5013 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5014 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5015 if (SDValue DivRem = useDivRem(N))
5016 return DivRem;
5017
5018 // Simplify the operands using demanded-bits information.
5019 // We don't have demanded bits support for UDIV so this just enables constant
5020 // folding based on known bits.
5021 if (SimplifyDemandedBits(SDValue(N, 0)))
5022 return SDValue(N, 0);
5023
5024 return SDValue();
5025}
5026
5027SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5028 SDLoc DL(N);
5029 EVT VT = N->getValueType(0);
5030
5031 // fold (udiv x, (1 << c)) -> x >>u c
5032 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5033 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5034 AddToWorklist(LogBase2.getNode());
5035
5036 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5037 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5038 AddToWorklist(Trunc.getNode());
5039 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5040 }
5041 }
5042
5043 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5044 if (N1.getOpcode() == ISD::SHL) {
5045 SDValue N10 = N1.getOperand(0);
5046 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5047 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5048 AddToWorklist(LogBase2.getNode());
5049
5050 EVT ADDVT = N1.getOperand(1).getValueType();
5051 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5052 AddToWorklist(Trunc.getNode());
5053 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5054 AddToWorklist(Add.getNode());
5055 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5056 }
5057 }
5058 }
5059
5060 // fold (udiv x, c) -> alternate
5061 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5062 if (isConstantOrConstantVector(N1) &&
5063 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5064 if (SDValue Op = BuildUDIV(N))
5065 return Op;
5066
5067 return SDValue();
5068}
5069
5070SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5071 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5072 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5073 // Target-specific implementation of srem x, pow2.
5074 if (SDValue Res = BuildSREMPow2(N))
5075 return Res;
5076 }
5077 return SDValue();
5078}
5079
5080// handles ISD::SREM and ISD::UREM
5081SDValue DAGCombiner::visitREM(SDNode *N) {
5082 unsigned Opcode = N->getOpcode();
5083 SDValue N0 = N->getOperand(0);
5084 SDValue N1 = N->getOperand(1);
5085 EVT VT = N->getValueType(0);
5086 EVT CCVT = getSetCCResultType(VT);
5087
5088 bool isSigned = (Opcode == ISD::SREM);
5089 SDLoc DL(N);
5090
5091 // fold (rem c1, c2) -> c1%c2
5092 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5093 return C;
5094
5095 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5096 // Freeze the numerator to avoid a miscompile with an undefined value.
5097 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5098 CCVT.isVector() == VT.isVector()) {
5099 SDValue F0 = DAG.getFreeze(N0);
5100 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5101 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5102 }
5103
5104 if (SDValue V = simplifyDivRem(N, DAG))
5105 return V;
5106
5107 if (SDValue NewSel = foldBinOpIntoSelect(N))
5108 return NewSel;
5109
5110 if (isSigned) {
5111 // If we know the sign bits of both operands are zero, strength reduce to a
5112 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5113 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5114 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5115 } else {
5116 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5117 // fold (urem x, pow2) -> (and x, pow2-1)
5118 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5119 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5120 AddToWorklist(Add.getNode());
5121 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5122 }
5123 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5124 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5125 // TODO: We should sink the following into isKnownToBePowerOfTwo
5126 // using a OrZero parameter analogous to our handling in ValueTracking.
5127 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5129 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5130 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5131 AddToWorklist(Add.getNode());
5132 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5133 }
5134 }
5135
5136 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5137
5138 // If X/C can be simplified by the division-by-constant logic, lower
5139 // X%C to the equivalent of X-X/C*C.
5140 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5141 // speculative DIV must not cause a DIVREM conversion. We guard against this
5142 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5143 // combine will not return a DIVREM. Regardless, checking cheapness here
5144 // makes sense since the simplification results in fatter code.
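// For example, (urem x, 7) becomes x - (x /u 7) * 7, where the udiv is expected
// to be lowered by the division-by-constant logic into multiply/shift ops.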
5145 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5146 if (isSigned) {
5147 // check if we can build faster implementation for srem
5148 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5149 return OptimizedRem;
5150 }
5151
5152 SDValue OptimizedDiv =
5153 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5154 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5155 // If the equivalent Div node also exists, update its users.
5156 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5157 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5158 { N0, N1 }))
5159 CombineTo(DivNode, OptimizedDiv);
5160 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5161 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5162 AddToWorklist(OptimizedDiv.getNode());
5163 AddToWorklist(Mul.getNode());
5164 return Sub;
5165 }
5166 }
5167
5168 // sdiv, srem -> sdivrem
5169 if (SDValue DivRem = useDivRem(N))
5170 return DivRem.getValue(1);
5171
5172 return SDValue();
5173}
5174
5175SDValue DAGCombiner::visitMULHS(SDNode *N) {
5176 SDValue N0 = N->getOperand(0);
5177 SDValue N1 = N->getOperand(1);
5178 EVT VT = N->getValueType(0);
5179 SDLoc DL(N);
5180
5181 // fold (mulhs c1, c2)
5182 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5183 return C;
5184
5185 // canonicalize constant to RHS.
5186 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5187 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5188 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5189
5190 if (VT.isVector()) {
5191 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5192 return FoldedVOp;
5193
5194 // fold (mulhs x, 0) -> 0
5195 // do not return N1, because undef node may exist.
5196 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5197 return DAG.getConstant(0, DL, VT);
5198 }
5199
5200 // fold (mulhs x, 0) -> 0
5201 if (isNullConstant(N1))
5202 return N1;
5203
5204 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5205 if (isOneConstant(N1))
5206 return DAG.getNode(
5207 ISD::SRA, DL, VT, N0,
5208 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5209
5210 // fold (mulhs x, undef) -> 0
5211 if (N0.isUndef() || N1.isUndef())
5212 return DAG.getConstant(0, DL, VT);
5213
5214 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5215 // plus a shift.
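// For example, (mulhs i16 a, b) with a legal i32 multiply becomes
// (trunc (srl (mul (sext a), (sext b)), 16)).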
5216 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5217 !VT.isVector()) {
5218 MVT Simple = VT.getSimpleVT();
5219 unsigned SimpleSize = Simple.getSizeInBits();
5220 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5221 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5222 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5223 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5224 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5225 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5226 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5227 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5228 }
5229 }
5230
5231 return SDValue();
5232}
5233
5234SDValue DAGCombiner::visitMULHU(SDNode *N) {
5235 SDValue N0 = N->getOperand(0);
5236 SDValue N1 = N->getOperand(1);
5237 EVT VT = N->getValueType(0);
5238 SDLoc DL(N);
5239
5240 // fold (mulhu c1, c2)
5241 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5242 return C;
5243
5244 // canonicalize constant to RHS.
5245 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5246 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5247 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5248
5249 if (VT.isVector()) {
5250 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5251 return FoldedVOp;
5252
5253 // fold (mulhu x, 0) -> 0
5254 // do not return N1, because undef node may exist.
5255 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5256 return DAG.getConstant(0, DL, VT);
5257 }
5258
5259 // fold (mulhu x, 0) -> 0
5260 if (isNullConstant(N1))
5261 return N1;
5262
5263 // fold (mulhu x, 1) -> 0
5264 if (isOneConstant(N1))
5265 return DAG.getConstant(0, DL, VT);
5266
5267 // fold (mulhu x, undef) -> 0
5268 if (N0.isUndef() || N1.isUndef())
5269 return DAG.getConstant(0, DL, VT);
5270
5271 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
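// For example, (mulhu i32 x, 65536) -> (srl x, 16): the high half of the
// product x * 2^16 is just x shifted right by 32 - 16.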
5272 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5273 hasOperation(ISD::SRL, VT)) {
5274 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5275 unsigned NumEltBits = VT.getScalarSizeInBits();
5276 SDValue SRLAmt = DAG.getNode(
5277 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5278 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5279 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5280 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5281 }
5282 }
5283
5284 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5285 // plus a shift.
5286 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5287 !VT.isVector()) {
5288 MVT Simple = VT.getSimpleVT();
5289 unsigned SimpleSize = Simple.getSizeInBits();
5290 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5291 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5292 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5293 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5294 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5295 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5296 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5297 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5298 }
5299 }
5300
5301 // Simplify the operands using demanded-bits information.
5302 // We don't have demanded bits support for MULHU so this just enables constant
5303 // folding based on known bits.
5304 if (SimplifyDemandedBits(SDValue(N, 0)))
5305 return SDValue(N, 0);
5306
5307 return SDValue();
5308}
5309
5310SDValue DAGCombiner::visitAVG(SDNode *N) {
5311 unsigned Opcode = N->getOpcode();
5312 SDValue N0 = N->getOperand(0);
5313 SDValue N1 = N->getOperand(1);
5314 EVT VT = N->getValueType(0);
5315 SDLoc DL(N);
5316 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5317
5318 // fold (avg c1, c2)
5319 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5320 return C;
5321
5322 // canonicalize constant to RHS.
5323 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5324 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5325 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5326
5327 if (VT.isVector())
5328 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5329 return FoldedVOp;
5330
5331 // fold (avg x, undef) -> x
5332 if (N0.isUndef())
5333 return N1;
5334 if (N1.isUndef())
5335 return N0;
5336
5337 // fold (avg x, x) --> x
5338 if (N0 == N1 && Level >= AfterLegalizeTypes)
5339 return N0;
5340
5341 // fold (avgfloor x, 0) -> x >> 1
5342 SDValue X, Y;
5343 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5344 return DAG.getNode(ISD::SRA, DL, VT, X,
5345 DAG.getShiftAmountConstant(1, VT, DL));
5346 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5347 return DAG.getNode(ISD::SRL, DL, VT, X,
5348 DAG.getShiftAmountConstant(1, VT, DL));
5349
5350 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5351 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5352 if (!IsSigned &&
5353 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5354 X.getValueType() == Y.getValueType() &&
5355 hasOperation(Opcode, X.getValueType())) {
5356 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5357 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5358 }
5359 if (IsSigned &&
5360 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5361 X.getValueType() == Y.getValueType() &&
5362 hasOperation(Opcode, X.getValueType())) {
5363 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5364 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5365 }
5366
5367 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5368 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5369 // Check if avgflooru isn't legal/custom but avgceilu is.
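// This is safe because avgflooru(x,y) = floor((x+y)/2) and, for y != 0,
// avgceilu(x,y-1) = ceil((x+y-1)/2) computes the same value without the
// decrement wrapping around.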
5370 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5371 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5372 if (DAG.isKnownNeverZero(N1))
5373 return DAG.getNode(
5374 ISD::AVGCEILU, DL, VT, N0,
5375 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5376 if (DAG.isKnownNeverZero(N0))
5377 return DAG.getNode(
5378 ISD::AVGCEILU, DL, VT, N1,
5379 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5380 }
5381
5382 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5383 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5384 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5385 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5386 SDValue Add;
5387 if (sd_match(N,
5388 m_c_BinOp(Opcode,
5390 m_One())) ||
5391 sd_match(N, m_c_BinOp(Opcode,
5393 m_Value(Y)))) {
5394
5395 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5396 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5397
5398 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5399 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5400 }
5401 }
5402
5403 return SDValue();
5404}
5405
5406SDValue DAGCombiner::visitABD(SDNode *N) {
5407 unsigned Opcode = N->getOpcode();
5408 SDValue N0 = N->getOperand(0);
5409 SDValue N1 = N->getOperand(1);
5410 EVT VT = N->getValueType(0);
5411 SDLoc DL(N);
5412
5413 // fold (abd c1, c2)
5414 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5415 return C;
5416
5417 // canonicalize constant to RHS.
5418 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5419 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5420 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5421
5422 if (VT.isVector())
5423 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5424 return FoldedVOp;
5425
5426 // fold (abd x, undef) -> 0
5427 if (N0.isUndef() || N1.isUndef())
5428 return DAG.getConstant(0, DL, VT);
5429
5430 // fold (abd x, x) -> 0
5431 if (N0 == N1)
5432 return DAG.getConstant(0, DL, VT);
5433
5434 SDValue X;
5435
5436 // fold (abds x, 0) -> abs x
5437 if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5438 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5439 return DAG.getNode(ISD::ABS, DL, VT, X);
5440
5441 // fold (abdu x, 0) -> x
5442 if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5443 return X;
5444
5445 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5446 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5447 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5448 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5449
5450 return SDValue();
5451}
5452
5453/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5454/// give the opcodes for the two computations that are being performed. Return
5455 /// the simplified value if a simplification was made.
5456SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5457 unsigned HiOp) {
5458 // If the high half is not needed, just compute the low half.
5459 bool HiExists = N->hasAnyUseOfValue(1);
5460 if (!HiExists && (!LegalOperations ||
5461 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5462 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5463 return CombineTo(N, Res, Res);
5464 }
5465
5466 // If the low half is not needed, just compute the high half.
5467 bool LoExists = N->hasAnyUseOfValue(0);
5468 if (!LoExists && (!LegalOperations ||
5469 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5470 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5471 return CombineTo(N, Res, Res);
5472 }
5473
5474 // If both halves are used, return as it is.
5475 if (LoExists && HiExists)
5476 return SDValue();
5477
5478 // If the two computed results can be simplified separately, separate them.
5479 if (LoExists) {
5480 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5481 AddToWorklist(Lo.getNode());
5482 SDValue LoOpt = combine(Lo.getNode());
5483 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5484 (!LegalOperations ||
5485 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5486 return CombineTo(N, LoOpt, LoOpt);
5487 }
5488
5489 if (HiExists) {
5490 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5491 AddToWorklist(Hi.getNode());
5492 SDValue HiOpt = combine(Hi.getNode());
5493 if (HiOpt.getNode() && HiOpt != Hi &&
5494 (!LegalOperations ||
5495 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5496 return CombineTo(N, HiOpt, HiOpt);
5497 }
5498
5499 return SDValue();
5500}
5501
5502SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5503 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5504 return Res;
5505
5506 SDValue N0 = N->getOperand(0);
5507 SDValue N1 = N->getOperand(1);
5508 EVT VT = N->getValueType(0);
5509 SDLoc DL(N);
5510
5511 // Constant fold.
5512 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5513 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5514
5515 // canonicalize constant to RHS (vector doesn't have to splat)
5516 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5517 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5518 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5519
5520 // If the type twice as wide is legal, transform the smul_lohi to a wider
5521 // multiply plus a shift.
5522 if (VT.isSimple() && !VT.isVector()) {
5523 MVT Simple = VT.getSimpleVT();
5524 unsigned SimpleSize = Simple.getSizeInBits();
5525 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5526 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5527 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5528 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5529 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5530 // Compute the high part as N1.
5531 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5532 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5533 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5534 // Compute the low part as N0.
5535 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5536 return CombineTo(N, Lo, Hi);
5537 }
5538 }
5539
5540 return SDValue();
5541}
5542
5543SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5544 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5545 return Res;
5546
5547 SDValue N0 = N->getOperand(0);
5548 SDValue N1 = N->getOperand(1);
5549 EVT VT = N->getValueType(0);
5550 SDLoc DL(N);
5551
5552 // Constant fold.
5553 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5554 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5555
5556 // canonicalize constant to RHS (vector doesn't have to splat)
5557 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5558 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5559 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5560
5561 // (umul_lohi N0, 0) -> (0, 0)
5562 if (isNullConstant(N1)) {
5563 SDValue Zero = DAG.getConstant(0, DL, VT);
5564 return CombineTo(N, Zero, Zero);
5565 }
5566
5567 // (umul_lohi N0, 1) -> (N0, 0)
5568 if (isOneConstant(N1)) {
5569 SDValue Zero = DAG.getConstant(0, DL, VT);
5570 return CombineTo(N, N0, Zero);
5571 }
5572
5573 // If the type twice as wide is legal, transform the umul_lohi to a wider
5574 // multiply plus a shift.
5575 if (VT.isSimple() && !VT.isVector()) {
5576 MVT Simple = VT.getSimpleVT();
5577 unsigned SimpleSize = Simple.getSizeInBits();
5578 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5579 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5580 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5581 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5582 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5583 // Compute the high part as N1.
5584 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5585 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5586 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5587 // Compute the low part as N0.
5588 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5589 return CombineTo(N, Lo, Hi);
5590 }
5591 }
5592
5593 return SDValue();
5594}
5595
5596SDValue DAGCombiner::visitMULO(SDNode *N) {
5597 SDValue N0 = N->getOperand(0);
5598 SDValue N1 = N->getOperand(1);
5599 EVT VT = N0.getValueType();
5600 bool IsSigned = (ISD::SMULO == N->getOpcode());
5601
5602 EVT CarryVT = N->getValueType(1);
5603 SDLoc DL(N);
5604
5605 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5606 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5607
5608 // fold operation with constant operands.
5609 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5610 // multiple results.
5611 if (N0C && N1C) {
5612 bool Overflow;
5613 APInt Result =
5614 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5615 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5616 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5617 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5618 }
5619
5620 // canonicalize constant to RHS.
5621 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5622 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5623 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5624
5625 // fold (mulo x, 0) -> 0 + no carry out
5626 if (isNullOrNullSplat(N1))
5627 return CombineTo(N, DAG.getConstant(0, DL, VT),
5628 DAG.getConstant(0, DL, CarryVT));
5629
5630 // (mulo x, 2) -> (addo x, x)
5631 // FIXME: This needs a freeze.
5632 if (N1C && N1C->getAPIntValue() == 2 &&
5633 (!IsSigned || VT.getScalarSizeInBits() > 2))
5634 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5635 N->getVTList(), N0, N0);
5636
5637 // A 1 bit SMULO overflows if both inputs are 1.
5638 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5639 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5640 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5641 DAG.getConstant(0, DL, VT), ISD::SETNE);
5642 return CombineTo(N, And, Cmp);
5643 }
5644
5645 // If it cannot overflow, transform into a mul.
5646 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5647 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5648 DAG.getConstant(0, DL, CarryVT));
5649 return SDValue();
5650}
5651
5652// Function to calculate whether the Min/Max pair of SDNodes (potentially
5653// swapped around) make a signed saturate pattern, clamping to between a signed
5654 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5655// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5656// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5657// same as SimplifySelectCC. N0<N1 ? N2 : N3.
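// For example, smin(smax(x, -128), 127) clamps to a signed 8-bit range
// (BW = 8, Unsigned = false), while smax(smin(x, 255), 0) clamps to an
// unsigned 8-bit range (BW = 8, Unsigned = true).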
5658 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5659 SDValue N3, ISD::CondCode CC, unsigned &BW,
5660 bool &Unsigned, SelectionDAG &DAG) {
5661 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5662 ISD::CondCode CC) {
5663 // The compare and select operand should be the same or the select operands
5664 // should be truncated versions of the comparison.
5665 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5666 return 0;
5667 // The constants need to be the same or a truncated version of each other.
5668 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5669 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5670 if (!N1C || !N3C)
5671 return 0;
5672 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5673 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5674 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5675 return 0;
5676 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5677 };
5678
5679 // Check the initial value is a SMIN/SMAX equivalent.
5680 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5681 if (!Opcode0)
5682 return SDValue();
5683
5684 // We could only need one range check, if the fptosi could never produce
5685 // the upper value.
5686 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5687 if (isNullOrNullSplat(N3)) {
5688 EVT IntVT = N0.getValueType().getScalarType();
5689 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5690 if (FPVT.isSimple()) {
5691 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5692 const fltSemantics &Semantics = InputTy->getFltSemantics();
5693 uint32_t MinBitWidth =
5694 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5695 if (IntVT.getSizeInBits() >= MinBitWidth) {
5696 Unsigned = true;
5697 BW = PowerOf2Ceil(MinBitWidth);
5698 return N0;
5699 }
5700 }
5701 }
5702 }
5703
5704 SDValue N00, N01, N02, N03;
5705 ISD::CondCode N0CC;
5706 switch (N0.getOpcode()) {
5707 case ISD::SMIN:
5708 case ISD::SMAX:
5709 N00 = N02 = N0.getOperand(0);
5710 N01 = N03 = N0.getOperand(1);
5711 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5712 break;
5713 case ISD::SELECT_CC:
5714 N00 = N0.getOperand(0);
5715 N01 = N0.getOperand(1);
5716 N02 = N0.getOperand(2);
5717 N03 = N0.getOperand(3);
5718 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5719 break;
5720 case ISD::SELECT:
5721 case ISD::VSELECT:
5722 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5723 return SDValue();
5724 N00 = N0.getOperand(0).getOperand(0);
5725 N01 = N0.getOperand(0).getOperand(1);
5726 N02 = N0.getOperand(1);
5727 N03 = N0.getOperand(2);
5728 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5729 break;
5730 default:
5731 return SDValue();
5732 }
5733
5734 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5735 if (!Opcode1 || Opcode0 == Opcode1)
5736 return SDValue();
5737
5738 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5739 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5740 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5741 return SDValue();
5742
5743 const APInt &MinC = MinCOp->getAPIntValue();
5744 const APInt &MaxC = MaxCOp->getAPIntValue();
5745 APInt MinCPlus1 = MinC + 1;
5746 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5747 BW = MinCPlus1.exactLogBase2() + 1;
5748 Unsigned = false;
5749 return N02;
5750 }
5751
5752 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5753 BW = MinCPlus1.exactLogBase2();
5754 Unsigned = true;
5755 return N02;
5756 }
5757
5758 return SDValue();
5759}
5760
5761 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5762 SDValue N3, ISD::CondCode CC,
5763 SelectionDAG &DAG) {
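// If the clamped value is an fp-to-int conversion, the whole clamp pattern can
// be replaced by a saturating conversion, e.g. smin(smax(fptosi(f), -128), 127)
// becomes sext(fp_to_sint_sat f to i8).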
5764 unsigned BW;
5765 bool Unsigned;
5766 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5767 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5768 return SDValue();
5769 EVT FPVT = Fp.getOperand(0).getValueType();
5770 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5771 if (FPVT.isVector())
5772 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5773 FPVT.getVectorElementCount());
5774 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5775 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5776 return SDValue();
5777 SDLoc DL(Fp);
5778 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5779 DAG.getValueType(NewVT.getScalarType()));
5780 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5781}
5782
5783 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5784 SDValue N3, ISD::CondCode CC,
5785 SelectionDAG &DAG) {
5786 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5787 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5788 // be truncated versions of the setcc (N0/N1).
5789 if ((N0 != N2 &&
5790 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5791 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5792 return SDValue();
5793 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5794 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5795 if (!N1C || !N3C)
5796 return SDValue();
5797 const APInt &C1 = N1C->getAPIntValue();
5798 const APInt &C3 = N3C->getAPIntValue();
5799 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5800 C1 != C3.zext(C1.getBitWidth()))
5801 return SDValue();
5802
5803 unsigned BW = (C1 + 1).exactLogBase2();
5804 EVT FPVT = N0.getOperand(0).getValueType();
5805 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5806 if (FPVT.isVector())
5807 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5808 FPVT.getVectorElementCount());
5809 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5810 FPVT, NewVT))
5811 return SDValue();
5812
5813 SDValue Sat =
5814 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5815 DAG.getValueType(NewVT.getScalarType()));
5816 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5817}
5818
5819SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5820 SDValue N0 = N->getOperand(0);
5821 SDValue N1 = N->getOperand(1);
5822 EVT VT = N0.getValueType();
5823 unsigned Opcode = N->getOpcode();
5824 SDLoc DL(N);
5825
5826 // fold operation with constant operands.
5827 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5828 return C;
5829
5830 // If the operands are the same, this is a no-op.
5831 if (N0 == N1)
5832 return N0;
5833
5834 // canonicalize constant to RHS
5835 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5836 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5837 return DAG.getNode(Opcode, DL, VT, N1, N0);
5838
5839 // fold vector ops
5840 if (VT.isVector())
5841 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5842 return FoldedVOp;
5843
5844 // reassociate minmax
5845 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5846 return RMINMAX;
5847
5848 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5849 // Only do this if:
5850 // 1. The current op isn't legal and the flipped is.
5851 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5852 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5853 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5854 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5855 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5856 unsigned AltOpcode;
5857 switch (Opcode) {
5858 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5859 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5860 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5861 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5862 default: llvm_unreachable("Unknown MINMAX opcode");
5863 }
5864 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5865 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5866 }
5867
5868 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5869 if (SDValue S = PerformMinMaxFpToSatCombine(
5870 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5871 return S;
5872 if (Opcode == ISD::UMIN)
5873 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5874 return S;
5875
5876 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5877 auto ReductionOpcode = [](unsigned Opcode) {
5878 switch (Opcode) {
5879 case ISD::SMIN:
5880 return ISD::VECREDUCE_SMIN;
5881 case ISD::SMAX:
5882 return ISD::VECREDUCE_SMAX;
5883 case ISD::UMIN:
5884 return ISD::VECREDUCE_UMIN;
5885 case ISD::UMAX:
5886 return ISD::VECREDUCE_UMAX;
5887 default:
5888 llvm_unreachable("Unexpected opcode");
5889 }
5890 };
5891 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5892 SDLoc(N), VT, N0, N1))
5893 return SD;
5894
5895 // Simplify the operands using demanded-bits information.
5896 if (SimplifyDemandedBits(SDValue(N, 0)))
5897 return SDValue(N, 0);
5898
5899 return SDValue();
5900}
5901
5902/// If this is a bitwise logic instruction and both operands have the same
5903/// opcode, try to sink the other opcode after the logic instruction.
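/// For example, (and (zext i8 X to i32), (zext i8 Y to i32)) becomes
/// (zext (and i8 X, Y) to i32) when the narrower logic op is available.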
5904SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5905 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5906 EVT VT = N0.getValueType();
5907 unsigned LogicOpcode = N->getOpcode();
5908 unsigned HandOpcode = N0.getOpcode();
5909 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5910 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5911
5912 // Bail early if none of these transforms apply.
5913 if (N0.getNumOperands() == 0)
5914 return SDValue();
5915
5916 // FIXME: We should check number of uses of the operands to not increase
5917 // the instruction count for all transforms.
5918
5919 // Handle size-changing casts (or sign_extend_inreg).
5920 SDValue X = N0.getOperand(0);
5921 SDValue Y = N1.getOperand(0);
5922 EVT XVT = X.getValueType();
5923 SDLoc DL(N);
5924 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5925 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5926 N0.getOperand(1) == N1.getOperand(1))) {
5927 // If both operands have other uses, this transform would create extra
5928 // instructions without eliminating anything.
5929 if (!N0.hasOneUse() && !N1.hasOneUse())
5930 return SDValue();
5931 // We need matching integer source types.
5932 if (XVT != Y.getValueType())
5933 return SDValue();
5934 // Don't create an illegal op during or after legalization. Don't ever
5935 // create an unsupported vector op.
5936 if ((VT.isVector() || LegalOperations) &&
5937 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5938 return SDValue();
5939 // Avoid infinite looping with PromoteIntBinOp.
5940 // TODO: Should we apply desirable/legal constraints to all opcodes?
5941 if ((HandOpcode == ISD::ANY_EXTEND ||
5942 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5943 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5944 return SDValue();
5945 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5946 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5947 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5948 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5949 return DAG.getNode(HandOpcode, DL, VT, Logic);
5950 }
5951
5952 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5953 if (HandOpcode == ISD::TRUNCATE) {
5954 // If both operands have other uses, this transform would create extra
5955 // instructions without eliminating anything.
5956 if (!N0.hasOneUse() && !N1.hasOneUse())
5957 return SDValue();
5958 // We need matching source types.
5959 if (XVT != Y.getValueType())
5960 return SDValue();
5961 // Don't create an illegal op during or after legalization.
5962 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5963 return SDValue();
5964 // Be extra careful sinking truncate. If it's free, there's no benefit in
5965 // widening a binop. Also, don't create a logic op on an illegal type.
5966 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5967 return SDValue();
5968 if (!TLI.isTypeLegal(XVT))
5969 return SDValue();
5970 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5971 return DAG.getNode(HandOpcode, DL, VT, Logic);
5972 }
5973
5974 // For binops SHL/SRL/SRA/AND:
5975 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5976 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5977 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5978 N0.getOperand(1) == N1.getOperand(1)) {
5979 // If either operand has other uses, this transform is not an improvement.
5980 if (!N0.hasOneUse() || !N1.hasOneUse())
5981 return SDValue();
5982 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5983 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5984 }
5985
5986 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5987 if (HandOpcode == ISD::BSWAP) {
5988 // If either operand has other uses, this transform is not an improvement.
5989 if (!N0.hasOneUse() || !N1.hasOneUse())
5990 return SDValue();
5991 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5992 return DAG.getNode(HandOpcode, DL, VT, Logic);
5993 }
5994
5995 // For funnel shifts FSHL/FSHR:
5996 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5997 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5998 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5999 N0.getOperand(2) == N1.getOperand(2)) {
6000 if (!N0.hasOneUse() || !N1.hasOneUse())
6001 return SDValue();
6002 SDValue X1 = N0.getOperand(1);
6003 SDValue Y1 = N1.getOperand(1);
6004 SDValue S = N0.getOperand(2);
6005 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6006 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6007 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6008 }
6009
6010 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6011 // Only perform this optimization up until type legalization, before
6012 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6013 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6014 // we don't want to undo this promotion.
6015 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6016 // on scalars.
6017 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6018 Level <= AfterLegalizeTypes) {
6019 // Input types must be integer and the same.
6020 if (XVT.isInteger() && XVT == Y.getValueType() &&
6021 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6022 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6023 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6024 return DAG.getNode(HandOpcode, DL, VT, Logic);
6025 }
6026 }
6027
6028 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6029 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6030 // If both shuffles use the same mask, and both shuffle within a single
6031 // vector, then it is worthwhile to move the swizzle after the operation.
6032 // The type-legalizer generates this pattern when loading illegal
6033 // vector types from memory. In many cases this allows additional shuffle
6034 // optimizations.
6035 // There are other cases where moving the shuffle after the xor/and/or
6036 // is profitable even if shuffles don't perform a swizzle.
6037 // If both shuffles use the same mask, and both shuffles have the same first
6038 // or second operand, then it might still be profitable to move the shuffle
6039 // after the xor/and/or operation.
6040 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6041 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6042 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6043 assert(X.getValueType() == Y.getValueType() &&
6044 "Inputs to shuffles are not the same type");
6045
6046 // Check that both shuffles use the same mask. The masks are known to be of
6047 // the same length because the result vector type is the same.
6048 // Check also that shuffles have only one use to avoid introducing extra
6049 // instructions.
6050 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6051 !SVN0->getMask().equals(SVN1->getMask()))
6052 return SDValue();
6053
6054 // Don't try to fold this node if it requires introducing a
6055 // build vector of all zeros that might be illegal at this stage.
6056 SDValue ShOp = N0.getOperand(1);
6057 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6058 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6059
6060 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6061 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6062 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6063 N0.getOperand(0), N1.getOperand(0));
6064 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6065 }
6066
6067 // Don't try to fold this node if it requires introducing a
6068 // build vector of all zeros that might be illegal at this stage.
6069 ShOp = N0.getOperand(0);
6070 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6071 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6072
6073 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6074 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6075 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6076 N1.getOperand(1));
6077 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6078 }
6079 }
6080
6081 return SDValue();
6082}
6083
6084/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6085SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6086 const SDLoc &DL) {
6087 SDValue LL, LR, RL, RR, N0CC, N1CC;
6088 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6089 !isSetCCEquivalent(N1, RL, RR, N1CC))
6090 return SDValue();
6091
6092 assert(N0.getValueType() == N1.getValueType() &&
6093 "Unexpected operand types for bitwise logic op");
6094 assert(LL.getValueType() == LR.getValueType() &&
6095 RL.getValueType() == RR.getValueType() &&
6096 "Unexpected operand types for setcc");
6097
6098 // If we're here post-legalization or the logic op type is not i1, the logic
6099 // op type must match a setcc result type. Also, all folds require new
6100 // operations on the left and right operands, so those types must match.
6101 EVT VT = N0.getValueType();
6102 EVT OpVT = LL.getValueType();
6103 if (LegalOperations || VT.getScalarType() != MVT::i1)
6104 if (VT != getSetCCResultType(OpVT))
6105 return SDValue();
6106 if (OpVT != RL.getValueType())
6107 return SDValue();
6108
6109 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6110 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6111 bool IsInteger = OpVT.isInteger();
6112 if (LR == RR && CC0 == CC1 && IsInteger) {
6113 bool IsZero = isNullOrNullSplat(LR);
6114 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6115
6116 // All bits clear?
6117 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6118 // All sign bits clear?
6119 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6120 // Any bits set?
6121 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6122 // Any sign bits set?
6123 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6124
6125 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6126 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6127 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6128 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6129 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6130 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6131 AddToWorklist(Or.getNode());
6132 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6133 }
6134
6135 // All bits set?
6136 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6137 // All sign bits set?
6138 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6139 // Any bits clear?
6140 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6141 // Any sign bits clear?
6142 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6143
6144 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6145 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6146 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6147 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6148 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6149 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6150 AddToWorklist(And.getNode());
6151 return DAG.getSetCC(DL, VT, And, LR, CC1);
6152 }
6153 }
6154
6155 // TODO: What is the 'or' equivalent of this fold?
6156 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6157 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6158 IsInteger && CC0 == ISD::SETNE &&
6159 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6160 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6161 SDValue One = DAG.getConstant(1, DL, OpVT);
6162 SDValue Two = DAG.getConstant(2, DL, OpVT);
6163 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6164 AddToWorklist(Add.getNode());
6165 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6166 }
6167
6168 // Try more general transforms if the predicates match and the only user of
6169 // the compares is the 'and' or 'or'.
6170 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6171 N0.hasOneUse() && N1.hasOneUse()) {
6172 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6173 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6174 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6175 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6176 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6177 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6178 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6179 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6180 }
6181
6182 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6183 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6184 // Match a shared variable operand and 2 non-opaque constant operands.
6185 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6186 // The difference of the constants must be a single bit.
6187 const APInt &CMax =
6188 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6189 const APInt &CMin =
6190 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6191 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6192 };
6193 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6194 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6195 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
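// For example, (and (setcc X, 4, ne), (setcc X, 6, ne)) becomes
// (setcc (and (sub X, 4), ~2), 0, ne): subtracting CMin maps {4,6} to {0,2},
// and masking away the single differing bit leaves 0 exactly for those values.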
6196 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6197 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6198 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6199 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6200 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6201 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6202 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6203 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6204 }
6205 }
6206 }
6207
6208 // Canonicalize equivalent operands to LL == RL.
6209 if (LL == RR && LR == RL) {
6210 CC1 = ISD::getSetCCSwappedOperands(CC1);
6211 std::swap(RL, RR);
6212 }
6213
6214 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6215 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6216 if (LL == RL && LR == RR) {
6217 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6218 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6219 if (NewCC != ISD::SETCC_INVALID &&
6220 (!LegalOperations ||
6221 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6222 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6223 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6224 }
6225
6226 return SDValue();
6227}
6228
6229static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6230 SelectionDAG &DAG) {
6231 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6232}
6233
6234static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6235 SelectionDAG &DAG) {
6236 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6237}
6238
6239// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6240static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6241 ISD::CondCode CC, unsigned OrAndOpcode,
6242 SelectionDAG &DAG,
6243 bool isFMAXNUMFMINNUM_IEEE,
6244 bool isFMAXNUMFMINNUM) {
6245 // The optimization cannot be applied for all the predicates because
6246 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6247 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6248 // applied at all if one of the operands is a signaling NaN.
6249
6250 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6251 // are non NaN values.
6252 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6253 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6254 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6255 isFMAXNUMFMINNUM_IEEE
6256 ? ISD::FMINNUM_IEEE
6257 : ISD::DELETED_NODE;
6258 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6259 (OrAndOpcode == ISD::OR)) ||
6260 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6261 (OrAndOpcode == ISD::AND)))
6262 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6263 isFMAXNUMFMINNUM_IEEE
6264 ? ISD::FMAXNUM_IEEE
6265 : ISD::DELETED_NODE;
6266 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6267 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6268 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6269 // that there are not any sNaNs, then the optimization is not valid
6270 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6271 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6272 // we can prove that we do not have any sNaNs, then we can do the
6273 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6274 // cases.
6275 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6276 (OrAndOpcode == ISD::OR)) ||
6277 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6278 (OrAndOpcode == ISD::AND)))
6279 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6280 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6281 isFMAXNUMFMINNUM_IEEE
6282 ? ISD::FMINNUM_IEEE
6283 : ISD::DELETED_NODE;
6284 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6285 (OrAndOpcode == ISD::OR)) ||
6286 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6287 (OrAndOpcode == ISD::AND)))
6288 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6289 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6290 isFMAXNUMFMINNUM_IEEE
6291 ? ISD::FMAXNUM_IEEE
6292 : ISD::DELETED_NODE;
6293 return ISD::DELETED_NODE;
6294}
6295
6296 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6297 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6298 assert(
6299 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6300 "Invalid Op to combine SETCC with");
6301
6302 // TODO: Search past casts/truncates.
6303 SDValue LHS = LogicOp->getOperand(0);
6304 SDValue RHS = LogicOp->getOperand(1);
6305 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6306 !LHS->hasOneUse() || !RHS->hasOneUse())
6307 return SDValue();
6308
6309 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6310 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6311 LogicOp, LHS.getNode(), RHS.getNode());
6312
6313 SDValue LHS0 = LHS->getOperand(0);
6314 SDValue RHS0 = RHS->getOperand(0);
6315 SDValue LHS1 = LHS->getOperand(1);
6316 SDValue RHS1 = RHS->getOperand(1);
6317 // TODO: We don't actually need a splat here, for vectors we just need the
6318 // invariants to hold for each element.
6319 auto *LHS1C = isConstOrConstSplat(LHS1);
6320 auto *RHS1C = isConstOrConstSplat(RHS1);
6321 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6322 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6323 EVT VT = LogicOp->getValueType(0);
6324 EVT OpVT = LHS0.getValueType();
6325 SDLoc DL(LogicOp);
6326
6327 // Check if the operands of an and/or operation are comparisons and if they
6328 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6329 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6330 // sequence will be replaced with min-cmp sequence:
6331 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6332 // and and-cmp-cmp will be replaced with max-cmp sequence:
6333 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6334 // The optimization does not work for `==` or `!=`.
6335 // The two comparisons should either use the same predicate, or the
6336 // predicate of one should be the swapped-operand form of the other.
6337 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6338 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6339 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6340 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6341 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6342 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6343 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6344 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6345 (OpVT.isFloatingPoint() &&
6346 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6347 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6348 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6349 CCL != ISD::SETTRUE &&
6350 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6351
6352 SDValue CommonValue, Operand1, Operand2;
6353 ISD::CondCode CC = ISD::SETCC_INVALID;
6354 if (CCL == CCR) {
6355 if (LHS0 == RHS0) {
6356 CommonValue = LHS0;
6357 Operand1 = LHS1;
6358 Operand2 = RHS1;
6359 CC = ISD::getSetCCSwappedOperands(CCL);
6360 } else if (LHS1 == RHS1) {
6361 CommonValue = LHS1;
6362 Operand1 = LHS0;
6363 Operand2 = RHS0;
6364 CC = CCL;
6365 }
6366 } else {
6367 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6368 if (LHS0 == RHS1) {
6369 CommonValue = LHS0;
6370 Operand1 = LHS1;
6371 Operand2 = RHS0;
6372 CC = CCR;
6373 } else if (RHS0 == LHS1) {
6374 CommonValue = LHS1;
6375 Operand1 = LHS0;
6376 Operand2 = RHS1;
6377 CC = CCL;
6378 }
6379 }
6380
6381 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6382 // handle it using OR/AND.
6383 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6384 CC = ISD::SETCC_INVALID;
6385 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6386 CC = ISD::SETCC_INVALID;
6387
6388 if (CC != ISD::SETCC_INVALID) {
6389 unsigned NewOpcode = ISD::DELETED_NODE;
6390 bool IsSigned = isSignedIntSetCC(CC);
6391 if (OpVT.isInteger()) {
6392 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6393 CC == ISD::SETLT || CC == ISD::SETULT);
6394 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6395 if (IsLess == IsOr)
6396 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6397 else
6398 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6399 } else if (OpVT.isFloatingPoint())
6400 NewOpcode =
6401 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6402 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6403
6404 if (NewOpcode != ISD::DELETED_NODE) {
6405 SDValue MinMaxValue =
6406 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6407 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6408 }
6409 }
6410 }
6411
6412 if (TargetPreference == AndOrSETCCFoldKind::None)
6413 return SDValue();
6414
6415 if (CCL == CCR &&
6416 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6417 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6418 const APInt &APLhs = LHS1C->getAPIntValue();
6419 const APInt &APRhs = RHS1C->getAPIntValue();
6420
6421 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6422 // case this is just a compare).
6423 if (APLhs == (-APRhs) &&
6424 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6425 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6426 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6427 // (icmp eq A, C) | (icmp eq A, -C)
6428 // -> (icmp eq Abs(A), C)
6429 // (icmp ne A, C) & (icmp ne A, -C)
6430 // -> (icmp ne Abs(A), C)
6431 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6432 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6433 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6434 } else if (TargetPreference &
6435 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6436
6437 // AndOrSETCCFoldKind::AddAnd:
6438 // A == C0 | A == C1
6439 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6440 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6441 // A != C0 & A != C1
6442 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6443 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6444
6445 // AndOrSETCCFoldKind::NotAnd:
6446 // A == C0 | A == C1
6447 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6448 // -> ~A & smin(C0, C1) == 0
6449 // A != C0 & A != C1
6450 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6451 // -> ~A & smin(C0, C1) != 0
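// For example (AddAnd), A == 8 | A == 24: smin = 8, smax = 24, and
// 24 - 8 = 16 is a power of 2, so this becomes ((A - 8) & ~16) == 0,
// which holds exactly for A in {8, 24}.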
6452
6453 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6454 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6455 APInt Dif = MaxC - MinC;
6456 if (!Dif.isZero() && Dif.isPowerOf2()) {
6457 if (MaxC.isAllOnes() &&
6458 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6459 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6460 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6461 DAG.getConstant(MinC, DL, OpVT));
6462 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6463 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6464 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6465
6466 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6467 DAG.getConstant(-MinC, DL, OpVT));
6468 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6469 DAG.getConstant(~Dif, DL, OpVT));
6470 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6471 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6472 }
6473 }
6474 }
6475 }
6476
6477 return SDValue();
6478}
6479
6480// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6481// We canonicalize to the `select` form in the middle end, but the `and` form
6482 // gets better codegen on all tested targets (arm, x86, riscv).
6483 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6484 const SDLoc &DL, SelectionDAG &DAG) {
6485 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6486 if (!isNullConstant(F))
6487 return SDValue();
6488
6489 EVT CondVT = Cond.getValueType();
6490 if (TLI.getBooleanContents(CondVT) !=
6491 TargetLowering::ZeroOrOneBooleanContent)
6492 return SDValue();
6493
6494 if (T.getOpcode() != ISD::AND)
6495 return SDValue();
6496
6497 if (!isOneConstant(T.getOperand(1)))
6498 return SDValue();
6499
6500 EVT OpVT = T.getValueType();
6501
6502 SDValue CondMask =
6503 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6504 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6505}
6506
6507/// This contains all DAGCombine rules which reduce two values combined by
6508/// an And operation to a single value. This makes them reusable in the context
6509/// of visitSELECT(). Rules involving constants are not included as
6510/// visitSELECT() already handles those cases.
6511SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6512 EVT VT = N1.getValueType();
6513 SDLoc DL(N);
6514
6515 // fold (and x, undef) -> 0
6516 if (N0.isUndef() || N1.isUndef())
6517 return DAG.getConstant(0, DL, VT);
6518
6519 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6520 return V;
6521
6522 // Canonicalize:
6523 // and(x, add) -> and(add, x)
6524 if (N1.getOpcode() == ISD::ADD)
6525 std::swap(N0, N1);
6526
6527 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6528 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6529 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6530 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6531 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6532 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6533 // immediate for an add, but it is legal if its top c2 bits are set,
6534 // transform the ADD so the immediate doesn't need to be materialized
6535 // in a register.
6536 APInt ADDC = ADDI->getAPIntValue();
6537 APInt SRLC = SRLI->getAPIntValue();
6538 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6539 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6540 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6541 SRLC.getZExtValue());
6542 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6543 ADDC |= Mask;
6544 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6545 SDLoc DL0(N0);
6546 SDValue NewAdd =
6547 DAG.getNode(ISD::ADD, DL0, VT,
6548 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6549 CombineTo(N0.getNode(), NewAdd);
6550 // Return N so it doesn't get rechecked!
6551 return SDValue(N, 0);
6552 }
6553 }
6554 }
6555 }
6556 }
6557 }
6558
6559 return SDValue();
6560}
6561
6562bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6563 EVT LoadResultTy, EVT &ExtVT) {
6564 if (!AndC->getAPIntValue().isMask())
6565 return false;
6566
6567 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6568
6569 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6570 EVT LoadedVT = LoadN->getMemoryVT();
6571
6572 if (ExtVT == LoadedVT &&
6573 (!LegalOperations ||
6574 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6575 // ZEXTLOAD will match without needing to change the size of the value being
6576 // loaded.
6577 return true;
6578 }
6579
6580 // Do not change the width of volatile or atomic loads.
6581 if (!LoadN->isSimple())
6582 return false;
6583
6584 // Do not generate loads of non-round integer types since these can
6585 // be expensive (and would be wrong if the type is not byte sized).
6586 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6587 return false;
6588
6589 if (LegalOperations &&
6590 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6591 return false;
6592
6593 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6594 return false;
6595
6596 return true;
6597}
6598
6599bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6600 ISD::LoadExtType ExtType, EVT &MemVT,
6601 unsigned ShAmt) {
6602 if (!LDST)
6603 return false;
6604 // Only allow byte offsets.
6605 if (ShAmt % 8)
6606 return false;
6607
6608 // Do not generate loads of non-round integer types since these can
6609 // be expensive (and would be wrong if the type is not byte sized).
6610 if (!MemVT.isRound())
6611 return false;
6612
6613 // Don't change the width of volatile or atomic loads.
6614 if (!LDST->isSimple())
6615 return false;
6616
6617 EVT LdStMemVT = LDST->getMemoryVT();
6618
6619 // Bail out when changing the scalable property, since we can't be sure that
6620 // we're actually narrowing here.
6621 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6622 return false;
6623
6624 // Verify that we are actually reducing a load width here.
6625 if (LdStMemVT.bitsLT(MemVT))
6626 return false;
6627
6628 // Ensure that this isn't going to produce an unsupported memory access.
6629 if (ShAmt) {
6630 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6631 const unsigned ByteShAmt = ShAmt / 8;
6632 const Align LDSTAlign = LDST->getAlign();
6633 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6634 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6635 LDST->getAddressSpace(), NarrowAlign,
6636 LDST->getMemOperand()->getFlags()))
6637 return false;
6638 }
6639
6640 // It's not possible to generate a constant of extended or untyped type.
6641 EVT PtrType = LDST->getBasePtr().getValueType();
6642 if (PtrType == MVT::Untyped || PtrType.isExtended())
6643 return false;
6644
6645 if (isa<LoadSDNode>(LDST)) {
6646 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6647 // Don't transform one with multiple uses, this would require adding a new
6648 // load.
6649 if (!SDValue(Load, 0).hasOneUse())
6650 return false;
6651
6652 if (LegalOperations &&
6653 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6654 return false;
6655
6656 // For the transform to be legal, the load must produce only two values
6657 // (the value loaded and the chain). Don't transform a pre-increment
6658 // load, for example, which produces an extra value. Otherwise the
6659 // transformation is not equivalent, and the downstream logic to replace
6660 // uses gets things wrong.
6661 if (Load->getNumValues() > 2)
6662 return false;
6663
6664 // If the load that we're shrinking is an extload and we're not just
6665 // discarding the extension we can't simply shrink the load. Bail.
6666 // TODO: It would be possible to merge the extensions in some cases.
6667 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6668 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6669 return false;
6670
6671 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6672 return false;
6673 } else {
6674 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6675 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6676 // Can't write outside the original store
6677 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6678 return false;
6679
6680 if (LegalOperations &&
6681 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6682 return false;
6683 }
6684 return true;
6685}
6686
6687bool DAGCombiner::SearchForAndLoads(SDNode *N,
6688 SmallVectorImpl<LoadSDNode*> &Loads,
6689 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6690 ConstantSDNode *Mask,
6691 SDNode *&NodeToMask) {
6692 // Recursively search for the operands, looking for loads which can be
6693 // narrowed.
6694 for (SDValue Op : N->op_values()) {
6695 if (Op.getValueType().isVector())
6696 return false;
6697
6698 // Some constants may need fixing up later if they are too large.
6699 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6700 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6701 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6702 NodesWithConsts.insert(N);
6703 continue;
6704 }
6705
6706 if (!Op.hasOneUse())
6707 return false;
6708
6709 switch(Op.getOpcode()) {
6710 case ISD::LOAD: {
6711 auto *Load = cast<LoadSDNode>(Op);
6712 EVT ExtVT;
6713 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6714 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6715
6716 // ZEXTLOAD is already small enough.
6717 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6718 ExtVT.bitsGE(Load->getMemoryVT()))
6719 continue;
6720
6721 // Use LE to convert equal sized loads to zext.
6722 if (ExtVT.bitsLE(Load->getMemoryVT()))
6723 Loads.push_back(Load);
6724
6725 continue;
6726 }
6727 return false;
6728 }
6729 case ISD::ZERO_EXTEND:
6730 case ISD::AssertZext: {
6731 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6732 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6733 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6734 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6735 Op.getOperand(0).getValueType();
6736
6737 // We can accept extending nodes if the mask is wider than or equal in
6738 // width to the original type.
6739 if (ExtVT.bitsGE(VT))
6740 continue;
6741 break;
6742 }
6743 case ISD::OR:
6744 case ISD::XOR:
6745 case ISD::AND:
6746 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6747 NodeToMask))
6748 return false;
6749 continue;
6750 }
6751
6752 // Allow one node which will be masked along with any loads found.
6753 if (NodeToMask)
6754 return false;
6755
6756 // Also ensure that the node to be masked only produces one data result.
6757 NodeToMask = Op.getNode();
6758 if (NodeToMask->getNumValues() > 1) {
6759 bool HasValue = false;
6760 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6761 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6762 if (VT != MVT::Glue && VT != MVT::Other) {
6763 if (HasValue) {
6764 NodeToMask = nullptr;
6765 return false;
6766 }
6767 HasValue = true;
6768 }
6769 }
6770 assert(HasValue && "Node to be masked has no data result?");
6771 }
6772 }
6773 return true;
6774}
6775
6776bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6777 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6778 if (!Mask)
6779 return false;
6780
6781 if (!Mask->getAPIntValue().isMask())
6782 return false;
6783
6784 // No need to do anything if the and directly uses a load.
6785 if (isa<LoadSDNode>(N->getOperand(0)))
6786 return false;
6787
6788 SmallVector<LoadSDNode*, 8> Loads;
6789 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6790 SDNode *FixupNode = nullptr;
6791 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6792 if (Loads.empty())
6793 return false;
6794
6795 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6796 SDValue MaskOp = N->getOperand(1);
6797
6798 // If it exists, fixup the single node we allow in the tree that needs
6799 // masking.
6800 if (FixupNode) {
6801 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6802 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6803 FixupNode->getValueType(0),
6804 SDValue(FixupNode, 0), MaskOp);
6805 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6806 if (And.getOpcode() == ISD::AND)
6807 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6808 }
6809
6810 // Narrow any constants that need it.
6811 for (auto *LogicN : NodesWithConsts) {
6812 SDValue Op0 = LogicN->getOperand(0);
6813 SDValue Op1 = LogicN->getOperand(1);
6814
6815 if (isa<ConstantSDNode>(Op0))
6816 Op0 =
6817 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6818
6819 if (isa<ConstantSDNode>(Op1))
6820 Op1 =
6821 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6822
6823 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6824 std::swap(Op0, Op1);
6825
6826 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6827 }
6828
6829 // Create narrow loads.
6830 for (auto *Load : Loads) {
6831 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6832 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6833 SDValue(Load, 0), MaskOp);
6834 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6835 if (And.getOpcode() == ISD::AND)
6836 And = SDValue(
6837 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6838 SDValue NewLoad = reduceLoadWidth(And.getNode());
6839 assert(NewLoad &&
6840 "Shouldn't be masking the load if it can't be narrowed");
6841 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6842 }
6843 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6844 return true;
6845 }
6846 return false;
6847}
6848
6849// Unfold
6850// x & (-1 'logical shift' y)
6851// To
6852// (x 'opposite logical shift' y) 'logical shift' y
6853// if it is better for performance.
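// For example, with a left shift, x & (-1 << y) clears the low y bits;
// (x >> y) << y computes the same value without materializing the mask in a
// register.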
6854SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6855 assert(N->getOpcode() == ISD::AND);
6856
6857 SDValue N0 = N->getOperand(0);
6858 SDValue N1 = N->getOperand(1);
6859
6860 // Do we actually prefer shifts over mask?
6861 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6862 return SDValue();
6863
6864 // Try to match (-1 '[outer] logical shift' y)
6865 unsigned OuterShift;
6866 unsigned InnerShift; // The opposite direction to the OuterShift.
6867 SDValue Y; // Shift amount.
6868 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6869 if (!M.hasOneUse())
6870 return false;
6871 OuterShift = M->getOpcode();
6872 if (OuterShift == ISD::SHL)
6873 InnerShift = ISD::SRL;
6874 else if (OuterShift == ISD::SRL)
6875 InnerShift = ISD::SHL;
6876 else
6877 return false;
6878 if (!isAllOnesConstant(M->getOperand(0)))
6879 return false;
6880 Y = M->getOperand(1);
6881 return true;
6882 };
6883
6884 SDValue X;
6885 if (matchMask(N1))
6886 X = N0;
6887 else if (matchMask(N0))
6888 X = N1;
6889 else
6890 return SDValue();
6891
6892 SDLoc DL(N);
6893 EVT VT = N->getValueType(0);
6894
6895 // tmp = x 'opposite logical shift' y
6896 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6897 // ret = tmp 'logical shift' y
6898 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6899
6900 return T1;
6901}
6902
6903/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6904/// For a target with a bit test, this is expected to become test + set and save
6905/// at least 1 instruction.
6906 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6907 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6908
6909 // Look through an optional extension.
6910 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6911 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6912 And0 = And0.getOperand(0);
6913 if (!isOneConstant(And1) || !And0.hasOneUse())
6914 return SDValue();
6915
6916 SDValue Src = And0;
6917
6918 // Attempt to find a 'not' op.
6919 // TODO: Should we favor test+set even without the 'not' op?
6920 bool FoundNot = false;
6921 if (isBitwiseNot(Src)) {
6922 FoundNot = true;
6923 Src = Src.getOperand(0);
6924
6925 // Look through an optional truncation. The source operand may not be the
6926 // same type as the original 'and', but that is ok because we are masking
6927 // off everything but the low bit.
6928 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6929 Src = Src.getOperand(0);
6930 }
6931
6932 // Match a shift-right by constant.
6933 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6934 return SDValue();
6935
6936 // This is probably not worthwhile without a supported type.
6937 EVT SrcVT = Src.getValueType();
6938 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6939 if (!TLI.isTypeLegal(SrcVT))
6940 return SDValue();
6941
6942 // We might have looked through casts that make this transform invalid.
6943 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6944 SDValue ShiftAmt = Src.getOperand(1);
6945 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6946 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6947 return SDValue();
6948
6949 // Set source to shift source.
6950 Src = Src.getOperand(0);
6951
6952 // Try again to find a 'not' op.
6953 // TODO: Should we favor test+set even with two 'not' ops?
6954 if (!FoundNot) {
6955 if (!isBitwiseNot(Src))
6956 return SDValue();
6957 Src = Src.getOperand(0);
6958 }
6959
6960 if (!TLI.hasBitTest(Src, ShiftAmt))
6961 return SDValue();
6962
6963 // Turn this into a bit-test pattern using mask op + setcc:
6964 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6965 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
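// For example, with C == 3 this tests bit 3:
//   and (not (srl X, 3)), 1 --> (and X, 8) == 0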
6966 SDLoc DL(And);
6967 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6968 EVT CCVT =
6969 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6970 SDValue Mask = DAG.getConstant(
6971 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6972 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6973 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6974 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6975 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6976}
6977
6978/// For targets that support usubsat, match a bit-hack form of that operation
6979/// that ends in 'and' and convert it.
6980 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6981 EVT VT = N->getValueType(0);
6982 unsigned BitWidth = VT.getScalarSizeInBits();
6983 APInt SignMask = APInt::getSignMask(BitWidth);
6984
6985 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6986 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6987 // xor/add with SMIN (signmask) are logically equivalent.
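// For example, for i8, X s>> 7 is 0 when X < 128 and all-ones otherwise, so:
//   X < 128:  (X ^ 128) & 0    = 0       = usubsat X, 128
//   X >= 128: (X ^ 128) & 0xFF = X - 128 = usubsat X, 128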
6988 SDValue X;
6989 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6990 m_OneUse(m_Sra(m_Deferred(X),
6991 m_SpecificInt(BitWidth - 1))))) &&
6992 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6993 m_OneUse(m_Sra(m_Deferred(X),
6994 m_SpecificInt(BitWidth - 1))))))
6995 return SDValue();
6996
6997 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6998 DAG.getConstant(SignMask, DL, VT));
6999}
7000
7001/// Given a bitwise logic operation N with a matching bitwise logic operand,
7002/// fold a pattern where 2 of the source operands are identically shifted
7003/// values. For example:
7004/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7005 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7006 SelectionDAG &DAG) {
7007 unsigned LogicOpcode = N->getOpcode();
7008 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7009 "Expected bitwise logic operation");
7010
7011 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7012 return SDValue();
7013
7014 // Match another bitwise logic op and a shift.
7015 unsigned ShiftOpcode = ShiftOp.getOpcode();
7016 if (LogicOp.getOpcode() != LogicOpcode ||
7017 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7018 ShiftOpcode == ISD::SRA))
7019 return SDValue();
7020
7021 // Match another shift op inside the first logic operand. Handle both commuted
7022 // possibilities.
7023 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7024 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7025 SDValue X1 = ShiftOp.getOperand(0);
7026 SDValue Y = ShiftOp.getOperand(1);
7027 SDValue X0, Z;
7028 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7029 LogicOp.getOperand(0).getOperand(1) == Y) {
7030 X0 = LogicOp.getOperand(0).getOperand(0);
7031 Z = LogicOp.getOperand(1);
7032 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7033 LogicOp.getOperand(1).getOperand(1) == Y) {
7034 X0 = LogicOp.getOperand(1).getOperand(0);
7035 Z = LogicOp.getOperand(0);
7036 } else {
7037 return SDValue();
7038 }
7039
7040 EVT VT = N->getValueType(0);
7041 SDLoc DL(N);
7042 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7043 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7044 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7045}
7046
7047/// Given a tree of logic operations with shape like
7048/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7049/// try to match and fold shift operations with the same shift amount.
7050/// For example:
7051/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7052/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7053 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7054 SDValue RightHand, SelectionDAG &DAG) {
7055 unsigned LogicOpcode = N->getOpcode();
7056 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7057 "Expected bitwise logic operation");
7058 if (LeftHand.getOpcode() != LogicOpcode ||
7059 RightHand.getOpcode() != LogicOpcode)
7060 return SDValue();
7061 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7062 return SDValue();
7063
7064 // Try to match one of following patterns:
7065 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7066 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7067 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7068 // itself.
7069 SDValue CombinedShifts, W;
7070 SDValue R0 = RightHand.getOperand(0);
7071 SDValue R1 = RightHand.getOperand(1);
7072 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7073 W = R1;
7074 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7075 W = R0;
7076 else
7077 return SDValue();
7078
7079 EVT VT = N->getValueType(0);
7080 SDLoc DL(N);
7081 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7082}
7083
7084SDValue DAGCombiner::visitAND(SDNode *N) {
7085 SDValue N0 = N->getOperand(0);
7086 SDValue N1 = N->getOperand(1);
7087 EVT VT = N1.getValueType();
7088 SDLoc DL(N);
7089
7090 // x & x --> x
7091 if (N0 == N1)
7092 return N0;
7093
7094 // fold (and c1, c2) -> c1&c2
7095 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7096 return C;
7097
7098 // canonicalize constant to RHS
7099 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7100 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7101 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7102
7103 if (areBitwiseNotOfEachother(N0, N1))
7104 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7105
7106 // fold vector ops
7107 if (VT.isVector()) {
7108 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7109 return FoldedVOp;
7110
7111 // fold (and x, 0) -> 0, vector edition
7112 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7113 // do not return N1, because undef node may exist in N1
7114 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7115 N1.getValueType());
7116
7117 // fold (and x, -1) -> x, vector edition
7118 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7119 return N0;
7120
7121 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7122 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7123 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7124 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7125 EVT LoadVT = MLoad->getMemoryVT();
7126 EVT ExtVT = VT;
7127 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7128 // For this AND to be a zero extension of the masked load the elements
7129 // of the BuildVec must mask the bottom bits of the extended element
7130 // type
7131 uint64_t ElementSize =
7132 LoadVT.getVectorElementType().getScalarSizeInBits();
7133 if (Splat->getAPIntValue().isMask(ElementSize)) {
7134 SDValue NewLoad = DAG.getMaskedLoad(
7135 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7136 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7137 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7138 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7139 bool LoadHasOtherUsers = !N0.hasOneUse();
7140 CombineTo(N, NewLoad);
7141 if (LoadHasOtherUsers)
7142 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7143 return SDValue(N, 0);
7144 }
7145 }
7146 }
7147 }
7148
7149 // fold (and x, -1) -> x
7150 if (isAllOnesConstant(N1))
7151 return N0;
7152
7153 // if (and x, c) is known to be zero, return 0
7154 unsigned BitWidth = VT.getScalarSizeInBits();
7155 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7156 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7157 return DAG.getConstant(0, DL, VT);
7158
7159 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7160 return R;
7161
7162 if (SDValue NewSel = foldBinOpIntoSelect(N))
7163 return NewSel;
7164
7165 // reassociate and
7166 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7167 return RAND;
7168
7169 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7170 if (SDValue SD =
7171 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7172 return SD;
7173
7174 // fold (and (or x, C), D) -> D if (C & D) == D
7175 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7176 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7177 };
7178 if (N0.getOpcode() == ISD::OR &&
7179 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7180 return N1;
7181
7182 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7183 SDValue N0Op0 = N0.getOperand(0);
7184 EVT SrcVT = N0Op0.getValueType();
7185 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7186 APInt Mask = ~N1C->getAPIntValue();
7187 Mask = Mask.trunc(SrcBitWidth);
7188
7189 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7190 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7191 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7192
7193 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7194 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7195 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7196 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7197 TLI.isNarrowingProfitable(N, VT, SrcVT))
7198 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7199 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7200 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7201 }
7202
7203 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7204 if (ISD::isExtOpcode(N0.getOpcode())) {
7205 unsigned ExtOpc = N0.getOpcode();
7206 SDValue N0Op0 = N0.getOperand(0);
7207 if (N0Op0.getOpcode() == ISD::AND &&
7208 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7209 N0->hasOneUse() && N0Op0->hasOneUse()) {
7210 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7211 {N0Op0.getOperand(1)})) {
7212 if (SDValue NewMask =
7213 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7214 return DAG.getNode(ISD::AND, DL, VT,
7215 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7216 NewMask);
7217 }
7218 }
7219 }
7220 }
7221
7222 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7223 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7224 // already be zero by virtue of the width of the base type of the load.
7225 //
7226 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7227 // more cases.
7228 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7230 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7231 N0.getOperand(0).getResNo() == 0) ||
7232 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7233 auto *Load =
7234 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7235
7236 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7237 // This can be a pure constant or a vector splat, in which case we treat the
7238 // vector as a scalar and use the splat value.
7239 APInt Constant = APInt::getZero(1);
7240 if (const ConstantSDNode *C = isConstOrConstSplat(
7241 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7242 Constant = C->getAPIntValue();
7243 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7244 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7245 APInt SplatValue, SplatUndef;
7246 unsigned SplatBitSize;
7247 bool HasAnyUndefs;
7248 // Endianness should not matter here. Code below makes sure that we only
7249 // use the result if the SplatBitSize is a multiple of the vector element
7250 // size. And after that we AND all element sized parts of the splat
7251 // together. So the end result should be the same regardless of in which
7252 // order we do those operations.
7253 const bool IsBigEndian = false;
7254 bool IsSplat =
7255 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7256 HasAnyUndefs, EltBitWidth, IsBigEndian);
7257
7258 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7259 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7260 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7261 // Undef bits can contribute to a possible optimisation if set, so
7262 // set them.
7263 SplatValue |= SplatUndef;
7264
7265 // The splat value may be something like "0x00FFFFFF", which means 0 for
7266 // the first vector value and FF for the rest, repeating. We need a mask
7267 // that will apply equally to all members of the vector, so AND all the
7268 // lanes of the constant together.
7269 Constant = APInt::getAllOnes(EltBitWidth);
7270 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7271 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7272 }
7273 }
7274
7275 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7276 // actually legal and isn't going to get expanded, else this is a false
7277 // optimisation.
7278 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7279 Load->getValueType(0),
7280 Load->getMemoryVT());
7281
7282 // Resize the constant to the same size as the original memory access before
7283 // extension. If it is still the AllOnesValue then this AND is completely
7284 // unneeded.
7285 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7286
7287 bool B;
7288 switch (Load->getExtensionType()) {
7289 default: B = false; break;
7290 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7291 case ISD::ZEXTLOAD:
7292 case ISD::NON_EXTLOAD: B = true; break;
7293 }
7294
7295 if (B && Constant.isAllOnes()) {
7296 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7297 // preserve semantics once we get rid of the AND.
7298 SDValue NewLoad(Load, 0);
7299
7300 // Fold the AND away. NewLoad may get replaced immediately.
7301 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7302
7303 if (Load->getExtensionType() == ISD::EXTLOAD) {
7304 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7305 Load->getValueType(0), SDLoc(Load),
7306 Load->getChain(), Load->getBasePtr(),
7307 Load->getOffset(), Load->getMemoryVT(),
7308 Load->getMemOperand());
7309 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7310 if (Load->getNumValues() == 3) {
7311 // PRE/POST_INC loads have 3 values.
7312 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7313 NewLoad.getValue(2) };
7314 CombineTo(Load, To, 3, true);
7315 } else {
7316 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7317 }
7318 }
7319
7320 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7321 }
7322 }
7323
7324 // Try to convert a constant mask AND into a shuffle clear mask.
7325 if (VT.isVector())
7326 if (SDValue Shuffle = XformToShuffleWithZero(N))
7327 return Shuffle;
7328
7329 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7330 return Combined;
7331
7332 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7333 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7334 SDValue Ext = N0.getOperand(0);
7335 EVT ExtVT = Ext->getValueType(0);
7336 SDValue Extendee = Ext->getOperand(0);
7337
7338 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7339 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7340 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7341 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7342 // => (extract_subvector (iN_zeroext v))
7343 SDValue ZeroExtExtendee =
7344 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7345
7346 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7347 N0.getOperand(1));
7348 }
7349 }
7350
7351 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7352 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7353 EVT MemVT = GN0->getMemoryVT();
7354 EVT ScalarVT = MemVT.getScalarType();
7355
7356 if (SDValue(GN0, 0).hasOneUse() &&
7357 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7358 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7359 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7360 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7361
7362 SDValue ZExtLoad = DAG.getMaskedGather(
7363 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7364 GN0->getIndexType(), ISD::ZEXTLOAD);
7365
7366 CombineTo(N, ZExtLoad);
7367 AddToWorklist(ZExtLoad.getNode());
7368 // Avoid recheck of N.
7369 return SDValue(N, 0);
7370 }
7371 }
7372
7373 // fold (and (load x), 255) -> (zextload x, i8)
7374 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7375 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7376 if (SDValue Res = reduceLoadWidth(N))
7377 return Res;
7378
7379 if (LegalTypes) {
7380 // Attempt to propagate the AND back up to the leaves which, if they're
7381 // loads, can be combined to narrow loads and the AND node can be removed.
7382 // Perform after legalization so that extend nodes will already be
7383 // combined into the loads.
7384 if (BackwardsPropagateMask(N))
7385 return SDValue(N, 0);
7386 }
7387
7388 if (SDValue Combined = visitANDLike(N0, N1, N))
7389 return Combined;
7390
7391 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7392 if (N0.getOpcode() == N1.getOpcode())
7393 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7394 return V;
7395
7396 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7397 return R;
7398 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7399 return R;
7400
7401 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7402 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7403 SDValue X, Y, Z, NotY;
7404 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7405 if (sd_match(N,
7406 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7407 sd_match(NotY, m_Not(m_Value(Y))) &&
7408 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7409 return DAG.getNode(ISD::AND, DL, VT, X,
7410 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7411
7412 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7413 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7414 if (sd_match(N, m_And(m_Value(X),
7415 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7416 sd_match(NotY, m_Not(m_Value(Y))) &&
7417 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7418 return DAG.getNode(ISD::AND, DL, VT, X,
7419 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7420
7421 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7422 // If we are shifting down an extended sign bit, see if we can simplify
7423 // this to shifting the MSB directly to expose further simplifications.
7424 // This pattern often appears after sext_inreg legalization.
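// For example, if X was sign-extended from i8 inside an i32 (so it has at
// least 25 sign bits), then for any C in [7, 30] bit C equals the sign bit
// and the fold produces (srl X, 31).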
7425 APInt Amt;
7426 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7427 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7428 return DAG.getNode(ISD::SRL, DL, VT, X,
7429 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7430
7431 // Masking the negated extension of a boolean is just the zero-extended
7432 // boolean:
7433 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7434 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7435 //
7436 // Note: the SimplifyDemandedBits fold below can make an information-losing
7437 // transform, and then we have no way to find this better fold.
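// For example, zext(bool X) is 0 or 1, so (sub 0, zext(bool X)) is 0 or -1,
// and masking that with 1 gives back 0 or 1, i.e. zext(bool X).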
7438 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7439 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7440 X.getOperand(0).getScalarValueSizeInBits() == 1)
7441 return X;
7442 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7443 X.getOperand(0).getScalarValueSizeInBits() == 1)
7444 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7445 }
7446
7447 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7448 // fold (and (sra)) -> (and (srl)) when possible.
7449 if (SimplifyDemandedBits(SDValue(N, 0)))
7450 return SDValue(N, 0);
7451
7452 // fold (zext_inreg (extload x)) -> (zextload x)
7453 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7454 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7455 (ISD::isEXTLoad(N0.getNode()) ||
7456 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7457 auto *LN0 = cast<LoadSDNode>(N0);
7458 EVT MemVT = LN0->getMemoryVT();
7459 // If we zero all the possible extended bits, then we can turn this into
7460 // a zextload if we are running before legalize or the operation is legal.
7461 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7462 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7463 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7464 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7465 ((!LegalOperations && LN0->isSimple()) ||
7466 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7467 SDValue ExtLoad =
7468 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7469 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7470 AddToWorklist(N);
7471 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7472 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7473 }
7474 }
7475
7476 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7477 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7478 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7479 N0.getOperand(1), false))
7480 return BSwap;
7481 }
7482
7483 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7484 return Shifts;
7485
7486 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7487 return V;
7488
7489 // Recognize the following pattern:
7490 //
7491 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7492 //
7493 // where bitmask is a mask that clears the upper bits of AndVT. The
7494 // number of bits in bitmask must be a power of two.
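// For example, (and (sign_extend i8 X to i32), 0xFF) --> (zero_extend X to i32).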
7495 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7496 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7497 return false;
7498
7499 auto *C = dyn_cast<ConstantSDNode>(RHS);
7500 if (!C)
7501 return false;
7502
7503 if (!C->getAPIntValue().isMask(
7504 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7505 return false;
7506
7507 return true;
7508 };
7509
7510 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7511 if (IsAndZeroExtMask(N0, N1))
7512 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7513
7514 if (hasOperation(ISD::USUBSAT, VT))
7515 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7516 return V;
7517
7518 // Postpone until legalization completed to avoid interference with bswap
7519 // folding
7520 if (LegalOperations || VT.isVector())
7521 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7522 return R;
7523
7524 return SDValue();
7525}
7526
7527/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7528SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7529 bool DemandHighBits) {
7530 if (!LegalOperations)
7531 return SDValue();
7532
7533 EVT VT = N->getValueType(0);
7534 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7535 return SDValue();
7536 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7537 return SDValue();
7538
7539 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7540 bool LookPassAnd0 = false;
7541 bool LookPassAnd1 = false;
7542 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7543 std::swap(N0, N1);
7544 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7545 std::swap(N0, N1);
7546 if (N0.getOpcode() == ISD::AND) {
7547 if (!N0->hasOneUse())
7548 return SDValue();
7549 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7550 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7551 // This is needed for X86.
7552 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7553 N01C->getZExtValue() != 0xFFFF))
7554 return SDValue();
7555 N0 = N0.getOperand(0);
7556 LookPassAnd0 = true;
7557 }
7558
7559 if (N1.getOpcode() == ISD::AND) {
7560 if (!N1->hasOneUse())
7561 return SDValue();
7562 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7563 if (!N11C || N11C->getZExtValue() != 0xFF)
7564 return SDValue();
7565 N1 = N1.getOperand(0);
7566 LookPassAnd1 = true;
7567 }
7568
7569 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7570 std::swap(N0, N1);
7571 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7572 return SDValue();
7573 if (!N0->hasOneUse() || !N1->hasOneUse())
7574 return SDValue();
7575
7576 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7577 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7578 if (!N01C || !N11C)
7579 return SDValue();
7580 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7581 return SDValue();
7582
7583 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7584 SDValue N00 = N0->getOperand(0);
7585 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7586 if (!N00->hasOneUse())
7587 return SDValue();
7588 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7589 if (!N001C || N001C->getZExtValue() != 0xFF)
7590 return SDValue();
7591 N00 = N00.getOperand(0);
7592 LookPassAnd0 = true;
7593 }
7594
7595 SDValue N10 = N1->getOperand(0);
7596 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7597 if (!N10->hasOneUse())
7598 return SDValue();
7599 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7600 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7601 // for X86.
7602 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7603 N101C->getZExtValue() != 0xFFFF))
7604 return SDValue();
7605 N10 = N10.getOperand(0);
7606 LookPassAnd1 = true;
7607 }
7608
7609 if (N00 != N10)
7610 return SDValue();
7611
7612 // Make sure everything beyond the low halfword gets set to zero since the SRL
7613 // 16 will clear the top bits.
7614 unsigned OpSizeInBits = VT.getSizeInBits();
7615 if (OpSizeInBits > 16) {
7616 // If the left-shift isn't masked out then the only way this is a bswap is
7617 // if all bits beyond the low 8 are 0. In that case the entire pattern
7618 // reduces to a left shift anyway: leave it for other parts of the combiner.
7619 if (DemandHighBits && !LookPassAnd0)
7620 return SDValue();
7621
7622 // However, if the right shift isn't masked out then it might be because
7623 // it's not needed. See if we can spot that too. If the high bits aren't
7624 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7625 // upper bits to be zero.
7626 if (!LookPassAnd1) {
7627 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7628 if (!DAG.MaskedValueIsZero(N10,
7629 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7630 return SDValue();
7631 }
7632 }
7633
7634 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7635 if (OpSizeInBits > 16) {
7636 SDLoc DL(N);
7637 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7638 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7639 }
7640 return Res;
7641}
7642
7643/// Return true if the specified node is an element that makes up a 32-bit
7644/// packed halfword byteswap.
7645/// ((x & 0x000000ff) << 8) |
7646/// ((x & 0x0000ff00) >> 8) |
7647/// ((x & 0x00ff0000) << 8) |
7648/// ((x & 0xff000000) >> 8)
7649 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7650 if (!N->hasOneUse())
7651 return false;
7652
7653 unsigned Opc = N.getOpcode();
7654 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7655 return false;
7656
7657 SDValue N0 = N.getOperand(0);
7658 unsigned Opc0 = N0.getOpcode();
7659 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7660 return false;
7661
7662 ConstantSDNode *N1C = nullptr;
7663 // SHL or SRL: look upstream for AND mask operand
7664 if (Opc == ISD::AND)
7665 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7666 else if (Opc0 == ISD::AND)
7667 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7668 if (!N1C)
7669 return false;
7670
7671 unsigned MaskByteOffset;
7672 switch (N1C->getZExtValue()) {
7673 default:
7674 return false;
7675 case 0xFF: MaskByteOffset = 0; break;
7676 case 0xFF00: MaskByteOffset = 1; break;
7677 case 0xFFFF:
7678 // In case demanded bits didn't clear the bits that will be shifted out.
7679 // This is needed for X86.
7680 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7681 MaskByteOffset = 1;
7682 break;
7683 }
7684 return false;
7685 case 0xFF0000: MaskByteOffset = 2; break;
7686 case 0xFF000000: MaskByteOffset = 3; break;
7687 }
7688
7689 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7690 if (Opc == ISD::AND) {
7691 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7692 // (x >> 8) & 0xff
7693 // (x >> 8) & 0xff0000
7694 if (Opc0 != ISD::SRL)
7695 return false;
7696 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7697 if (!C || C->getZExtValue() != 8)
7698 return false;
7699 } else {
7700 // (x << 8) & 0xff00
7701 // (x << 8) & 0xff000000
7702 if (Opc0 != ISD::SHL)
7703 return false;
7704 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7705 if (!C || C->getZExtValue() != 8)
7706 return false;
7707 }
7708 } else if (Opc == ISD::SHL) {
7709 // (x & 0xff) << 8
7710 // (x & 0xff0000) << 8
7711 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7712 return false;
7713 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7714 if (!C || C->getZExtValue() != 8)
7715 return false;
7716 } else { // Opc == ISD::SRL
7717 // (x & 0xff00) >> 8
7718 // (x & 0xff000000) >> 8
7719 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7720 return false;
7721 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7722 if (!C || C->getZExtValue() != 8)
7723 return false;
7724 }
7725
7726 if (Parts[MaskByteOffset])
7727 return false;
7728
7729 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7730 return true;
7731}
7732
7733// Match 2 elements of a packed halfword bswap.
7734 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7735 if (N.getOpcode() == ISD::OR)
7736 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7737 isBSwapHWordElement(N.getOperand(1), Parts);
7738
7739 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7740 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7741 if (!C || C->getAPIntValue() != 16)
7742 return false;
7743 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7744 return true;
7745 }
7746
7747 return false;
7748}
7749
7750// Match this pattern:
7751// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7752// And rewrite this to:
7753// (rotr (bswap A), 16)
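// For example, A = 0xAABBCCDD:
//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff) = 0xBBAADDCC
//   (rotr (bswap A), 16) = rotr(0xDDCCBBAA, 16) = 0xBBAADDCC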
7754 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7755 SelectionDAG &DAG, SDNode *N, SDValue N0,
7756 SDValue N1, EVT VT) {
7757 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7758 "MatchBSwapHWordOrAndAnd: expecting i32");
7759 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7760 return SDValue();
7761 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7762 return SDValue();
7763 // TODO: this is too restrictive; lifting this restriction requires more tests
7764 if (!N0->hasOneUse() || !N1->hasOneUse())
7765 return SDValue();
7766 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7767 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7768 if (!Mask0 || !Mask1)
7769 return SDValue();
7770 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7771 Mask1->getAPIntValue() != 0x00ff00ff)
7772 return SDValue();
7773 SDValue Shift0 = N0.getOperand(0);
7774 SDValue Shift1 = N1.getOperand(0);
7775 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7776 return SDValue();
7777 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7778 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7779 if (!ShiftAmt0 || !ShiftAmt1)
7780 return SDValue();
7781 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7782 return SDValue();
7783 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7784 return SDValue();
7785
7786 SDLoc DL(N);
7787 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7788 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7789 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7790}
7791
7792/// Match a 32-bit packed halfword bswap. That is
7793/// ((x & 0x000000ff) << 8) |
7794/// ((x & 0x0000ff00) >> 8) |
7795/// ((x & 0x00ff0000) << 8) |
7796/// ((x & 0xff000000) >> 8)
7797/// => (rotl (bswap x), 16)
7798SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7799 if (!LegalOperations)
7800 return SDValue();
7801
7802 EVT VT = N->getValueType(0);
7803 if (VT != MVT::i32)
7804 return SDValue();
7805 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7806 return SDValue();
7807
7808 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
7809 return BSwap;
7810
7811 // Try again with commuted operands.
7812 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
7813 return BSwap;
7814
7815
7816 // Look for either
7817 // (or (bswaphpair), (bswaphpair))
7818 // (or (or (bswaphpair), (and)), (and))
7819 // (or (or (and), (bswaphpair)), (and))
7820 SDNode *Parts[4] = {};
7821
7822 if (isBSwapHWordPair(N0, Parts)) {
7823 // (or (or (and), (and)), (or (and), (and)))
7824 if (!isBSwapHWordPair(N1, Parts))
7825 return SDValue();
7826 } else if (N0.getOpcode() == ISD::OR) {
7827 // (or (or (or (and), (and)), (and)), (and))
7828 if (!isBSwapHWordElement(N1, Parts))
7829 return SDValue();
7830 SDValue N00 = N0.getOperand(0);
7831 SDValue N01 = N0.getOperand(1);
7832 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7833 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7834 return SDValue();
7835 } else {
7836 return SDValue();
7837 }
7838
7839 // Make sure the parts are all coming from the same node.
7840 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7841 return SDValue();
7842
7843 SDLoc DL(N);
7844 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7845 SDValue(Parts[0], 0));
7846
7847 // Result of the bswap should be rotated by 16. If it's not legal, then
7848 // do (x << 16) | (x >> 16).
7849 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7850  if (hasOperation(ISD::ROTL, VT))
7851    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7852  if (hasOperation(ISD::ROTR, VT))
7853    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7854 return DAG.getNode(ISD::OR, DL, VT,
7855 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7856 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7857}
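// Illustrative worked example: for x = 0xAABBCCDD the four masked terms in the
// pattern above evaluate to 0x0000DD00, 0x000000CC, 0xBB000000 and 0x00AA0000,
// which OR together to 0xBBAADDCC. bswap(x) = 0xDDCCBBAA, and rotating that
// left by 16 also gives 0xBBAADDCC, so the whole tree collapses to
// (rotl (bswap x), 16) or, failing legality, to the shl/srl/or expansion.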
7858
7859/// This contains all DAGCombine rules which reduce two values combined by
7860/// an Or operation to a single value \see visitANDLike().
7861SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7862 EVT VT = N1.getValueType();
7863
7864 // fold (or x, undef) -> -1
7865 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7866 return DAG.getAllOnesConstant(DL, VT);
7867
7868 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7869 return V;
7870
7871 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7872 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7873 // Don't increase # computations.
7874 (N0->hasOneUse() || N1->hasOneUse())) {
7875 // We can only do this xform if we know that bits from X that are set in C2
7876 // but not in C1 are already zero. Likewise for Y.
7877 if (const ConstantSDNode *N0O1C =
7878            getAsNonOpaqueConstant(N0.getOperand(1)))
7879      if (const ConstantSDNode *N1O1C =
7880              getAsNonOpaqueConstant(N1.getOperand(1)))
7881 // We can only do this xform if we know that bits from X that are set in
7882 // C2 but not in C1 are already zero. Likewise for Y.
7883 const APInt &LHSMask = N0O1C->getAPIntValue();
7884 const APInt &RHSMask = N1O1C->getAPIntValue();
7885
7886 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7887 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7888 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7889 N0.getOperand(0), N1.getOperand(0));
7890 return DAG.getNode(ISD::AND, DL, VT, X,
7891 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7892 }
7893 }
7894 }
7895 }
7896
7897 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7898 if (N0.getOpcode() == ISD::AND &&
7899 N1.getOpcode() == ISD::AND &&
7900 N0.getOperand(0) == N1.getOperand(0) &&
7901 // Don't increase # computations.
7902 (N0->hasOneUse() || N1->hasOneUse())) {
7903 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7904 N0.getOperand(1), N1.getOperand(1));
7905 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7906 }
7907
7908 return SDValue();
7909}
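// Worked example for the mask fold above (illustrative): with C1 = 0x00F0 and
// C2 = 0x000F, (or (and X, 0x00F0), (and Y, 0x000F)) can become
// (and (or X, Y), 0x00FF) provided MaskedValueIsZero shows X has no bits in
// 0x000F and Y has no bits in 0x00F0; the extra bits that the wider mask would
// admit are already known to be zero.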
7910
7911/// OR combines for which the commuted variant will be tried as well.
7912static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7913                                  SDNode *N) {
7914 EVT VT = N0.getValueType();
7915 unsigned BW = VT.getScalarSizeInBits();
7916 SDLoc DL(N);
7917
7918 auto peekThroughResize = [](SDValue V) {
7919 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7920 return V->getOperand(0);
7921 return V;
7922 };
7923
7924 SDValue N0Resized = peekThroughResize(N0);
7925 if (N0Resized.getOpcode() == ISD::AND) {
7926 SDValue N1Resized = peekThroughResize(N1);
7927 SDValue N00 = N0Resized.getOperand(0);
7928 SDValue N01 = N0Resized.getOperand(1);
7929
7930 // fold or (and x, y), x --> x
7931 if (N00 == N1Resized || N01 == N1Resized)
7932 return N1;
7933
7934 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7935 // TODO: Set AllowUndefs = true.
7936 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7937 /* AllowUndefs */ false)) {
7938 if (peekThroughResize(NotOperand) == N1Resized)
7939 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7940 N1);
7941 }
7942
7943 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7944 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7945 /* AllowUndefs */ false)) {
7946 if (peekThroughResize(NotOperand) == N1Resized)
7947 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7948 N1);
7949 }
7950 }
7951
7952 SDValue X, Y;
7953
7954 // fold or (xor X, N1), N1 --> or X, N1
7955 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7956 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7957
7958 // fold or (xor x, y), (x and/or y) --> or x, y
7959 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7960 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7961       sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7962    return DAG.getNode(ISD::OR, DL, VT, X, Y);
7963
7964 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7965 return R;
7966
7967 auto peekThroughZext = [](SDValue V) {
7968 if (V->getOpcode() == ISD::ZERO_EXTEND)
7969 return V->getOperand(0);
7970 return V;
7971 };
7972
7973 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7974 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7975 N0.getOperand(0) == N1.getOperand(0) &&
7976 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7977 return N0;
7978
7979 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7980 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7981 N0.getOperand(1) == N1.getOperand(0) &&
7982 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7983 return N0;
7984
7985 // Attempt to match a legalized build_pair-esque pattern:
7986 // or(shl(aext(Hi),BW/2),zext(Lo))
7987 SDValue Lo, Hi;
7988 if (sd_match(N0,
7989               m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7990      sd_match(N1, m_ZExt(m_Value(Lo))) &&
7991 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7992 Lo.getValueType() == Hi.getValueType()) {
7993 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7994 SDValue NotLo, NotHi;
7995 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7996 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7997 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7998 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7999 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8000 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8001 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8002 }
8003 }
8004
8005 return SDValue();
8006}
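// Note on the funnel-shift folds above (illustrative): (shl X, Y) reproduces
// exactly the bits that (fshl X, ?, Y) takes from its first operand, so OR'ing
// the two adds no new bits and the existing fshl node can be returned directly;
// the srl/fshr case is symmetric.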
8007
8008SDValue DAGCombiner::visitOR(SDNode *N) {
8009 SDValue N0 = N->getOperand(0);
8010 SDValue N1 = N->getOperand(1);
8011 EVT VT = N1.getValueType();
8012 SDLoc DL(N);
8013
8014 // x | x --> x
8015 if (N0 == N1)
8016 return N0;
8017
8018 // fold (or c1, c2) -> c1|c2
8019 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8020 return C;
8021
8022 // canonicalize constant to RHS
8023  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8024      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8025    return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8026
8027 // fold vector ops
8028 if (VT.isVector()) {
8029 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8030 return FoldedVOp;
8031
8032 // fold (or x, 0) -> x, vector edition
8033    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8034      return N0;
8035
8036 // fold (or x, -1) -> -1, vector edition
8037    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8038      // do not return N1, because an undef node may exist in N1
8039 return DAG.getAllOnesConstant(DL, N1.getValueType());
8040
8041 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8042 // Do this only if the resulting type / shuffle is legal.
8043 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8044 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8045 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8046 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8047 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8048 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8049 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8050 // Ensure both shuffles have a zero input.
8051 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8052 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8053 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8054 bool CanFold = true;
8055 int NumElts = VT.getVectorNumElements();
8056 SmallVector<int, 4> Mask(NumElts, -1);
8057
8058 for (int i = 0; i != NumElts; ++i) {
8059 int M0 = SV0->getMaskElt(i);
8060 int M1 = SV1->getMaskElt(i);
8061
8062 // Determine if either index is pointing to a zero vector.
8063 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8064 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8065
8066        // If one element is zero and the other side is undef, keep undef.
8067 // This also handles the case that both are undef.
8068 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8069 continue;
8070
8071 // Make sure only one of the elements is zero.
8072 if (M0Zero == M1Zero) {
8073 CanFold = false;
8074 break;
8075 }
8076
8077 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8078
8079 // We have a zero and non-zero element. If the non-zero came from
8080 // SV0 make the index a LHS index. If it came from SV1, make it
8081 // a RHS index. We need to mod by NumElts because we don't care
8082 // which operand it came from in the original shuffles.
8083 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8084 }
8085
8086 if (CanFold) {
8087 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8088 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8089 SDValue LegalShuffle =
8090 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8091 if (LegalShuffle)
8092 return LegalShuffle;
8093 }
8094 }
8095 }
8096 }
8097
8098 // fold (or x, 0) -> x
8099 if (isNullConstant(N1))
8100 return N0;
8101
8102 // fold (or x, -1) -> -1
8103 if (isAllOnesConstant(N1))
8104 return N1;
8105
8106 if (SDValue NewSel = foldBinOpIntoSelect(N))
8107 return NewSel;
8108
8109 // fold (or x, c) -> c iff (x & ~c) == 0
8110 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8111 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8112 return N1;
8113
8114 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8115 return R;
8116
8117 if (SDValue Combined = visitORLike(N0, N1, DL))
8118 return Combined;
8119
8120 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8121 return Combined;
8122
8123 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8124 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8125 return BSwap;
8126 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8127 return BSwap;
8128
8129 // reassociate or
8130 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8131 return ROR;
8132
8133 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8134 if (SDValue SD =
8135 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8136 return SD;
8137
8138 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8139 // iff (c1 & c2) != 0 or c1/c2 are undef.
8140 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8141 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8142 };
8143 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8144 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8145 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8146 {N1, N0.getOperand(1)})) {
8147 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8148 AddToWorklist(IOR.getNode());
8149 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8150 }
8151 }
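  // Worked example of the canonicalization above (illustrative): with c1 = 0x0F
  // and c2 = 0x3C, (or (and X, 0x0F), 0x3C) becomes (and (or X, 0x3C), 0x3F);
  // the masks intersect, so no information is lost and the AND now sits outside
  // where it can merge with other masking operations.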
8152
8153 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8154 return Combined;
8155 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8156 return Combined;
8157
8158 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8159 if (N0.getOpcode() == N1.getOpcode())
8160 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8161 return V;
8162
8163 // See if this is some rotate idiom.
8164 if (SDValue Rot = MatchRotate(N0, N1, DL))
8165 return Rot;
8166
8167 if (SDValue Load = MatchLoadCombine(N))
8168 return Load;
8169
8170 // Simplify the operands using demanded-bits information.
8171  if (SimplifyDemandedBits(SDValue(N, 0)))
8172    return SDValue(N, 0);
8173
8174 // If OR can be rewritten into ADD, try combines based on ADD.
8175 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8176 DAG.isADDLike(SDValue(N, 0)))
8177 if (SDValue Combined = visitADDLike(N))
8178 return Combined;
8179
8180 // Postpone until legalization completed to avoid interference with bswap
8181 // folding
8182 if (LegalOperations || VT.isVector())
8183 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8184 return R;
8185
8186 return SDValue();
8187}
8188
8189static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8190                                 SDValue &Mask) {
8191 if (Op.getOpcode() == ISD::AND &&
8192 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8193 Mask = Op.getOperand(1);
8194 return Op.getOperand(0);
8195 }
8196 return Op;
8197}
8198
8199/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8200static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8201 SDValue &Mask) {
8202 Op = stripConstantMask(DAG, Op, Mask);
8203 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8204 Shift = Op;
8205 return true;
8206 }
8207 return false;
8208}
8209
8210/// Helper function for visitOR to extract the needed side of a rotate idiom
8211/// from a shl/srl/mul/udiv. This is meant to handle cases where
8212/// InstCombine merged some outside op with one of the shifts from
8213/// the rotate pattern.
8214/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8215/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8216/// patterns:
8217///
8218/// (or (add v v) (shrl v bitwidth-1)):
8219/// expands (add v v) -> (shl v 1)
8220///
8221/// (or (mul v c0) (shrl (mul v c1) c2)):
8222/// expands (mul v c0) -> (shl (mul v c1) c3)
8223///
8224/// (or (udiv v c0) (shl (udiv v c1) c2)):
8225/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8226///
8227/// (or (shl v c0) (shrl (shl v c1) c2)):
8228/// expands (shl v c0) -> (shl (shl v c1) c3)
8229///
8230/// (or (shrl v c0) (shl (shrl v c1) c2)):
8231/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8232///
8233/// Such that in all cases, c3+c2==bitwidth(op v c1).
8234static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8235                                     SDValue ExtractFrom, SDValue &Mask,
8236 const SDLoc &DL) {
8237 assert(OppShift && ExtractFrom && "Empty SDValue");
8238 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8239 return SDValue();
8240
8241 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8242
8243 // Value and Type of the shift.
8244 SDValue OppShiftLHS = OppShift.getOperand(0);
8245 EVT ShiftedVT = OppShiftLHS.getValueType();
8246
8247 // Amount of the existing shift.
8248 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8249
8250 // (add v v) -> (shl v 1)
8251 // TODO: Should this be a general DAG canonicalization?
8252 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8253 ExtractFrom.getOpcode() == ISD::ADD &&
8254 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8255 ExtractFrom.getOperand(0) == OppShiftLHS &&
8256 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8257 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8258 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8259
8260 // Preconditions:
8261 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8262 //
8263 // Find opcode of the needed shift to be extracted from (op0 v c0).
8264 unsigned Opcode = ISD::DELETED_NODE;
8265 bool IsMulOrDiv = false;
8266 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8267 // opcode or its arithmetic (mul or udiv) variant.
8268 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8269 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8270 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8271 return false;
8272 Opcode = NeededShift;
8273 return true;
8274 };
8275 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8276 // that the needed shift can be extracted from.
8277 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8278 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8279 return SDValue();
8280
8281 // op0 must be the same opcode on both sides, have the same LHS argument,
8282 // and produce the same value type.
8283 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8284 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8285 ShiftedVT != ExtractFrom.getValueType())
8286 return SDValue();
8287
8288 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8289 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8290 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8291 ConstantSDNode *ExtractFromCst =
8292 isConstOrConstSplat(ExtractFrom.getOperand(1));
8293 // TODO: We should be able to handle non-uniform constant vectors for these values
8294 // Check that we have constant values.
8295 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8296 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8297 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8298 return SDValue();
8299
8300 // Compute the shift amount we need to extract to complete the rotate.
8301 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8302 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8303 return SDValue();
8304 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8305 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8306 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8307 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8308 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8309
8310 // Now try extract the needed shift from the ExtractFrom op and see if the
8311 // result matches up with the existing shift's LHS op.
8312 if (IsMulOrDiv) {
8313 // Op to extract from is a mul or udiv by a constant.
8314 // Check:
8315 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8316 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8317 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8318 NeededShiftAmt.getZExtValue());
8319 APInt ResultAmt;
8320 APInt Rem;
8321 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8322 if (Rem != 0 || ResultAmt != OppLHSAmt)
8323 return SDValue();
8324 } else {
8325 // Op to extract from is a shift by a constant.
8326 // Check:
8327 // c2 - (bitwidth(op0 v c0) - c1) == c0
8328 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8329 ExtractFromAmt.getBitWidth()))
8330 return SDValue();
8331 }
8332
8333 // Return the expanded shift op that should allow a rotate to be formed.
8334 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8335 EVT ResVT = ExtractFrom.getValueType();
8336 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8337 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8338}
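// Worked example (illustrative, assuming i32 v): for
//   (or (mul v, 16), (srl (mul v, 2), 29))
// the needed shift amount is 32 - 29 = 3, and 16 udiv (1 << 3) == 2 with zero
// remainder, so (mul v, 16) is rewritten as (shl (mul v, 2), 3); the caller can
// then match (or (shl t, 3), (srl t, 29)) with t = (mul v, 2) as a rotate by 3.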
8339
8340// Return true if we can prove that, whenever Neg and Pos are both in the
8341// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8342// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8343//
8344// (or (shift1 X, Neg), (shift2 X, Pos))
8345//
8346// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8347// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8348// to consider shift amounts with defined behavior.
8349//
8350// The IsRotate flag should be set when the LHS of both shifts is the same.
8351// Otherwise if matching a general funnel shift, it should be clear.
8352static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8353 SelectionDAG &DAG, bool IsRotate) {
8354 const auto &TLI = DAG.getTargetLoweringInfo();
8355 // If EltSize is a power of 2 then:
8356 //
8357 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8358 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8359 //
8360 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8361 // for the stronger condition:
8362 //
8363 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8364 //
8365 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8366 // we can just replace Neg with Neg' for the rest of the function.
8367 //
8368 // In other cases we check for the even stronger condition:
8369 //
8370 // Neg == EltSize - Pos [B]
8371 //
8372 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8373 // behavior if Pos == 0 (and consequently Neg == EltSize).
8374 //
8375 // We could actually use [A] whenever EltSize is a power of 2, but the
8376 // only extra cases that it would match are those uninteresting ones
8377 // where Neg and Pos are never in range at the same time. E.g. for
8378 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8379 // as well as (sub 32, Pos), but:
8380 //
8381 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8382 //
8383 // always invokes undefined behavior for 32-bit X.
8384 //
8385 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8386 // This allows us to peek through any operations that only affect Mask's
8387 // un-demanded bits.
8388 //
8389 // NOTE: We can only do this when matching operations which won't modify the
8390 // least Log2(EltSize) significant bits and not a general funnel shift.
8391 unsigned MaskLoBits = 0;
8392 if (IsRotate && isPowerOf2_64(EltSize)) {
8393 unsigned Bits = Log2_64(EltSize);
8394 unsigned NegBits = Neg.getScalarValueSizeInBits();
8395 if (NegBits >= Bits) {
8396 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8397 if (SDValue Inner =
8398              TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8399        Neg = Inner;
8400 MaskLoBits = Bits;
8401 }
8402 }
8403 }
8404
8405 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8406 if (Neg.getOpcode() != ISD::SUB)
8407 return false;
8408  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8409  if (!NegC)
8410 return false;
8411 SDValue NegOp1 = Neg.getOperand(1);
8412
8413 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8414 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8415 // are redundant for the purpose of the equality.
8416 if (MaskLoBits) {
8417 unsigned PosBits = Pos.getScalarValueSizeInBits();
8418 if (PosBits >= MaskLoBits) {
8419 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8420 if (SDValue Inner =
8421              TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8422        Pos = Inner;
8423 }
8424 }
8425 }
8426
8427 // The condition we need is now:
8428 //
8429 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8430 //
8431 // If NegOp1 == Pos then we need:
8432 //
8433 // EltSize & Mask == NegC & Mask
8434 //
8435 // (because "x & Mask" is a truncation and distributes through subtraction).
8436 //
8437 // We also need to account for a potential truncation of NegOp1 if the amount
8438 // has already been legalized to a shift amount type.
8439 APInt Width;
8440 if ((Pos == NegOp1) ||
8441 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8442 Width = NegC->getAPIntValue();
8443
8444 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8445 // Then the condition we want to prove becomes:
8446 //
8447 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8448 //
8449 // which, again because "x & Mask" is a truncation, becomes:
8450 //
8451 // NegC & Mask == (EltSize - PosC) & Mask
8452 // EltSize & Mask == (NegC + PosC) & Mask
8453 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8454 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8455 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8456 else
8457 return false;
8458 } else
8459 return false;
8460
8461 // Now we just need to check that EltSize & Mask == Width & Mask.
8462 if (MaskLoBits)
8463 // EltSize & Mask is 0 since Mask is EltSize - 1.
8464 return Width.getLoBits(MaskLoBits) == 0;
8465 return Width == EltSize;
8466}
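// Illustrative instances of the two conditions above: for EltSize == 32,
// Neg = (sub 32, y) with Pos = y satisfies [B] directly, while
// Neg = (and (sub 0, y), 31) with Pos = y satisfies [A], since
// (0 - y) & 31 == (32 - y) & 31 for every y.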
8467
8468// A subroutine of MatchRotate used once we have found an OR of two opposite
8469// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8470// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8471// former being preferred if supported. InnerPos and InnerNeg are Pos and
8472// Neg with outer conversions stripped away.
8473SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8474 SDValue Neg, SDValue InnerPos,
8475 SDValue InnerNeg, bool HasPos,
8476 unsigned PosOpcode, unsigned NegOpcode,
8477 const SDLoc &DL) {
8478 // fold (or (shl x, (*ext y)),
8479 // (srl x, (*ext (sub 32, y)))) ->
8480 // (rotl x, y) or (rotr x, (sub 32, y))
8481 //
8482 // fold (or (shl x, (*ext (sub 32, y))),
8483 // (srl x, (*ext y))) ->
8484 // (rotr x, y) or (rotl x, (sub 32, y))
8485 EVT VT = Shifted.getValueType();
8486 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8487 /*IsRotate*/ true)) {
8488 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8489 HasPos ? Pos : Neg);
8490 }
8491
8492 return SDValue();
8493}
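// Illustrative example: for i32 x and an i8 shift amount y,
//   (or (shl x, (zext y)), (srl x, (zext (sub 32, y))))
// strips the zexts into InnerPos/InnerNeg and, once matchRotateSub proves that
// (sub 32, y) is the complement of y, folds to (rotl x, (zext y)) when ROTL is
// available, or (rotr x, (zext (sub 32, y))) when only ROTR is.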
8494
8495// A subroutine of MatchRotate used once we have found an OR of two opposite
8496// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8497// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8498// former being preferred if supported. InnerPos and InnerNeg are Pos and
8499// Neg with outer conversions stripped away.
8500// TODO: Merge with MatchRotatePosNeg.
8501SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8502 SDValue Neg, SDValue InnerPos,
8503 SDValue InnerNeg, bool HasPos,
8504 unsigned PosOpcode, unsigned NegOpcode,
8505 const SDLoc &DL) {
8506 EVT VT = N0.getValueType();
8507 unsigned EltBits = VT.getScalarSizeInBits();
8508
8509 // fold (or (shl x0, (*ext y)),
8510 // (srl x1, (*ext (sub 32, y)))) ->
8511 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8512 //
8513 // fold (or (shl x0, (*ext (sub 32, y))),
8514 // (srl x1, (*ext y))) ->
8515 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8516 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8517 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8518 HasPos ? Pos : Neg);
8519 }
8520
8521 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8522 // so for now just use the PosOpcode case if its legal.
8523 // TODO: When can we use the NegOpcode case?
8524 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8525 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8526 if (Op.getOpcode() != BinOpc)
8527 return false;
8528 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8529 return Cst && (Cst->getAPIntValue() == Imm);
8530 };
8531
8532 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8533 // -> (fshl x0, x1, y)
8534 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8535 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8536 InnerPos == InnerNeg.getOperand(0) &&
8537        TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8538      return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8539 }
8540
8541 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8542 // -> (fshr x0, x1, y)
8543 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8544 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8545 InnerNeg == InnerPos.getOperand(0) &&
8546        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8547      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8548 }
8549
8550 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8551 // -> (fshr x0, x1, y)
8552 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8553 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8554 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8555 InnerNeg == InnerPos.getOperand(0) &&
8556        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8557      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8558 }
8559 }
8560
8561 return SDValue();
8562}
8563
8564// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8565// idioms for rotate, and if the target supports rotation instructions, generate
8566// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8567// with different shifted sources.
8568SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8569 EVT VT = LHS.getValueType();
8570
8571 // The target must have at least one rotate/funnel flavor.
8572 // We still try to match rotate by constant pre-legalization.
8573 // TODO: Support pre-legalization funnel-shift by constant.
8574 bool HasROTL = hasOperation(ISD::ROTL, VT);
8575 bool HasROTR = hasOperation(ISD::ROTR, VT);
8576 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8577 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8578
8579 // If the type is going to be promoted and the target has enabled custom
8580 // lowering for rotate, allow matching rotate by non-constants. Only allow
8581 // this for scalar types.
8582 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8583                                  TargetLowering::TypePromoteInteger) {
8584    HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8585    HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8586  }
8587
8588 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8589 return SDValue();
8590
8591 // Check for truncated rotate.
8592 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8593 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8594 assert(LHS.getValueType() == RHS.getValueType());
8595 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8596 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8597 }
8598 }
8599
8600 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8601 SDValue LHSShift; // The shift.
8602 SDValue LHSMask; // AND value if any.
8603 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8604
8605 SDValue RHSShift; // The shift.
8606 SDValue RHSMask; // AND value if any.
8607 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8608
8609 // If neither side matched a rotate half, bail
8610 if (!LHSShift && !RHSShift)
8611 return SDValue();
8612
8613 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8614 // side of the rotate, so try to handle that here. In all cases we need to
8615 // pass the matched shift from the opposite side to compute the opcode and
8616 // needed shift amount to extract. We still want to do this if both sides
8617 // matched a rotate half because one half may be a potential overshift that
8618 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8619 // single one).
8620
8621 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8622 if (LHSShift)
8623 if (SDValue NewRHSShift =
8624 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8625 RHSShift = NewRHSShift;
8626 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8627 if (RHSShift)
8628 if (SDValue NewLHSShift =
8629 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8630 LHSShift = NewLHSShift;
8631
8632 // If a side is still missing, nothing else we can do.
8633 if (!RHSShift || !LHSShift)
8634 return SDValue();
8635
8636 // At this point we've matched or extracted a shift op on each side.
8637
8638 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8639 return SDValue(); // Shifts must disagree.
8640
8641 // Canonicalize shl to left side in a shl/srl pair.
8642 if (RHSShift.getOpcode() == ISD::SHL) {
8643 std::swap(LHS, RHS);
8644 std::swap(LHSShift, RHSShift);
8645 std::swap(LHSMask, RHSMask);
8646 }
8647
8648 // Something has gone wrong - we've lost the shl/srl pair - bail.
8649 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8650 return SDValue();
8651
8652 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8653 SDValue LHSShiftArg = LHSShift.getOperand(0);
8654 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8655 SDValue RHSShiftArg = RHSShift.getOperand(0);
8656 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8657
8658 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8659                                        ConstantSDNode *RHS) {
8660    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8661 };
8662
8663 auto ApplyMasks = [&](SDValue Res) {
8664 // If there is an AND of either shifted operand, apply it to the result.
8665 if (LHSMask.getNode() || RHSMask.getNode()) {
8666      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8667      SDValue Mask = AllOnes;
8668
8669 if (LHSMask.getNode()) {
8670 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8671 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8672 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8673 }
8674 if (RHSMask.getNode()) {
8675 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8676 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8677 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8678 }
8679
8680 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8681 }
8682
8683 return Res;
8684 };
8685
8686 // TODO: Support pre-legalization funnel-shift by constant.
8687 bool IsRotate = LHSShiftArg == RHSShiftArg;
8688 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8689 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8690 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8691 // Look for a disguised rotate by constant.
8692 // The common shifted operand X may be hidden inside another 'or'.
8693 SDValue X, Y;
8694 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8695 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8696 return false;
8697 if (CommonOp == Or.getOperand(0)) {
8698 X = CommonOp;
8699 Y = Or.getOperand(1);
8700 return true;
8701 }
8702 if (CommonOp == Or.getOperand(1)) {
8703 X = CommonOp;
8704 Y = Or.getOperand(0);
8705 return true;
8706 }
8707 return false;
8708 };
8709
8710 SDValue Res;
8711 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8712 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8713 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8714 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8715 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8716 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8717 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8718 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8719 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8720 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8721 } else {
8722 return SDValue();
8723 }
8724
8725 return ApplyMasks(Res);
8726 }
8727
8728 return SDValue(); // Requires funnel shift support.
8729 }
8730
8731 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8732 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8733 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8734 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8735 // iff C1+C2 == EltSizeInBits
8736 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8737 SDValue Res;
8738 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8739 bool UseROTL = !LegalOperations || HasROTL;
8740 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8741 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8742 } else {
8743 bool UseFSHL = !LegalOperations || HasFSHL;
8744 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8745 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8746 }
8747
8748 return ApplyMasks(Res);
8749 }
8750
8751 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8752 // shift.
8753 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8754 return SDValue();
8755
8756 // If there is a mask here, and we have a variable shift, we can't be sure
8757 // that we're masking out the right stuff.
8758 if (LHSMask.getNode() || RHSMask.getNode())
8759 return SDValue();
8760
8761 // If the shift amount is sign/zext/any-extended just peel it off.
8762 SDValue LExtOp0 = LHSShiftAmt;
8763 SDValue RExtOp0 = RHSShiftAmt;
8764 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8765 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8766 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8767 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8768 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8769 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8770 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8771 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8772 LExtOp0 = LHSShiftAmt.getOperand(0);
8773 RExtOp0 = RHSShiftAmt.getOperand(0);
8774 }
8775
8776 if (IsRotate && (HasROTL || HasROTR)) {
8777 SDValue TryL =
8778 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8779 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8780 if (TryL)
8781 return TryL;
8782
8783 SDValue TryR =
8784 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8785 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8786 if (TryR)
8787 return TryR;
8788 }
8789
8790 SDValue TryL =
8791 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8792 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8793 if (TryL)
8794 return TryL;
8795
8796 SDValue TryR =
8797 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8798 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8799 if (TryR)
8800 return TryR;
8801
8802 return SDValue();
8803}
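// Illustrative example of the "disguised rotate by constant" case above:
//   (or (shl (or X, Y), 8), (srl X, 24))
// is rewritten to (or (rotl X, 8), (shl Y, 8)); the srl contributes only X's
// bits, so X rotates cleanly, while Y's low part still needs to be shifted
// into place.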
8804
8805/// Recursively traverses the expression calculating the origin of the requested
8806/// byte of the given value. Returns std::nullopt if the provider can't be
8807/// calculated.
8808///
8809/// For all the values except the root of the expression, we verify that the
8810/// value has exactly one use and if not then return std::nullopt. This way if
8811/// the origin of the byte is returned it's guaranteed that the values which
8812/// contribute to the byte are not used outside of this expression.
8813///
8814/// However, there is a special case when dealing with vector loads -- we allow
8815/// more than one use if the load is a vector type. Since the values that
8816/// contribute to the byte ultimately come from the ExtractVectorElements of the
8817/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8818/// because those operations are independent from the pattern to be combined.
8819/// For vector loads, we simply care that the ByteProviders are adjacent
8820/// positions of the same vector, and their index matches the byte that is being
8821/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8822/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8823/// byte position we are trying to provide for the LoadCombine. If these do
8824/// not match, then we can not combine the vector loads. \p Index uses the
8825/// byte position we are trying to provide for and is matched against the
8826/// shl and load size. The \p Index algorithm ensures the requested byte is
8827/// provided for by the pattern, and the pattern does not over provide bytes.
8828///
8829///
8830/// The supported LoadCombine pattern for vector loads is as follows
8831/// or
8832/// / \
8833/// or shl
8834/// / \ |
8835/// or shl zext
8836/// / \ | |
8837/// shl zext zext EVE*
8838/// | | | |
8839/// zext EVE* EVE* LOAD
8840/// | | |
8841/// EVE* LOAD LOAD
8842/// |
8843/// LOAD
8844///
8845/// *ExtractVectorElement
8846using SDByteProvider = ByteProvider<SDNode *>;
8847
8848static std::optional<SDByteProvider>
8849calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8850 std::optional<uint64_t> VectorIndex,
8851 unsigned StartingIndex = 0) {
8852
8853 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8854 if (Depth == 10)
8855 return std::nullopt;
8856
8857 // Only allow multiple uses if the instruction is a vector load (in which
8858 // case we will use the load for every ExtractVectorElement)
8859 if (Depth && !Op.hasOneUse() &&
8860 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8861 return std::nullopt;
8862
8863 // Fail to combine if we have encountered anything but a LOAD after handling
8864 // an ExtractVectorElement.
8865 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8866 return std::nullopt;
8867
8868 unsigned BitWidth = Op.getValueSizeInBits();
8869 if (BitWidth % 8 != 0)
8870 return std::nullopt;
8871 unsigned ByteWidth = BitWidth / 8;
8872 assert(Index < ByteWidth && "invalid index requested");
8873 (void) ByteWidth;
8874
8875 switch (Op.getOpcode()) {
8876 case ISD::OR: {
8877 auto LHS =
8878 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8879 if (!LHS)
8880 return std::nullopt;
8881 auto RHS =
8882 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8883 if (!RHS)
8884 return std::nullopt;
8885
8886 if (LHS->isConstantZero())
8887 return RHS;
8888 if (RHS->isConstantZero())
8889 return LHS;
8890 return std::nullopt;
8891 }
8892 case ISD::SHL: {
8893 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8894 if (!ShiftOp)
8895 return std::nullopt;
8896
8897 uint64_t BitShift = ShiftOp->getZExtValue();
8898
8899 if (BitShift % 8 != 0)
8900 return std::nullopt;
8901 uint64_t ByteShift = BitShift / 8;
8902
8903 // If we are shifting by an amount greater than the index we are trying to
8904    // provide, then do not provide anything. Otherwise, subtract the shift
8905    // amount (in bytes) from the index and recurse.
8906 return Index < ByteShift
8907               ? SDByteProvider::getConstantZero()
8908               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8909 Depth + 1, VectorIndex, Index);
8910 }
8911 case ISD::ANY_EXTEND:
8912 case ISD::SIGN_EXTEND:
8913 case ISD::ZERO_EXTEND: {
8914 SDValue NarrowOp = Op->getOperand(0);
8915 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8916 if (NarrowBitWidth % 8 != 0)
8917 return std::nullopt;
8918 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8919
8920 if (Index >= NarrowByteWidth)
8921 return Op.getOpcode() == ISD::ZERO_EXTEND
8922 ? std::optional<SDByteProvider>(
8923                       SDByteProvider::getConstantZero())
8924                 : std::nullopt;
8925 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8926 StartingIndex);
8927 }
8928 case ISD::BSWAP:
8929 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8930 Depth + 1, VectorIndex, StartingIndex);
8931  case ISD::EXTRACT_VECTOR_ELT: {
8932    auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8933 if (!OffsetOp)
8934 return std::nullopt;
8935
8936 VectorIndex = OffsetOp->getZExtValue();
8937
8938 SDValue NarrowOp = Op->getOperand(0);
8939 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8940 if (NarrowBitWidth % 8 != 0)
8941 return std::nullopt;
8942 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8943 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8944 // type, leaving the high bits undefined.
8945 if (Index >= NarrowByteWidth)
8946 return std::nullopt;
8947
8948 // Check to see if the position of the element in the vector corresponds
8949 // with the byte we are trying to provide for. In the case of a vector of
8950 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8951 // the element will provide a range of bytes. For example, if we have a
8952 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8953 // 3).
8954 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8955 return std::nullopt;
8956 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8957 return std::nullopt;
8958
8959 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8960 VectorIndex, StartingIndex);
8961 }
8962 case ISD::LOAD: {
8963 auto L = cast<LoadSDNode>(Op.getNode());
8964 if (!L->isSimple() || L->isIndexed())
8965 return std::nullopt;
8966
8967 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8968 if (NarrowBitWidth % 8 != 0)
8969 return std::nullopt;
8970 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8971
8972    // If the width of the load does not reach the byte we are trying to
8973    // provide for, and it is not a ZEXTLOAD, then the load does not provide
8974    // the byte in question.
8975 if (Index >= NarrowByteWidth)
8976 return L->getExtensionType() == ISD::ZEXTLOAD
8977 ? std::optional<SDByteProvider>(
8978                       SDByteProvider::getConstantZero())
8979               : std::nullopt;
8980
8981 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8982 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8983 }
8984 }
8985
8986 return std::nullopt;
8987}
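// Worked example (illustrative): asking for byte 2 of
//   (or (zero_extend i32 (load i16, p)), (shl (zero_extend i32 (load i8, q)), 16))
// recurses into both operands; the zext of the i16 load returns a constant-zero
// provider for byte 2, while the shl subtracts its byte shift (16 / 8 == 2) and
// the i8 load then provides its byte 0, so byte 2 of the OR comes from *q.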
8988
8989static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8990 return i;
8991}
8992
8993static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8994 return BW - i - 1;
8995}
8996
8997// Check if the bytes offsets we are looking at match with either big or
8998// little endian value loaded. Return true for big endian, false for little
8999// endian, and std::nullopt if match failed.
9000static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9001 int64_t FirstOffset) {
9002 // The endian can be decided only when it is 2 bytes at least.
9003 unsigned Width = ByteOffsets.size();
9004 if (Width < 2)
9005 return std::nullopt;
9006
9007 bool BigEndian = true, LittleEndian = true;
9008 for (unsigned i = 0; i < Width; i++) {
9009 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9010 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9011 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9012 if (!BigEndian && !LittleEndian)
9013 return std::nullopt;
9014 }
9015
9016  assert((BigEndian != LittleEndian) && "It should be either big endian or "
9017                                        "little endian");
9018 return BigEndian;
9019}
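// Example (illustrative): with FirstOffset == 0, ByteOffsets {0, 1, 2, 3}
// matches the little-endian layout (returns false), {3, 2, 1, 0} matches the
// big-endian layout (returns true), and anything else returns std::nullopt.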
9020
9021// Look through one layer of truncate or extend.
9022static SDValue stripTruncAndExt(SDValue Value) {
9023  switch (Value.getOpcode()) {
9024 case ISD::TRUNCATE:
9025 case ISD::ZERO_EXTEND:
9026 case ISD::SIGN_EXTEND:
9027 case ISD::ANY_EXTEND:
9028 return Value.getOperand(0);
9029 }
9030 return SDValue();
9031}
9032
9033/// Match a pattern where a wide type scalar value is stored by several narrow
9034/// stores. Fold it into a single store or a BSWAP and a store if the target
9035/// supports it.
9036///
9037/// Assuming little endian target:
9038/// i8 *p = ...
9039/// i32 val = ...
9040/// p[0] = (val >> 0) & 0xFF;
9041/// p[1] = (val >> 8) & 0xFF;
9042/// p[2] = (val >> 16) & 0xFF;
9043/// p[3] = (val >> 24) & 0xFF;
9044/// =>
9045/// *((i32)p) = val;
9046///
9047/// i8 *p = ...
9048/// i32 val = ...
9049/// p[0] = (val >> 24) & 0xFF;
9050/// p[1] = (val >> 16) & 0xFF;
9051/// p[2] = (val >> 8) & 0xFF;
9052/// p[3] = (val >> 0) & 0xFF;
9053/// =>
9054/// *((i32)p) = BSWAP(val);
9055SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9056 // The matching looks for "store (trunc x)" patterns that appear early but are
9057 // likely to be replaced by truncating store nodes during combining.
9058 // TODO: If there is evidence that running this later would help, this
9059 // limitation could be removed. Legality checks may need to be added
9060 // for the created store and optional bswap/rotate.
9061 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9062 return SDValue();
9063
9064 // We only handle merging simple stores of 1-4 bytes.
9065 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9066 EVT MemVT = N->getMemoryVT();
9067 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9068 !N->isSimple() || N->isIndexed())
9069 return SDValue();
9070
9071  // Collect all of the stores in the chain, up to the maximum store width (i64).
9072 SDValue Chain = N->getChain();
9073  SmallVector<StoreSDNode *, 8> Stores = {N};
9074  unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9075 unsigned MaxWideNumBits = 64;
9076 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9077 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9078 // All stores must be the same size to ensure that we are writing all of the
9079 // bytes in the wide value.
9080 // This store should have exactly one use as a chain operand for another
9081 // store in the merging set. If there are other chain uses, then the
9082 // transform may not be safe because order of loads/stores outside of this
9083 // set may not be preserved.
9084 // TODO: We could allow multiple sizes by tracking each stored byte.
9085 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9086 Store->isIndexed() || !Store->hasOneUse())
9087 return SDValue();
9088 Stores.push_back(Store);
9089 Chain = Store->getChain();
9090 if (MaxStores < Stores.size())
9091 return SDValue();
9092 }
9093 // There is no reason to continue if we do not have at least a pair of stores.
9094 if (Stores.size() < 2)
9095 return SDValue();
9096
9097 // Handle simple types only.
9098 LLVMContext &Context = *DAG.getContext();
9099 unsigned NumStores = Stores.size();
9100 unsigned WideNumBits = NumStores * NarrowNumBits;
9101 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9102 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
9103 return SDValue();
9104
9105 // Check if all bytes of the source value that we are looking at are stored
9106 // to the same base address. Collect offsets from Base address into OffsetMap.
9107 SDValue SourceValue;
9108 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9109 int64_t FirstOffset = INT64_MAX;
9110 StoreSDNode *FirstStore = nullptr;
9111 std::optional<BaseIndexOffset> Base;
9112 for (auto *Store : Stores) {
9113 // All the stores store different parts of the CombinedValue. A truncate is
9114 // required to get the partial value.
9115 SDValue Trunc = Store->getValue();
9116 if (Trunc.getOpcode() != ISD::TRUNCATE)
9117 return SDValue();
9118 // Other than the first/last part, a shift operation is required to get the
9119 // offset.
9120 int64_t Offset = 0;
9121 SDValue WideVal = Trunc.getOperand(0);
9122 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9123 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9124 // The shift amount must be a constant multiple of the narrow type.
9125 // It is translated to the offset address in the wide source value "y".
9126 //
9127 // x = srl y, ShiftAmtC
9128 // i8 z = trunc x
9129 // store z, ...
9130 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9131 if (ShiftAmtC % NarrowNumBits != 0)
9132 return SDValue();
9133
9134 // Make sure we aren't reading bits that are shifted in.
9135 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9136 return SDValue();
9137
9138 Offset = ShiftAmtC / NarrowNumBits;
9139 WideVal = WideVal.getOperand(0);
9140 }
9141
9142 // Stores must share the same source value with different offsets.
9143 if (!SourceValue)
9144 SourceValue = WideVal;
9145 else if (SourceValue != WideVal) {
9146 // Truncate and extends can be stripped to see if the values are related.
9147 if (stripTruncAndExt(SourceValue) != WideVal &&
9148 stripTruncAndExt(WideVal) != SourceValue)
9149 return SDValue();
9150
9151 if (WideVal.getScalarValueSizeInBits() >
9152 SourceValue.getScalarValueSizeInBits())
9153 SourceValue = WideVal;
9154
9155 // Give up if the source value type is smaller than the store size.
9156 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
9157 return SDValue();
9158 }
9159
9160 // Stores must share the same base address.
9161    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9162    int64_t ByteOffsetFromBase = 0;
9163 if (!Base)
9164 Base = Ptr;
9165 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9166 return SDValue();
9167
9168 // Remember the first store.
9169 if (ByteOffsetFromBase < FirstOffset) {
9170 FirstStore = Store;
9171 FirstOffset = ByteOffsetFromBase;
9172 }
9173 // Map the offset in the store and the offset in the combined value, and
9174 // early return if it has been set before.
9175 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9176 return SDValue();
9177 OffsetMap[Offset] = ByteOffsetFromBase;
9178 }
9179
9180 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9181 assert(FirstStore && "First store must be set");
9182
9183 // Check that a store of the wide type is both allowed and fast on the target
9184 const DataLayout &Layout = DAG.getDataLayout();
9185 unsigned Fast = 0;
9186 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9187 *FirstStore->getMemOperand(), &Fast);
9188 if (!Allowed || !Fast)
9189 return SDValue();
9190
9191 // Check if the pieces of the value are going to the expected places in memory
9192 // to merge the stores.
9193 auto checkOffsets = [&](bool MatchLittleEndian) {
9194 if (MatchLittleEndian) {
9195 for (unsigned i = 0; i != NumStores; ++i)
9196 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9197 return false;
9198 } else { // MatchBigEndian by reversing loop counter.
9199 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9200 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9201 return false;
9202 }
9203 return true;
9204 };
9205
9206 // Check if the offsets line up for the native data layout of this target.
9207 bool NeedBswap = false;
9208 bool NeedRotate = false;
9209 if (!checkOffsets(Layout.isLittleEndian())) {
9210 // Special-case: check if byte offsets line up for the opposite endian.
9211 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9212 NeedBswap = true;
9213 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9214 NeedRotate = true;
9215 else
9216 return SDValue();
9217 }
9218
9219 SDLoc DL(N);
9220 if (WideVT != SourceValue.getValueType()) {
9221 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9222 "Unexpected store value to merge");
9223 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9224 }
9225
9226 // Before legalize we can introduce illegal bswaps/rotates which will be later
9227 // converted to an explicit bswap sequence. This way we end up with a single
9228 // store and byte shuffling instead of several stores and byte shuffling.
9229 if (NeedBswap) {
9230 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9231 } else if (NeedRotate) {
9232 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9233 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9234 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9235 }
9236
9237 SDValue NewStore =
9238 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9239 FirstStore->getPointerInfo(), FirstStore->getAlign());
9240
9241 // Rely on other DAG combine rules to remove the other individual stores.
9242 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9243 return NewStore;
9244}
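// Illustrative example of the NeedRotate case: on a little-endian target,
// storing (trunc (srl val, 16)) at bytes p[0..1] and (trunc val) at bytes
// p[2..3] as two i16 stores writes the halves of an i32 in swapped order, so
// the pair becomes a single i32 store of (rotr val, 16) at p.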
9245
9246/// Match a pattern where a wide type scalar value is loaded by several narrow
9247/// loads and combined by shifts and ors. Fold it into a single load or a load
9248/// and a BSWAP if the target supports it.
9249///
9250/// Assuming little endian target:
9251/// i8 *a = ...
9252/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9253/// =>
9254/// i32 val = *((i32)a)
9255///
9256/// i8 *a = ...
9257/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9258/// =>
9259/// i32 val = BSWAP(*((i32)a))
9260///
9261/// TODO: This rule matches complex patterns with OR node roots and doesn't
9262/// interact well with the worklist mechanism. When a part of the pattern is
9263/// updated (e.g. one of the loads) its direct users are put into the worklist,
9264/// but the root node of the pattern which triggers the load combine is not
9265/// necessarily a direct user of the changed node. For example, once the address
9266/// of the t28 load is reassociated, the load combine won't be triggered:
9267/// t25: i32 = add t4, Constant:i32<2>
9268/// t26: i64 = sign_extend t25
9269/// t27: i64 = add t2, t26
9270/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9271/// t29: i32 = zero_extend t28
9272/// t32: i32 = shl t29, Constant:i8<8>
9273/// t33: i32 = or t23, t32
9274/// As a possible fix visitLoad can check if the load can be a part of a load
9275/// combine pattern and add corresponding OR roots to the worklist.
9276SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9277 assert(N->getOpcode() == ISD::OR &&
9278 "Can only match load combining against OR nodes");
9279
9280 // Handles simple types only
9281 EVT VT = N->getValueType(0);
9282 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9283 return SDValue();
9284 unsigned ByteWidth = VT.getSizeInBits() / 8;
9285
9286 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9287 auto MemoryByteOffset = [&](SDByteProvider P) {
9288 assert(P.hasSrc() && "Must be a memory byte provider");
9289 auto *Load = cast<LoadSDNode>(P.Src.value());
9290
9291 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9292
9293 assert(LoadBitWidth % 8 == 0 &&
9294           "can only analyze providers for individual bytes, not bits");
9295 unsigned LoadByteWidth = LoadBitWidth / 8;
9296 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9297 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9298 };
9299
9300 std::optional<BaseIndexOffset> Base;
9301 SDValue Chain;
9302
9303  SmallPtrSet<LoadSDNode *, 8> Loads;
9304  std::optional<SDByteProvider> FirstByteProvider;
9305 int64_t FirstOffset = INT64_MAX;
9306
9307 // Check if all the bytes of the OR we are looking at are loaded from the same
9308 // base address. Collect bytes offsets from Base address in ByteOffsets.
9309 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9310 unsigned ZeroExtendedBytes = 0;
9311 for (int i = ByteWidth - 1; i >= 0; --i) {
9312 auto P =
9313 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9314 /*StartingIndex*/ i);
9315 if (!P)
9316 return SDValue();
9317
9318 if (P->isConstantZero()) {
9319 // It's OK for the N most significant bytes to be 0, we can just
9320 // zero-extend the load.
9321 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9322 return SDValue();
9323 continue;
9324 }
9325 assert(P->hasSrc() && "provenance should either be memory or zero");
9326 auto *L = cast<LoadSDNode>(P->Src.value());
9327
9328 // All loads must share the same chain
9329 SDValue LChain = L->getChain();
9330 if (!Chain)
9331 Chain = LChain;
9332 else if (Chain != LChain)
9333 return SDValue();
9334
9335 // Loads must share the same base address
9336 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9337 int64_t ByteOffsetFromBase = 0;
9338
9339 // For vector loads, the expected load combine pattern will have an
9340 // ExtractElement for each index in the vector. While each of these
9341 // ExtractElements will be accessing the same base address as determined
9342 // by the load instruction, the actual bytes they interact with will differ
9343 // due to different ExtractElement indices. To accurately determine the
9344 // byte position of an ExtractElement, we offset the base load ptr with
9345 // the index multiplied by the byte size of each element in the vector.
9346 if (L->getMemoryVT().isVector()) {
9347 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9348 if (LoadWidthInBit % 8 != 0)
9349 return SDValue();
9350 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9351 Ptr.addToOffset(ByteOffsetFromVector);
9352 }
9353
9354 if (!Base)
9355 Base = Ptr;
9356
9357 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9358 return SDValue();
9359
9360 // Calculate the offset of the current byte from the base address
9361 ByteOffsetFromBase += MemoryByteOffset(*P);
9362 ByteOffsets[i] = ByteOffsetFromBase;
9363
9364 // Remember the first byte load
9365 if (ByteOffsetFromBase < FirstOffset) {
9366 FirstByteProvider = P;
9367 FirstOffset = ByteOffsetFromBase;
9368 }
9369
9370 Loads.insert(L);
9371 }
9372
9373 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9374 "memory, so there must be at least one load which produces the value");
9375 assert(Base && "Base address of the accessed memory location must be set");
9376 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9377
9378 bool NeedsZext = ZeroExtendedBytes > 0;
9379
9380 EVT MemVT =
9381 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9382
9383 if (!MemVT.isSimple())
9384 return SDValue();
9385
9386 // Before legalization we can introduce illegal loads that are too wide; they
9387 // will later be split into legal-sized loads. This enables us to combine an
9388 // i64-by-i8 load pattern into a couple of i32 loads on 32-bit targets.
9389 if (LegalOperations &&
9391 MemVT))
9392 return SDValue();
9393
9394 // Check if the bytes of the OR we are looking at match with either big or
9395 // little endian value load
9396 std::optional<bool> IsBigEndian = isBigEndian(
9397 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9398 if (!IsBigEndian)
9399 return SDValue();
9400
9401 assert(FirstByteProvider && "must be set");
9402
9403 // Ensure that the first byte is loaded from zero offset of the first load.
9404 // So the combined value can be loaded from the first load address.
9405 if (MemoryByteOffset(*FirstByteProvider) != 0)
9406 return SDValue();
9407 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9408
9409 // The node we are looking at matches with the pattern, check if we can
9410 // replace it with a single (possibly zero-extended) load and bswap + shift if
9411 // needed.
9412
9413 // If the load needs byte swap check if the target supports it
9414 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9415
9416 // Before legalize we can introduce illegal bswaps which will be later
9417 // converted to an explicit bswap sequence. This way we end up with a single
9418 // load and byte shuffling instead of several loads and byte shuffling.
9419 // We do not introduce illegal bswaps when zero-extending as this tends to
9420 // introduce too many arithmetic instructions.
9421 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9422 !TLI.isOperationLegal(ISD::BSWAP, VT))
9423 return SDValue();
9424
9425 // If we need to bswap and zero extend, we have to insert a shift. Check that
9426 // it is legal.
9427 if (NeedsBswap && NeedsZext && LegalOperations &&
9428 !TLI.isOperationLegal(ISD::SHL, VT))
9429 return SDValue();
9430
9431 // Check that a load of the wide type is both allowed and fast on the target
9432 unsigned Fast = 0;
9433 bool Allowed =
9434 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9435 *FirstLoad->getMemOperand(), &Fast);
9436 if (!Allowed || !Fast)
9437 return SDValue();
9438
9439 SDValue NewLoad =
9440 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9441 Chain, FirstLoad->getBasePtr(),
9442 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9443
9444 // Transfer chain users from old loads to the new load.
9445 for (LoadSDNode *L : Loads)
9446 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9447
9448 if (!NeedsBswap)
9449 return NewLoad;
9450
9451 SDValue ShiftedLoad =
9452 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9453 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9454 VT, SDLoc(N)))
9455 : NewLoad;
9456 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9457}
9458
9459// If the target has andn, bsl, or a similar bit-select instruction,
9460// we want to unfold masked merge, with canonical pattern of:
9461// | A | |B|
9462// ((x ^ y) & m) ^ y
9463// | D |
9464// Into:
9465// (x & m) | (y & ~m)
9466// If y is a constant, m is not a 'not', and the 'andn' does not work with
9467// immediates, we unfold into a different pattern:
9468// ~(~x & m) & (m | y)
9469// If x is a constant, m is a 'not', and the 'andn' does not work with
9470// immediates, we unfold into a different pattern:
9471// (x | ~m) & ~(~m & ~y)
9472// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9473// the very least that breaks andnpd / andnps patterns, and because those
9474// patterns are simplified in IR and shouldn't be created in the DAG
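// For illustration, with x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y  = ((0b0110) & 0b0110) ^ 0b1010 = 0b1100
//   (x & m) | (y & ~m) = 0b0100 | 0b1000            = 0b1100
// i.e. the result takes the bits of x where m is set and the bits of y elsewhere.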
9475SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9476 assert(N->getOpcode() == ISD::XOR);
9477
9478 // Don't touch 'not' (i.e. where y = -1).
9479 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9480 return SDValue();
9481
9482 EVT VT = N->getValueType(0);
9483
9484 // There are 3 commutable operators in the pattern,
9485 // so we have to deal with 8 possible variants of the basic pattern.
9486 SDValue X, Y, M;
9487 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9488 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9489 return false;
9490 SDValue Xor = And.getOperand(XorIdx);
9491 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9492 return false;
9493 SDValue Xor0 = Xor.getOperand(0);
9494 SDValue Xor1 = Xor.getOperand(1);
9495 // Don't touch 'not' (i.e. where y = -1).
9496 if (isAllOnesOrAllOnesSplat(Xor1))
9497 return false;
9498 if (Other == Xor0)
9499 std::swap(Xor0, Xor1);
9500 if (Other != Xor1)
9501 return false;
9502 X = Xor0;
9503 Y = Xor1;
9504 M = And.getOperand(XorIdx ? 0 : 1);
9505 return true;
9506 };
9507
9508 SDValue N0 = N->getOperand(0);
9509 SDValue N1 = N->getOperand(1);
9510 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9511 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9512 return SDValue();
9513
9514 // Don't do anything if the mask is constant. This should not be reachable.
9515 // InstCombine should have already unfolded this pattern, and DAGCombiner
9516 // probably shouldn't produce it either.
9517 if (isa<ConstantSDNode>(M.getNode()))
9518 return SDValue();
9519
9520 // We can transform if the target has AndNot
9521 if (!TLI.hasAndNot(M))
9522 return SDValue();
9523
9524 SDLoc DL(N);
9525
9526 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9527 // a bitwise not that would already allow ANDN to be used.
9528 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9529 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9530 // If not, we need to do a bit more work to make sure andn is still used.
9531 SDValue NotX = DAG.getNOT(DL, X, VT);
9532 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9533 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9534 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9535 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9536 }
9537
9538 // If X is a constant and M is a bitwise not, check that 'andn' works with
9539 // immediates.
9540 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9541 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9542 // If not, we need to do a bit more work to make sure andn is still used.
9543 SDValue NotM = M.getOperand(0);
9544 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9545 SDValue NotY = DAG.getNOT(DL, Y, VT);
9546 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9547 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9548 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9549 }
9550
9551 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9552 SDValue NotM = DAG.getNOT(DL, M, VT);
9553 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9554
9555 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9556}
9557
9558SDValue DAGCombiner::visitXOR(SDNode *N) {
9559 SDValue N0 = N->getOperand(0);
9560 SDValue N1 = N->getOperand(1);
9561 EVT VT = N0.getValueType();
9562 SDLoc DL(N);
9563
9564 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9565 if (N0.isUndef() && N1.isUndef())
9566 return DAG.getConstant(0, DL, VT);
9567
9568 // fold (xor x, undef) -> undef
9569 if (N0.isUndef())
9570 return N0;
9571 if (N1.isUndef())
9572 return N1;
9573
9574 // fold (xor c1, c2) -> c1^c2
9575 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9576 return C;
9577
9578 // canonicalize constant to RHS
9579 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9580 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9581 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9582
9583 // fold vector ops
9584 if (VT.isVector()) {
9585 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9586 return FoldedVOp;
9587
9588 // fold (xor x, 0) -> x, vector edition
9589 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9590 return N0;
9591 }
9592
9593 // fold (xor x, 0) -> x
9594 if (isNullConstant(N1))
9595 return N0;
9596
9597 if (SDValue NewSel = foldBinOpIntoSelect(N))
9598 return NewSel;
9599
9600 // reassociate xor
9601 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9602 return RXOR;
9603
9604 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9605 if (SDValue SD =
9606 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9607 return SD;
9608
9609 // fold (a^b) -> (a|b) iff a and b share no bits.
9610 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9611 DAG.haveNoCommonBitsSet(N0, N1))
9612 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9613
9614 // look for 'add-like' folds:
9615 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9616 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9617 isMinSignedConstant(N1))
9618 if (SDValue Combined = visitADDLike(N))
9619 return Combined;
9620
9621 // fold !(x cc y) -> (x !cc y)
9622 unsigned N0Opcode = N0.getOpcode();
9623 SDValue LHS, RHS, CC;
9624 if (TLI.isConstTrueVal(N1) &&
9625 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9626 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9627 LHS.getValueType());
9628 if (!LegalOperations ||
9629 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9630 switch (N0Opcode) {
9631 default:
9632 llvm_unreachable("Unhandled SetCC Equivalent!");
9633 case ISD::SETCC:
9634 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9635 case ISD::SELECT_CC:
9636 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9637 N0.getOperand(3), NotCC);
9638 case ISD::STRICT_FSETCC:
9639 case ISD::STRICT_FSETCCS: {
9640 if (N0.hasOneUse()) {
9641 // FIXME Can we handle multiple uses? Could we token factor the chain
9642 // results from the new/old setcc?
9643 SDValue SetCC =
9644 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9645 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9646 CombineTo(N, SetCC);
9647 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9648 recursivelyDeleteUnusedNodes(N0.getNode());
9649 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9650 }
9651 break;
9652 }
9653 }
9654 }
9655 }
9656
9657 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9658 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9659 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9660 SDValue V = N0.getOperand(0);
9661 SDLoc DL0(N0);
9662 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9663 DAG.getConstant(1, DL0, V.getValueType()));
9664 AddToWorklist(V.getNode());
9665 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9666 }
9667
9668 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9669 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9670 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9671 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9672 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9673 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9674 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9675 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9676 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9677 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9678 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9679 }
9680 }
9681 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9682 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9683 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9684 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9685 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9686 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9687 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9688 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9689 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9690 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9691 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9692 }
9693 }
9694
9695 // fold (not (neg x)) -> (add X, -1)
9696 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9697 // Y is a constant or the subtract has a single use.
9698 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9699 isNullConstant(N0.getOperand(0))) {
9700 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9701 DAG.getAllOnesConstant(DL, VT));
9702 }
9703
9704 // fold (not (add X, -1)) -> (neg X)
9705 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9706 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9707 return DAG.getNegative(N0.getOperand(0), DL, VT);
9708 }
9709
9710 // fold (xor (and x, y), y) -> (and (not x), y)
9711 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9712 SDValue X = N0.getOperand(0);
9713 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9714 AddToWorklist(NotX.getNode());
9715 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9716 }
9717
9718 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9719 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9720 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9721 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9722 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9723 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9724 SDValue S0 = S.getOperand(0);
9725 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9726 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9727 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9728 return DAG.getNode(ISD::ABS, DL, VT, S0);
9729 }
9730 }
9731
9732 // fold (xor x, x) -> 0
9733 if (N0 == N1)
9734 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9735
9736 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9737 // Here is a concrete example of this equivalence:
9738 // i16 x == 14
9739 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9740 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9741 //
9742 // =>
9743 //
9744 // i16 ~1 == 0b1111111111111110
9745 // i16 rol(~1, 14) == 0b1011111111111111
9746 //
9747 // Some additional tips to help conceptualize this transform:
9748 // - Try to see the operation as placing a single zero in a value of all ones.
9749 // - There exists no value for x which would allow the result to contain zero.
9750 // - Values of x larger than the bitwidth are undefined and do not require a
9751 // consistent result.
9752 // - Pushing the zero left requires shifting one bits in from the right.
9753 // A rotate left of ~1 is a nice way of achieving the desired result.
9754 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9755 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9756 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
9757 N0.getOperand(1));
9758 }
9759
9760 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9761 if (N0Opcode == N1.getOpcode())
9762 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9763 return V;
9764
9765 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9766 return R;
9767 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9768 return R;
9769 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9770 return R;
9771
9772 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9773 if (SDValue MM = unfoldMaskedMerge(N))
9774 return MM;
9775
9776 // Simplify the expression using non-local knowledge.
9777 if (SimplifyDemandedBits(SDValue(N, 0)))
9778 return SDValue(N, 0);
9779
9780 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9781 return Combined;
9782
9783 return SDValue();
9784}
9785
9786/// If we have a shift-by-constant of a bitwise logic op that itself has a
9787/// shift-by-constant operand with identical opcode, we may be able to convert
9788/// that into 2 independent shifts followed by the logic op. This is a
9789/// throughput improvement.
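/// For example (illustrative):
///   (shl (xor (shl X, 2), Y), 3) --> (xor (shl X, 5), (shl Y, 3))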
9790static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9791 // Match a one-use bitwise logic op.
9792 SDValue LogicOp = Shift->getOperand(0);
9793 if (!LogicOp.hasOneUse())
9794 return SDValue();
9795
9796 unsigned LogicOpcode = LogicOp.getOpcode();
9797 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9798 LogicOpcode != ISD::XOR)
9799 return SDValue();
9800
9801 // Find a matching one-use shift by constant.
9802 unsigned ShiftOpcode = Shift->getOpcode();
9803 SDValue C1 = Shift->getOperand(1);
9804 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9805 assert(C1Node && "Expected a shift with constant operand");
9806 const APInt &C1Val = C1Node->getAPIntValue();
9807 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9808 const APInt *&ShiftAmtVal) {
9809 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9810 return false;
9811
9812 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9813 if (!ShiftCNode)
9814 return false;
9815
9816 // Capture the shifted operand and shift amount value.
9817 ShiftOp = V.getOperand(0);
9818 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9819
9820 // Shift amount types do not have to match their operand type, so check that
9821 // the constants are the same width.
9822 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9823 return false;
9824
9825 // The fold is not valid if the sum of the shift values doesn't fit in the
9826 // given shift amount type.
9827 bool Overflow = false;
9828 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9829 if (Overflow)
9830 return false;
9831
9832 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9833 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9834 return false;
9835
9836 return true;
9837 };
9838
9839 // Logic ops are commutative, so check each operand for a match.
9840 SDValue X, Y;
9841 const APInt *C0Val;
9842 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9843 Y = LogicOp.getOperand(1);
9844 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9845 Y = LogicOp.getOperand(0);
9846 else
9847 return SDValue();
9848
9849 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9850 SDLoc DL(Shift);
9851 EVT VT = Shift->getValueType(0);
9852 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9853 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9854 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9855 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9856 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9857 LogicOp->getFlags());
9858}
9859
9860/// Handle transforms common to the three shifts, when the shift amount is a
9861/// constant.
9862/// We are looking for: (shift being one of shl/sra/srl)
9863/// shift (binop X, C0), C1
9864/// And want to transform into:
9865/// binop (shift X, C1), (shift C0, C1)
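/// For example (illustrative):
///   (srl (or X, 0xF0), 4) --> (or (srl X, 4), 0x0F)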
9866SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9867 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9868
9869 // Do not turn a 'not' into a regular xor.
9870 if (isBitwiseNot(N->getOperand(0)))
9871 return SDValue();
9872
9873 // The inner binop must be one-use, since we want to replace it.
9874 SDValue LHS = N->getOperand(0);
9875 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9876 return SDValue();
9877
9878 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9879 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9880 return R;
9881
9882 // We want to pull some binops through shifts, so that we have (and (shift))
9883 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9884 // thing happens with address calculations, so it's important to canonicalize
9885 // it.
9886 switch (LHS.getOpcode()) {
9887 default:
9888 return SDValue();
9889 case ISD::OR:
9890 case ISD::XOR:
9891 case ISD::AND:
9892 break;
9893 case ISD::ADD:
9894 if (N->getOpcode() != ISD::SHL)
9895 return SDValue(); // only shl(add) not sr[al](add).
9896 break;
9897 }
9898
9899 // FIXME: disable this unless the input to the binop is a shift by a constant
9900 // or is copy/select. Enable this in other cases once it is determined to be
9901 // profitable.
9902 SDValue BinOpLHSVal = LHS.getOperand(0);
9903 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9904 BinOpLHSVal.getOpcode() == ISD::SRA ||
9905 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9906 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9907 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9908 BinOpLHSVal.getOpcode() == ISD::SELECT;
9909
9910 if (!IsShiftByConstant && !IsCopyOrSelect)
9911 return SDValue();
9912
9913 if (IsCopyOrSelect && N->hasOneUse())
9914 return SDValue();
9915
9916 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9917 SDLoc DL(N);
9918 EVT VT = N->getValueType(0);
9919 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9920 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9921 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9922 N->getOperand(1));
9923 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9924 }
9925
9926 return SDValue();
9927}
9928
9929SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9930 assert(N->getOpcode() == ISD::TRUNCATE);
9931 assert(N->getOperand(0).getOpcode() == ISD::AND);
9932
9933 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9934 EVT TruncVT = N->getValueType(0);
9935 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9936 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9937 SDValue N01 = N->getOperand(0).getOperand(1);
9938 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9939 SDLoc DL(N);
9940 SDValue N00 = N->getOperand(0).getOperand(0);
9941 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9942 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9943 AddToWorklist(Trunc00.getNode());
9944 AddToWorklist(Trunc01.getNode());
9945 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9946 }
9947 }
9948
9949 return SDValue();
9950}
9951
9952SDValue DAGCombiner::visitRotate(SDNode *N) {
9953 SDLoc dl(N);
9954 SDValue N0 = N->getOperand(0);
9955 SDValue N1 = N->getOperand(1);
9956 EVT VT = N->getValueType(0);
9957 unsigned Bitsize = VT.getScalarSizeInBits();
9958
9959 // fold (rot x, 0) -> x
9960 if (isNullOrNullSplat(N1))
9961 return N0;
9962
9963 // fold (rot x, c) -> x iff (c % BitSize) == 0
9964 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9965 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9966 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9967 return N0;
9968 }
9969
9970 // fold (rot x, c) -> (rot x, c % BitSize)
9971 bool OutOfRange = false;
9972 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9973 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9974 return true;
9975 };
9976 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9977 EVT AmtVT = N1.getValueType();
9978 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9979 if (SDValue Amt =
9980 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9981 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9982 }
9983
9984 // rot i16 X, 8 --> bswap X
9985 auto *RotAmtC = isConstOrConstSplat(N1);
9986 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9987 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9988 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9989
9990 // Simplify the operands using demanded-bits information.
9991 if (SimplifyDemandedBits(SDValue(N, 0)))
9992 return SDValue(N, 0);
9993
9994 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9995 if (N1.getOpcode() == ISD::TRUNCATE &&
9996 N1.getOperand(0).getOpcode() == ISD::AND) {
9997 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9998 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9999 }
10000
10001 unsigned NextOp = N0.getOpcode();
10002
10003 // fold (rot* (rot* x, c2), c1)
10004 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
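// For illustration, on i32: (rotl (rotl x, 20), 30)
// -> (rotl x, ((30 % 32) + (20 % 32) + 32) % 32) = (rotl x, 18),
// matching the direct sum 20 + 30 = 50 == 18 (mod 32).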
10005 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10006 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10007 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10008 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10009 EVT ShiftVT = N1.getValueType();
10010 bool SameSide = (N->getOpcode() == NextOp);
10011 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10012 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10013 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10014 {N1, BitsizeC});
10015 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10016 {N0.getOperand(1), BitsizeC});
10017 if (Norm1 && Norm2)
10018 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10019 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10020 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10021 {CombinedShift, BitsizeC});
10022 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10023 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10024 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10025 CombinedShiftNorm);
10026 }
10027 }
10028 }
10029 return SDValue();
10030}
10031
10032SDValue DAGCombiner::visitSHL(SDNode *N) {
10033 SDValue N0 = N->getOperand(0);
10034 SDValue N1 = N->getOperand(1);
10035 if (SDValue V = DAG.simplifyShift(N0, N1))
10036 return V;
10037
10038 SDLoc DL(N);
10039 EVT VT = N0.getValueType();
10040 EVT ShiftVT = N1.getValueType();
10041 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10042
10043 // fold (shl c1, c2) -> c1<<c2
10044 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10045 return C;
10046
10047 // fold vector ops
10048 if (VT.isVector()) {
10049 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10050 return FoldedVOp;
10051
10052 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10053 // If setcc produces all-one true value then:
10054 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10055 if (N1CV && N1CV->isConstant()) {
10056 if (N0.getOpcode() == ISD::AND) {
10057 SDValue N00 = N0->getOperand(0);
10058 SDValue N01 = N0->getOperand(1);
10059 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10060
10061 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10062 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10063 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10064 if (SDValue C =
10065 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10066 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10067 }
10068 }
10069 }
10070 }
10071
10072 if (SDValue NewSel = foldBinOpIntoSelect(N))
10073 return NewSel;
10074
10075 // if (shl x, c) is known to be zero, return 0
10076 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10077 return DAG.getConstant(0, DL, VT);
10078
10079 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10080 if (N1.getOpcode() == ISD::TRUNCATE &&
10081 N1.getOperand(0).getOpcode() == ISD::AND) {
10082 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10083 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10084 }
10085
10086 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
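// For illustration, on i8: (shl (shl x, 3), 2) -> (shl x, 5), while
// (shl (shl x, 5), 4) -> 0 because 5 + 4 >= 8 shifts out every bit.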
10087 if (N0.getOpcode() == ISD::SHL) {
10088 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10089 ConstantSDNode *RHS) {
10090 APInt c1 = LHS->getAPIntValue();
10091 APInt c2 = RHS->getAPIntValue();
10092 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10093 return (c1 + c2).uge(OpSizeInBits);
10094 };
10095 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10096 return DAG.getConstant(0, DL, VT);
10097
10098 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10099 ConstantSDNode *RHS) {
10100 APInt c1 = LHS->getAPIntValue();
10101 APInt c2 = RHS->getAPIntValue();
10102 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10103 return (c1 + c2).ult(OpSizeInBits);
10104 };
10105 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10106 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10107 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10108 }
10109 }
10110
10111 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10112 // For this to be valid, the second form must not preserve any of the bits
10113 // that are shifted out by the inner shift in the first form. This means
10114 // the outer shift size must be >= the number of bits added by the ext.
10115 // As a corollary, we don't care what kind of ext it is.
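// For illustration, with x: i8 extended to i32 (24 bits added by the ext):
// (shl (zext (shl x, 2)), 26) -> (shl (zext x), 28), valid because the outer
// shift amount 26 >= 24, so the bits the inner i8 shift discards would have
// been shifted past bit 31 anyway.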
10116 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10117 N0.getOpcode() == ISD::ANY_EXTEND ||
10118 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10119 N0.getOperand(0).getOpcode() == ISD::SHL) {
10120 SDValue N0Op0 = N0.getOperand(0);
10121 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10122 EVT InnerVT = N0Op0.getValueType();
10123 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10124
10125 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10126 ConstantSDNode *RHS) {
10127 APInt c1 = LHS->getAPIntValue();
10128 APInt c2 = RHS->getAPIntValue();
10129 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10130 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10131 (c1 + c2).uge(OpSizeInBits);
10132 };
10133 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10134 /*AllowUndefs*/ false,
10135 /*AllowTypeMismatch*/ true))
10136 return DAG.getConstant(0, DL, VT);
10137
10138 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10139 ConstantSDNode *RHS) {
10140 APInt c1 = LHS->getAPIntValue();
10141 APInt c2 = RHS->getAPIntValue();
10142 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10143 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10144 (c1 + c2).ult(OpSizeInBits);
10145 };
10146 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10147 /*AllowUndefs*/ false,
10148 /*AllowTypeMismatch*/ true)) {
10149 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10150 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10151 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10152 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10153 }
10154 }
10155
10156 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10157 // Only fold this if the inner zext has no other uses to avoid increasing
10158 // the total number of instructions.
10159 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10160 N0.getOperand(0).getOpcode() == ISD::SRL) {
10161 SDValue N0Op0 = N0.getOperand(0);
10162 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10163
10164 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10165 APInt c1 = LHS->getAPIntValue();
10166 APInt c2 = RHS->getAPIntValue();
10167 zeroExtendToMatch(c1, c2);
10168 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10169 };
10170 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10171 /*AllowUndefs*/ false,
10172 /*AllowTypeMismatch*/ true)) {
10173 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10174 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10175 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10176 AddToWorklist(NewSHL.getNode());
10177 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10178 }
10179 }
10180
10181 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10182 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10183 ConstantSDNode *RHS) {
10184 const APInt &LHSC = LHS->getAPIntValue();
10185 const APInt &RHSC = RHS->getAPIntValue();
10186 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10187 LHSC.getZExtValue() <= RHSC.getZExtValue();
10188 };
10189
10190 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10191 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10192 if (N0->getFlags().hasExact()) {
10193 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10194 /*AllowUndefs*/ false,
10195 /*AllowTypeMismatch*/ true)) {
10196 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10197 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10198 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10199 }
10200 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10201 /*AllowUndefs*/ false,
10202 /*AllowTypeMismatch*/ true)) {
10203 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10204 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10205 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10206 }
10207 }
10208
10209 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10210 // (and (srl x, (sub c1, c2)), MASK)
10211 // Only fold this if the inner shift has no other uses -- if it does,
10212 // folding this will increase the total number of instructions.
10213 if (N0.getOpcode() == ISD::SRL &&
10214 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10215 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10216 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10217 /*AllowUndefs*/ false,
10218 /*AllowTypeMismatch*/ true)) {
10219 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10220 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10221 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10222 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10223 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10224 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10225 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10226 }
10227 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10228 /*AllowUndefs*/ false,
10229 /*AllowTypeMismatch*/ true)) {
10230 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10231 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10232 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10233 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10234 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10235 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10236 }
10237 }
10238 }
10239
10240 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10241 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10242 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10243 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10244 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10245 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10246 }
10247
10248 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10249 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10250 // Variant of version done on multiply, except mul by a power of 2 is turned
10251 // into a shift.
10252 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10253 TLI.isDesirableToCommuteWithShift(N, Level)) {
10254 SDValue N01 = N0.getOperand(1);
10255 if (SDValue Shl1 =
10256 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10257 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10258 AddToWorklist(Shl0.getNode());
10259 SDNodeFlags Flags;
10260 // Preserve the disjoint flag for Or.
10261 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10262 Flags.setDisjoint(true);
10263 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10264 }
10265 }
10266
10267 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10268 // TODO: Add zext/add_nuw variant with suitable test coverage
10269 // TODO: Should we limit this with isLegalAddImmediate?
10270 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10271 N0.getOperand(0).getOpcode() == ISD::ADD &&
10272 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10273 TLI.isDesirableToCommuteWithShift(N, Level)) {
10274 SDValue Add = N0.getOperand(0);
10275 SDLoc DL(N0);
10276 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10277 {Add.getOperand(1)})) {
10278 if (SDValue ShlC =
10279 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10280 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10281 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10282 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10283 }
10284 }
10285 }
10286
10287 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10288 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10289 SDValue N01 = N0.getOperand(1);
10290 if (SDValue Shl =
10291 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10292 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10293 }
10294
10295 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10296 if (N1C && !N1C->isOpaque())
10297 if (SDValue NewSHL = visitShiftByConstant(N))
10298 return NewSHL;
10299
10300 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10301 // target.
10302 if (((N1.getOpcode() == ISD::CTTZ &&
10303 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10304 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10305 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10306 !TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, ShiftVT)) {
10307 SDValue Y = N1.getOperand(0);
10308 SDLoc DL(N);
10309 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10310 SDValue And =
10311 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10312 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10313 }
10314
10315 if (SimplifyDemandedBits(SDValue(N, 0)))
10316 return SDValue(N, 0);
10317
10318 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10319 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10320 const APInt &C0 = N0.getConstantOperandAPInt(0);
10321 const APInt &C1 = N1C->getAPIntValue();
10322 return DAG.getVScale(DL, VT, C0 << C1);
10323 }
10324
10325 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10326 APInt ShlVal;
10327 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10328 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10329 const APInt &C0 = N0.getConstantOperandAPInt(0);
10330 if (ShlVal.ult(C0.getBitWidth())) {
10331 APInt NewStep = C0 << ShlVal;
10332 return DAG.getStepVector(DL, VT, NewStep);
10333 }
10334 }
10335
10336 return SDValue();
10337}
10338
10339// Transform a right shift of a multiply into a multiply-high.
10340// Examples:
10341 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10342 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
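// For illustration with i8 widened to i16: a = 200, b = 100;
// (zext a) * (zext b) = 20000 = 0x4E20, and (srl 0x4E20, 8) = 0x4E = 78,
// which is exactly mulhu(a, b) = floor((200 * 100) / 256).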
10343static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10344 const TargetLowering &TLI) {
10345 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10346 "SRL or SRA node is required here!");
10347
10348 // Check the shift amount. Proceed with the transformation if the shift
10349 // amount is constant.
10350 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10351 if (!ShiftAmtSrc)
10352 return SDValue();
10353
10354 // The operation feeding into the shift must be a multiply.
10355 SDValue ShiftOperand = N->getOperand(0);
10356 if (ShiftOperand.getOpcode() != ISD::MUL)
10357 return SDValue();
10358
10359 // Both operands must be equivalent extend nodes.
10360 SDValue LeftOp = ShiftOperand.getOperand(0);
10361 SDValue RightOp = ShiftOperand.getOperand(1);
10362
10363 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10364 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10365
10366 if (!IsSignExt && !IsZeroExt)
10367 return SDValue();
10368
10369 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10370 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10371
10372 // return true if U may use the lower bits of its operands
10373 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10374 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10375 return true;
10376 }
10377 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10378 if (!UShiftAmtSrc) {
10379 return true;
10380 }
10381 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10382 return UShiftAmt < NarrowVTSize;
10383 };
10384
10386 // If the lower part of the MUL is also used and MUL_LOHI is supported,
10387 // do not introduce the MULH in favor of MUL_LOHI.
10387 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10388 if (!ShiftOperand.hasOneUse() &&
10389 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10390 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10391 return SDValue();
10392 }
10393
10394 SDValue MulhRightOp;
10395 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10396 unsigned ActiveBits = IsSignExt
10397 ? Constant->getAPIntValue().getSignificantBits()
10398 : Constant->getAPIntValue().getActiveBits();
10399 if (ActiveBits > NarrowVTSize)
10400 return SDValue();
10401 MulhRightOp = DAG.getConstant(
10402 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10403 NarrowVT);
10404 } else {
10405 if (LeftOp.getOpcode() != RightOp.getOpcode())
10406 return SDValue();
10407 // Check that the two extend nodes are the same type.
10408 if (NarrowVT != RightOp.getOperand(0).getValueType())
10409 return SDValue();
10410 MulhRightOp = RightOp.getOperand(0);
10411 }
10412
10413 EVT WideVT = LeftOp.getValueType();
10414 // Proceed with the transformation if the wide types match.
10415 assert((WideVT == RightOp.getValueType()) &&
10416 "Cannot have a multiply node with two different operand types.");
10417
10418 // Proceed with the transformation if the wide type is twice as large
10419 // as the narrow type.
10420 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10421 return SDValue();
10422
10423 // Check the shift amount with the narrow type size.
10424 // Proceed with the transformation if the shift amount is the width
10425 // of the narrow type.
10426 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10427 if (ShiftAmt != NarrowVTSize)
10428 return SDValue();
10429
10430 // If the operation feeding into the MUL is a sign extend (sext),
10431 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10432 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10433
10434 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10435 // or if it is a vector type then we could transform to an acceptable type and
10436 // rely on legalization to split/combine the result.
10437 if (NarrowVT.isVector()) {
10438 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10439 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10440 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10441 return SDValue();
10442 } else {
10443 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10444 return SDValue();
10445 }
10446
10447 SDValue Result =
10448 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10449 bool IsSigned = N->getOpcode() == ISD::SRA;
10450 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10451}
10452
10453// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10454 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10456 unsigned Opcode = N->getOpcode();
10457 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10458 return SDValue();
10459
10460 SDValue N0 = N->getOperand(0);
10461 EVT VT = N->getValueType(0);
10462 SDLoc DL(N);
10463 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10464 SDValue OldLHS = N0.getOperand(0);
10465 SDValue OldRHS = N0.getOperand(1);
10466
10467 // If both operands are bswap/bitreverse, ignore the multiuse restriction.
10468 // Otherwise, we need to ensure the logic_op and bswap/bitreverse(x) have one use.
10469 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10470 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10471 OldRHS.getOperand(0));
10472 }
10473
10474 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10475 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10476 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10477 NewBitReorder);
10478 }
10479
10480 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10481 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10482 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10483 OldRHS.getOperand(0));
10484 }
10485 }
10486 return SDValue();
10487}
10488
10489SDValue DAGCombiner::visitSRA(SDNode *N) {
10490 SDValue N0 = N->getOperand(0);
10491 SDValue N1 = N->getOperand(1);
10492 if (SDValue V = DAG.simplifyShift(N0, N1))
10493 return V;
10494
10495 SDLoc DL(N);
10496 EVT VT = N0.getValueType();
10497 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10498
10499 // fold (sra c1, c2) -> c1 >>s c2
10500 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10501 return C;
10502
10503 // Arithmetic shifting an all-sign-bit value is a no-op.
10504 // fold (sra 0, x) -> 0
10505 // fold (sra -1, x) -> -1
10506 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10507 return N0;
10508
10509 // fold vector ops
10510 if (VT.isVector())
10511 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10512 return FoldedVOp;
10513
10514 if (SDValue NewSel = foldBinOpIntoSelect(N))
10515 return NewSel;
10516
10517 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10518
10519 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10520 // clamp (add c1, c2) to max shift.
10521 if (N0.getOpcode() == ISD::SRA) {
10522 EVT ShiftVT = N1.getValueType();
10523 EVT ShiftSVT = ShiftVT.getScalarType();
10524 SmallVector<SDValue, 16> ShiftValues;
10525
10526 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10527 APInt c1 = LHS->getAPIntValue();
10528 APInt c2 = RHS->getAPIntValue();
10529 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10530 APInt Sum = c1 + c2;
10531 unsigned ShiftSum =
10532 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10533 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10534 return true;
10535 };
10536 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10537 SDValue ShiftValue;
10538 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10539 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10540 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10541 assert(ShiftValues.size() == 1 &&
10542 "Expected matchBinaryPredicate to return one element for "
10543 "SPLAT_VECTORs");
10544 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10545 } else
10546 ShiftValue = ShiftValues[0];
10547 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10548 }
10549 }
10550
10551 // fold (sra (shl X, m), (sub result_size, n))
10552 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10553 // result_size - n != m.
10554 // If truncate is free for the target sext(shl) is likely to result in better
10555 // code.
10556 if (N0.getOpcode() == ISD::SHL && N1C) {
10557 // Get the two constants of the shifts, CN0 = m, CN = n.
10558 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10559 if (N01C) {
10560 LLVMContext &Ctx = *DAG.getContext();
10561 // Determine what the truncate's result bitsize and type would be.
10562 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10563
10564 if (VT.isVector())
10565 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10566
10567 // Determine the residual right-shift amount.
10568 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10569
10570 // If the shift is not a no-op (in which case this should be just a sign
10571 // extend already), the truncated to type is legal, sign_extend is legal
10572 // on that type, and the truncate to that type is both legal and free,
10573 // perform the transform.
10574 if ((ShiftAmt > 0) &&
10575 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10576 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10577 TLI.isTruncateFree(VT, TruncVT)) {
10578 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10579 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10580 N0.getOperand(0), Amt);
10581 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10582 Shift);
10583 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10584 N->getValueType(0), Trunc);
10585 }
10586 }
10587 }
10588
10589 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10590 // sra (add (shl X, N1C), AddC), N1C -->
10591 // sext (add (trunc X to (width - N1C)), AddC')
10592 // sra (sub AddC, (shl X, N1C)), N1C -->
10593 // sext (sub AddC1',(trunc X to (width - N1C)))
10594 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10595 N0.hasOneUse()) {
10596 bool IsAdd = N0.getOpcode() == ISD::ADD;
10597 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10598 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10599 Shl.hasOneUse()) {
10600 // TODO: AddC does not need to be a splat.
10601 if (ConstantSDNode *AddC =
10602 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10603 // Determine what the truncate's type would be and ask the target if
10604 // that is a free operation.
10605 LLVMContext &Ctx = *DAG.getContext();
10606 unsigned ShiftAmt = N1C->getZExtValue();
10607 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10608 if (VT.isVector())
10609 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10610
10611 // TODO: The simple type check probably belongs in the default hook
10612 // implementation and/or target-specific overrides (because
10613 // non-simple types likely require masking when legalized), but
10614 // that restriction may conflict with other transforms.
10615 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10616 TLI.isTruncateFree(VT, TruncVT)) {
10617 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10618 SDValue ShiftC =
10619 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10620 TruncVT.getScalarSizeInBits()),
10621 DL, TruncVT);
10622 SDValue Add;
10623 if (IsAdd)
10624 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10625 else
10626 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10627 return DAG.getSExtOrTrunc(Add, DL, VT);
10628 }
10629 }
10630 }
10631 }
10632
10633 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10634 if (N1.getOpcode() == ISD::TRUNCATE &&
10635 N1.getOperand(0).getOpcode() == ISD::AND) {
10636 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10637 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10638 }
10639
10640 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10641 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10642 // if c1 is equal to the number of bits the trunc removes
10643 // TODO - support non-uniform vector shift amounts.
10644 if (N0.getOpcode() == ISD::TRUNCATE &&
10645 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10646 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10647 N0.getOperand(0).hasOneUse() &&
10648 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10649 SDValue N0Op0 = N0.getOperand(0);
10650 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10651 EVT LargeVT = N0Op0.getValueType();
10652 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10653 if (LargeShift->getAPIntValue() == TruncBits) {
10654 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10655 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10656 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10657 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10658 SDValue SRA =
10659 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10660 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10661 }
10662 }
10663 }
10664
10665 // Simplify, based on bits shifted out of the LHS.
10666 if (SimplifyDemandedBits(SDValue(N, 0)))
10667 return SDValue(N, 0);
10668
10669 // If the sign bit is known to be zero, switch this to a SRL.
10670 if (DAG.SignBitIsZero(N0))
10671 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10672
10673 if (N1C && !N1C->isOpaque())
10674 if (SDValue NewSRA = visitShiftByConstant(N))
10675 return NewSRA;
10676
10677 // Try to transform this shift into a multiply-high if
10678 // it matches the appropriate pattern detected in combineShiftToMULH.
10679 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10680 return MULH;
10681
10682 // Attempt to convert a sra of a load into a narrower sign-extending load.
10683 if (SDValue NarrowLoad = reduceLoadWidth(N))
10684 return NarrowLoad;
10685
10686 if (SDValue AVG = foldShiftToAvg(N))
10687 return AVG;
10688
10689 return SDValue();
10690}
10691
10692SDValue DAGCombiner::visitSRL(SDNode *N) {
10693 SDValue N0 = N->getOperand(0);
10694 SDValue N1 = N->getOperand(1);
10695 if (SDValue V = DAG.simplifyShift(N0, N1))
10696 return V;
10697
10698 SDLoc DL(N);
10699 EVT VT = N0.getValueType();
10700 EVT ShiftVT = N1.getValueType();
10701 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10702
10703 // fold (srl c1, c2) -> c1 >>u c2
10704 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10705 return C;
10706
10707 // fold vector ops
10708 if (VT.isVector())
10709 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10710 return FoldedVOp;
10711
10712 if (SDValue NewSel = foldBinOpIntoSelect(N))
10713 return NewSel;
10714
10715 // if (srl x, c) is known to be zero, return 0
10716 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10717 if (N1C &&
10718 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10719 return DAG.getConstant(0, DL, VT);
10720
10721 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10722 if (N0.getOpcode() == ISD::SRL) {
10723 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10724 ConstantSDNode *RHS) {
10725 APInt c1 = LHS->getAPIntValue();
10726 APInt c2 = RHS->getAPIntValue();
10727 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10728 return (c1 + c2).uge(OpSizeInBits);
10729 };
10730 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10731 return DAG.getConstant(0, DL, VT);
10732
10733 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10734 ConstantSDNode *RHS) {
10735 APInt c1 = LHS->getAPIntValue();
10736 APInt c2 = RHS->getAPIntValue();
10737 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10738 return (c1 + c2).ult(OpSizeInBits);
10739 };
10740 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10741 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10742 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10743 }
10744 }
10745
10746 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10747 N0.getOperand(0).getOpcode() == ISD::SRL) {
10748 SDValue InnerShift = N0.getOperand(0);
10749 // TODO - support non-uniform vector shift amounts.
10750 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10751 uint64_t c1 = N001C->getZExtValue();
10752 uint64_t c2 = N1C->getZExtValue();
10753 EVT InnerShiftVT = InnerShift.getValueType();
10754 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10755 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10756 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10757 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10758 if (c1 + OpSizeInBits == InnerShiftSize) {
10759 if (c1 + c2 >= InnerShiftSize)
10760 return DAG.getConstant(0, DL, VT);
10761 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10762 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10763 InnerShift.getOperand(0), NewShiftAmt);
10764 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10765 }
10766 // In the more general case, we can clear the high bits after the shift:
10767 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10768 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10769 c1 + c2 < InnerShiftSize) {
10770 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10771 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10772 InnerShift.getOperand(0), NewShiftAmt);
10773 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10774 OpSizeInBits - c2),
10775 DL, InnerShiftVT);
10776 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10777 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10778 }
10779 }
10780 }
10781
10782 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10783 // (and (srl x, (sub c2, c1)), MASK)
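// e.g. for i32: srl (shl x, 8), 3 --> and (shl x, 5), 0x1FFFFFE0
//               srl (shl x, 3), 8 --> and (srl x, 5), 0x00FFFFFF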
10784 if (N0.getOpcode() == ISD::SHL &&
10785 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10786 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10787 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10788 ConstantSDNode *RHS) {
10789 const APInt &LHSC = LHS->getAPIntValue();
10790 const APInt &RHSC = RHS->getAPIntValue();
10791 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10792 LHSC.getZExtValue() <= RHSC.getZExtValue();
10793 };
10794 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10795 /*AllowUndefs*/ false,
10796 /*AllowTypeMismatch*/ true)) {
10797 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10798 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10799 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10800 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10801 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10802 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10803 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10804 }
10805 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10806 /*AllowUndefs*/ false,
10807 /*AllowTypeMismatch*/ true)) {
10808 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10809 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10810 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10811 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10812 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10813 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10814 }
10815 }
10816
10817 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10818 // TODO - support non-uniform vector shift amounts.
10819 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10820 // Shifting in all undef bits?
10821 EVT SmallVT = N0.getOperand(0).getValueType();
10822 unsigned BitSize = SmallVT.getScalarSizeInBits();
10823 if (N1C->getAPIntValue().uge(BitSize))
10824 return DAG.getUNDEF(VT);
10825
10826 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10827 uint64_t ShiftAmt = N1C->getZExtValue();
10828 SDLoc DL0(N0);
10829 SDValue SmallShift =
10830 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
10831 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
10832 AddToWorklist(SmallShift.getNode());
10833 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10834 return DAG.getNode(ISD::AND, DL, VT,
10835 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10836 DAG.getConstant(Mask, DL, VT));
10837 }
10838 }
10839
10840 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10841 // bit, which is unmodified by sra.
10842 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10843 if (N0.getOpcode() == ISD::SRA)
10844 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10845 }
10846
10847 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10848 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
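// e.g. for i32, (ctlz x) >> 5 yields 1 iff x == 0; if only bit k of x can be
// nonzero, this simplifies to ((x >> k) ^ 1).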
10849 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10850 isPowerOf2_32(OpSizeInBits) &&
10851 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10852 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10853
10854 // If any of the input bits are KnownOne, then the input couldn't be all
10855 // zeros, thus the result of the srl will always be zero.
10856 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10857
10858 // If all of the bits input to the ctlz node are known to be zero, then
10859 // the result of the ctlz is "32" and the result of the shift is one.
10860 APInt UnknownBits = ~Known.Zero;
10861 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10862
10863 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10864 if (UnknownBits.isPowerOf2()) {
10865 // Okay, we know that only the single bit specified by UnknownBits
10866 // could be set on input to the CTLZ node. If this bit is set, the SRL
10867 // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
10868 // to an SRL/XOR pair, which is likely to simplify more.
10869 unsigned ShAmt = UnknownBits.countr_zero();
10870 SDValue Op = N0.getOperand(0);
10871
10872 if (ShAmt) {
10873 SDLoc DL(N0);
10874 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10875 DAG.getShiftAmountConstant(ShAmt, VT, DL));
10876 AddToWorklist(Op.getNode());
10877 }
10878 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10879 }
10880 }
10881
10882 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10883 if (N1.getOpcode() == ISD::TRUNCATE &&
10884 N1.getOperand(0).getOpcode() == ISD::AND) {
10885 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10886 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10887 }
10888
10889 // fold operands of srl based on knowledge that the low bits are not
10890 // demanded.
10891 if (SimplifyDemandedBits(SDValue(N, 0)))
10892 return SDValue(N, 0);
10893
10894 if (N1C && !N1C->isOpaque())
10895 if (SDValue NewSRL = visitShiftByConstant(N))
10896 return NewSRL;
10897
10898 // Attempt to convert a srl of a load into a narrower zero-extending load.
10899 if (SDValue NarrowLoad = reduceLoadWidth(N))
10900 return NarrowLoad;
10901
10902 // Here is a common situation. We want to optimize:
10903 //
10904 // %a = ...
10905 // %b = and i32 %a, 2
10906 // %c = srl i32 %b, 1
10907 // brcond i32 %c ...
10908 //
10909 // into
10910 //
10911 // %a = ...
10912 // %b = and %a, 2
10913 // %c = setcc eq %b, 0
10914 // brcond %c ...
10915 //
10916 // However, after the source operand of the SRL is optimized into an AND, the
10917 // SRL itself may not be optimized further. Look for it and add the BRCOND
10918 // into the worklist.
10919 //
10920 // This also tends to happen for binary operations when SimplifyDemandedBits
10921 // is involved.
10922 //
10923 // FIXME: This is unnecessary if we process the DAG in topological order,
10924 // which we plan to do. This workaround can be removed once the DAG is
10925 // processed in topological order.
10926 if (N->hasOneUse()) {
10927 SDNode *User = *N->user_begin();
10928
10929 // Look past the truncate.
10930 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
10931 User = *User->user_begin();
10932
10933 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
10934 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
10935 AddToWorklist(User);
10936 }
10937
10938 // Try to transform this shift into a multiply-high if
10939 // it matches the appropriate pattern detected in combineShiftToMULH.
10940 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10941 return MULH;
10942
10943 if (SDValue AVG = foldShiftToAvg(N))
10944 return AVG;
10945
10946 return SDValue();
10947}
10948
10949SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10950 EVT VT = N->getValueType(0);
10951 SDValue N0 = N->getOperand(0);
10952 SDValue N1 = N->getOperand(1);
10953 SDValue N2 = N->getOperand(2);
10954 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10955 unsigned BitWidth = VT.getScalarSizeInBits();
10956 SDLoc DL(N);
10957
10958 // fold (fshl N0, N1, 0) -> N0
10959 // fold (fshr N0, N1, 0) -> N1
10960 if (isPowerOf2_32(BitWidth))
10961 if (DAG.MaskedValueIsZero(
10962 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10963 return IsFSHL ? N0 : N1;
10964
10965 auto IsUndefOrZero = [](SDValue V) {
10966 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10967 };
10968
10969 // TODO - support non-uniform vector shift amounts.
10970 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10971 EVT ShAmtTy = N2.getValueType();
10972
10973 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10974 if (Cst->getAPIntValue().uge(BitWidth)) {
10975 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10976 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10977 DAG.getConstant(RotAmt, DL, ShAmtTy));
10978 }
10979
10980 unsigned ShAmt = Cst->getZExtValue();
10981 if (ShAmt == 0)
10982 return IsFSHL ? N0 : N1;
10983
10984 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10985 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10986 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10987 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10988 if (IsUndefOrZero(N0))
10989 return DAG.getNode(
10990 ISD::SRL, DL, VT, N1,
10991 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10992 if (IsUndefOrZero(N1))
10993 return DAG.getNode(
10994 ISD::SHL, DL, VT, N0,
10995 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10996
10997 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10998 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10999 // TODO - bigendian support once we have test coverage.
11000 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11001 // TODO - permit LHS EXTLOAD if extensions are shifted out.
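// e.g. with little-endian i32 loads where ld1 is at ld0's address + 4,
// (fshl ld1, ld0, 8) == (ld1 << 8) | (ld0 >> 24), which is the i32 load at
// ld0's address + 3 (PtrOff = (32 - 8) / 8).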
11002 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11003 !DAG.getDataLayout().isBigEndian()) {
11004 auto *LHS = dyn_cast<LoadSDNode>(N0);
11005 auto *RHS = dyn_cast<LoadSDNode>(N1);
11006 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11007 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11008 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
11009 ISD::isNON_EXTLoad(LHS)) {
11010 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11011 SDLoc DL(RHS);
11012 uint64_t PtrOff =
11013 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11014 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11015 unsigned Fast = 0;
11016 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11017 RHS->getAddressSpace(), NewAlign,
11018 RHS->getMemOperand()->getFlags(), &Fast) &&
11019 Fast) {
11020 SDValue NewPtr = DAG.getMemBasePlusOffset(
11021 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11022 AddToWorklist(NewPtr.getNode());
11023 SDValue Load = DAG.getLoad(
11024 VT, DL, RHS->getChain(), NewPtr,
11025 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11026 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11027 // Replace the old load's chain with the new load's chain.
11028 WorklistRemover DeadNodes(*this);
11029 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
11030 return Load;
11031 }
11032 }
11033 }
11034 }
11035 }
11036
11037 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11038 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11039 // iff we know the shift amount is in range.
11040 // TODO: when is it worth doing SUB(BW, N2) as well?
11041 if (isPowerOf2_32(BitWidth)) {
11042 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11043 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11044 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11045 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11046 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11047 }
11048
11049 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11050 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11051 // TODO: Investigate flipping this rotate if only one is legal.
11052 // If funnel shift is legal as well we might be better off avoiding
11053 // non-constant (BW - N2).
11054 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11055 if (N0 == N1 && hasOperation(RotOpc, VT))
11056 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11057
11058 // Simplify, based on bits shifted out of N0/N1.
11059 if (SimplifyDemandedBits(SDValue(N, 0)))
11060 return SDValue(N, 0);
11061
11062 return SDValue();
11063}
11064
11065SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11066 SDValue N0 = N->getOperand(0);
11067 SDValue N1 = N->getOperand(1);
11068 if (SDValue V = DAG.simplifyShift(N0, N1))
11069 return V;
11070
11071 SDLoc DL(N);
11072 EVT VT = N0.getValueType();
11073
11074 // fold (*shlsat c1, c2) -> c1<<c2
11075 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11076 return C;
11077
11078 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11079
11080 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11081 // fold (sshlsat x, c) -> (shl x, c)
11082 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11083 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11084 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11085
11086 // fold (ushlsat x, c) -> (shl x, c)
11087 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11088 N1C->getAPIntValue().ule(
11089 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11090 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11091 }
11092
11093 return SDValue();
11094}
11095
11096// Given a ABS node, detect the following patterns:
11097// (ABS (SUB (EXTEND a), (EXTEND b))).
11098// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11099// Generates UABD/SABD instruction.
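// e.g. (abs (sub (sext i8 a to i32), (sext i8 b to i32)))
// --> (zext (abds a, b) to i32), i.e. |a - b| computed in the narrow type.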
11100SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11101 EVT SrcVT = N->getValueType(0);
11102
11103 if (N->getOpcode() == ISD::TRUNCATE)
11104 N = N->getOperand(0).getNode();
11105
11106 if (N->getOpcode() != ISD::ABS)
11107 return SDValue();
11108
11109 EVT VT = N->getValueType(0);
11110 SDValue AbsOp1 = N->getOperand(0);
11111 SDValue Op0, Op1;
11112
11113 if (AbsOp1.getOpcode() != ISD::SUB)
11114 return SDValue();
11115
11116 Op0 = AbsOp1.getOperand(0);
11117 Op1 = AbsOp1.getOperand(1);
11118
11119 unsigned Opc0 = Op0.getOpcode();
11120
11121 // Check if the operands of the sub are (zero|sign)-extended.
11122 // TODO: Should we use ValueTracking instead?
11123 if (Opc0 != Op1.getOpcode() ||
11124 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11125 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11126 // fold (abs (sub nsw x, y)) -> abds(x, y)
11127 // Don't fold this for unsupported types as we lose the NSW handling.
11128 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11129 TLI.preferABDSToABSWithNSW(VT)) {
11130 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11131 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11132 }
11133 return SDValue();
11134 }
11135
11136 EVT VT0, VT1;
11137 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11138 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11139 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11140 } else {
11141 VT0 = Op0.getOperand(0).getValueType();
11142 VT1 = Op1.getOperand(0).getValueType();
11143 }
11144 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11145
11146 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11147 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11148 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11149 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11150 (VT1 == MaxVT || Op1->hasOneUse()) &&
11151 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11152 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11153 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11154 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11155 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11156 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11157 }
11158
11159 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11160 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11161 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11162 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11163 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11164 }
11165
11166 return SDValue();
11167}
11168
11169SDValue DAGCombiner::visitABS(SDNode *N) {
11170 SDValue N0 = N->getOperand(0);
11171 EVT VT = N->getValueType(0);
11172 SDLoc DL(N);
11173
11174 // fold (abs c1) -> c2
11175 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11176 return C;
11177 // fold (abs (abs x)) -> (abs x)
11178 if (N0.getOpcode() == ISD::ABS)
11179 return N0;
11180 // fold (abs x) -> x iff not-negative
11181 if (DAG.SignBitIsZero(N0))
11182 return N0;
11183
11184 if (SDValue ABD = foldABSToABD(N, DL))
11185 return ABD;
11186
11187 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11188 // iff zero_extend/truncate are free.
11189 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11190 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11191 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11192 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11193 hasOperation(ISD::ABS, ExtVT)) {
11194 return DAG.getNode(
11195 ISD::ZERO_EXTEND, DL, VT,
11196 DAG.getNode(ISD::ABS, DL, ExtVT,
11197 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11198 }
11199 }
11200
11201 return SDValue();
11202}
11203
11204SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11205 SDValue N0 = N->getOperand(0);
11206 EVT VT = N->getValueType(0);
11207 SDLoc DL(N);
11208
11209 // fold (bswap c1) -> c2
11210 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11211 return C;
11212 // fold (bswap (bswap x)) -> x
11213 if (N0.getOpcode() == ISD::BSWAP)
11214 return N0.getOperand(0);
11215
11216 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11217 // isn't supported, it will be expanded to bswap followed by a manual reversal
11218 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11219 // the two bswaps if the bitreverse gets expanded.
11220 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11221 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11222 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11223 }
11224
11225 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11226 // iff c >= bw/2 (i.e. lower half is known zero)
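// e.g. for i64: bswap (shl x, 32) --> zext (bswap (trunc x to i32)) to i64,
// since the shifted-in low half is known to be zero.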
11227 unsigned BW = VT.getScalarSizeInBits();
11228 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11229 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11230 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11231 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11232 ShAmt->getZExtValue() >= (BW / 2) &&
11233 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11234 TLI.isTruncateFree(VT, HalfVT) &&
11235 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11236 SDValue Res = N0.getOperand(0);
11237 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11238 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11239 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11240 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11241 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11242 return DAG.getZExtOrTrunc(Res, DL, VT);
11243 }
11244 }
11245
11246 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11247 // inverse-shift-of-bswap:
11248 // bswap (X u<< C) --> (bswap X) u>> C
11249 // bswap (X u>> C) --> (bswap X) u<< C
11250 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11251 N0.hasOneUse()) {
11252 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11253 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11254 ShAmt->getZExtValue() % 8 == 0) {
11255 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11256 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11257 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11258 }
11259 }
11260
11261 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11262 return V;
11263
11264 return SDValue();
11265}
11266
11267SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11268 SDValue N0 = N->getOperand(0);
11269 EVT VT = N->getValueType(0);
11270 SDLoc DL(N);
11271
11272 // fold (bitreverse c1) -> c2
11273 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11274 return C;
11275
11276 // fold (bitreverse (bitreverse x)) -> x
11277 if (N0.getOpcode() == ISD::BITREVERSE)
11278 return N0.getOperand(0);
11279
11280 SDValue X, Y;
11281
11282 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11283 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11285 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11286
11287 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11288 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11290 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11291
11292 return SDValue();
11293}
11294
11295SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11296 SDValue N0 = N->getOperand(0);
11297 EVT VT = N->getValueType(0);
11298 SDLoc DL(N);
11299
11300 // fold (ctlz c1) -> c2
11301 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11302 return C;
11303
11304 // If the value is known never to be zero, switch to the undef version.
11305 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11306 if (DAG.isKnownNeverZero(N0))
11307 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11308
11309 return SDValue();
11310}
11311
11312SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11313 SDValue N0 = N->getOperand(0);
11314 EVT VT = N->getValueType(0);
11315 SDLoc DL(N);
11316
11317 // fold (ctlz_zero_undef c1) -> c2
11318 if (SDValue C =
11319 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11320 return C;
11321 return SDValue();
11322}
11323
11324SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11325 SDValue N0 = N->getOperand(0);
11326 EVT VT = N->getValueType(0);
11327 SDLoc DL(N);
11328
11329 // fold (cttz c1) -> c2
11330 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11331 return C;
11332
11333 // If the value is known never to be zero, switch to the undef version.
11334 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11335 if (DAG.isKnownNeverZero(N0))
11336 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11337
11338 return SDValue();
11339}
11340
11341SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11342 SDValue N0 = N->getOperand(0);
11343 EVT VT = N->getValueType(0);
11344 SDLoc DL(N);
11345
11346 // fold (cttz_zero_undef c1) -> c2
11347 if (SDValue C =
11348 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11349 return C;
11350 return SDValue();
11351}
11352
11353SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11354 SDValue N0 = N->getOperand(0);
11355 EVT VT = N->getValueType(0);
11356 unsigned NumBits = VT.getScalarSizeInBits();
11357 SDLoc DL(N);
11358
11359 // fold (ctpop c1) -> c2
11360 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11361 return C;
11362
11363 // If the source is being shifted, but doesn't affect any active bits,
11364 // then we can call CTPOP on the shift source directly.
11365 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11366 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11367 const APInt &Amt = AmtC->getAPIntValue();
11368 if (Amt.ult(NumBits)) {
11369 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11370 if ((N0.getOpcode() == ISD::SRL &&
11371 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11372 (N0.getOpcode() == ISD::SHL &&
11373 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11374 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11375 }
11376 }
11377 }
11378 }
11379
11380 // If the upper bits are known to be zero, then see if it's profitable to
11381 // only count the lower bits.
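// e.g. an i64 ctpop whose top 32 bits are known zero
// --> (zext (ctpop (trunc x to i32)) to i64).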
11382 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11383 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11384 if (hasOperation(ISD::CTPOP, HalfVT) &&
11385 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11386 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11387 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11388 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11389 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11390 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11391 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11392 }
11393 }
11394 }
11395
11396 return SDValue();
11397}
11398
11399 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11400 SDValue RHS, const SDNodeFlags Flags,
11401 const TargetLowering &TLI) {
11402 EVT VT = LHS.getValueType();
11403 if (!VT.isFloatingPoint())
11404 return false;
11405
11406 const TargetOptions &Options = DAG.getTarget().Options;
11407
11408 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11409 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11410 (Flags.hasNoNaNs() ||
11411 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11412}
11413
11414 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11415 SDValue RHS, SDValue True, SDValue False,
11416 ISD::CondCode CC,
11417 const TargetLowering &TLI,
11418 SelectionDAG &DAG) {
11419 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11420 switch (CC) {
11421 case ISD::SETOLT:
11422 case ISD::SETOLE:
11423 case ISD::SETLT:
11424 case ISD::SETLE:
11425 case ISD::SETULT:
11426 case ISD::SETULE: {
11427 // Since it's known never nan to get here already, either fminnum or
11428 // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
11429 // expanded in terms of it.
11430 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11431 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11432 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11433
11434 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11435 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11436 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11437 return SDValue();
11438 }
11439 case ISD::SETOGT:
11440 case ISD::SETOGE:
11441 case ISD::SETGT:
11442 case ISD::SETGE:
11443 case ISD::SETUGT:
11444 case ISD::SETUGE: {
11445 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11446 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11447 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11448
11449 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11450 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11451 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11452 return SDValue();
11453 }
11454 default:
11455 return SDValue();
11456 }
11457}
11458
11459SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
11460 const unsigned Opcode = N->getOpcode();
11461
11462 // Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
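// e.g. (srl (add nuw x, y), 1) --> (avgflooru x, y); the no-wrap flag
// guarantees the shifted add equals floor((x + y) / 2).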
11463 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11464 return SDValue();
11465
11466 unsigned FloorISD = 0;
11467 auto VT = N->getValueType(0);
11468 bool IsUnsigned = false;
11469
11470 // Decide whether signed or unsigned.
11471 switch (Opcode) {
11472 case ISD::SRA:
11473 if (!hasOperation(ISD::AVGFLOORS, VT))
11474 return SDValue();
11475 FloorISD = ISD::AVGFLOORS;
11476 break;
11477 case ISD::SRL:
11478 IsUnsigned = true;
11479 if (!hasOperation(ISD::AVGFLOORU, VT))
11480 return SDValue();
11481 FloorISD = ISD::AVGFLOORU;
11482 break;
11483 default:
11484 return SDValue();
11485 }
11486
11487 // Captured values.
11488 SDValue A, B, Add;
11489
11490 // Match floor average as it is common to both floor/ceil avgs.
11491 if (!sd_match(N, m_BinOp(Opcode,
11492 m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11493 m_One())))
11494 return SDValue();
11495
11496 // Can't optimize adds that may wrap.
11497 if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11498 return SDValue();
11499
11500 if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11501 return SDValue();
11502
11503 return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11504}
11505
11506/// Generate Min/Max node
11507SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11508 SDValue RHS, SDValue True,
11509 SDValue False, ISD::CondCode CC) {
11510 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11511 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11512
11513 // If we can't directly match this, try to see if we can pull an fneg out of
11514 // the select.
11515 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11516 True, DAG, LegalOperations, ForCodeSize);
11517 if (!NegTrue)
11518 return SDValue();
11519
11520 HandleSDNode NegTrueHandle(NegTrue);
11521
11522 // Try to unfold an fneg from the select if we are comparing the negated
11523 // constant.
11524 //
11525 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11526 //
11527 // TODO: Handle fabs
11528 if (LHS == NegTrue) {
11529 // If we can't directly match this, try to see if we can pull an fneg out of
11530 // the select.
11531 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11532 RHS, DAG, LegalOperations, ForCodeSize);
11533 if (NegRHS) {
11534 HandleSDNode NegRHSHandle(NegRHS);
11535 if (NegRHS == False) {
11536 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11537 False, CC, TLI, DAG);
11538 if (Combined)
11539 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11540 }
11541 }
11542 }
11543
11544 return SDValue();
11545}
11546
11547/// If a (v)select has a condition value that is a sign-bit test, try to smear
11548 /// the condition operand sign-bit across the value width and use it as a mask.
11549 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11550 SelectionDAG &DAG) {
11551 SDValue Cond = N->getOperand(0);
11552 SDValue C1 = N->getOperand(1);
11553 SDValue C2 = N->getOperand(2);
11555 return SDValue();
11556
11557 EVT VT = N->getValueType(0);
11558 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11559 VT != Cond.getOperand(0).getValueType())
11560 return SDValue();
11561
11562 // The inverted-condition + commuted-select variants of these patterns are
11563 // canonicalized to these forms in IR.
11564 SDValue X = Cond.getOperand(0);
11565 SDValue CondC = Cond.getOperand(1);
11566 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11567 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11568 isAllOnesOrAllOnesSplat(C2)) {
11569 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11570 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11571 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11572 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11573 }
11574 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11575 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11576 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11577 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11578 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11579 }
11580 return SDValue();
11581}
11582
11583 static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
11584 const TargetLowering &TLI) {
11585 if (!TLI.convertSelectOfConstantsToMath(VT))
11586 return false;
11587
11588 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11589 return true;
11591 return true;
11592
11593 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11594 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11595 return true;
11596 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11597 return true;
11598
11599 return false;
11600}
11601
11602SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11603 SDValue Cond = N->getOperand(0);
11604 SDValue N1 = N->getOperand(1);
11605 SDValue N2 = N->getOperand(2);
11606 EVT VT = N->getValueType(0);
11607 EVT CondVT = Cond.getValueType();
11608 SDLoc DL(N);
11609
11610 if (!VT.isInteger())
11611 return SDValue();
11612
11613 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11614 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11615 if (!C1 || !C2)
11616 return SDValue();
11617
11618 if (CondVT != MVT::i1 || LegalOperations) {
11619 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11620 // We can't do this reliably if integer-based booleans have different contents
11621 // from floating-point-based booleans. This is because we can't tell whether we
11622 // have an integer-based boolean or a floating-point-based boolean unless we
11623 // can find the SETCC that produced it and inspect its operands. This is
11624 // fairly easy if C is the SETCC node, but it can potentially be
11625 // undiscoverable (or not reasonably discoverable). For example, it could be
11626 // in another basic block or it could require searching a complicated
11627 // expression.
11628 if (CondVT.isInteger() &&
11629 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11630 TargetLowering::ZeroOrOneBooleanContent &&
11631 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11632 TargetLowering::ZeroOrOneBooleanContent &&
11633 C1->isZero() && C2->isOne()) {
11634 SDValue NotCond =
11635 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11636 if (VT.bitsEq(CondVT))
11637 return NotCond;
11638 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11639 }
11640
11641 return SDValue();
11642 }
11643
11644 // Only do this before legalization to avoid conflicting with target-specific
11645 // transforms in the other direction (create a select from a zext/sext). There
11646 // is also a target-independent combine here in DAGCombiner in the other
11647 // direction for (select Cond, -1, 0) when the condition is not i1.
11648 assert(CondVT == MVT::i1 && !LegalOperations);
11649
11650 // select Cond, 1, 0 --> zext (Cond)
11651 if (C1->isOne() && C2->isZero())
11652 return DAG.getZExtOrTrunc(Cond, DL, VT);
11653
11654 // select Cond, -1, 0 --> sext (Cond)
11655 if (C1->isAllOnes() && C2->isZero())
11656 return DAG.getSExtOrTrunc(Cond, DL, VT);
11657
11658 // select Cond, 0, 1 --> zext (!Cond)
11659 if (C1->isZero() && C2->isOne()) {
11660 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11661 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11662 return NotCond;
11663 }
11664
11665 // select Cond, 0, -1 --> sext (!Cond)
11666 if (C1->isZero() && C2->isAllOnes()) {
11667 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11668 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11669 return NotCond;
11670 }
11671
11672 // Use a target hook because some targets may prefer to transform in the
11673 // other direction.
11674 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11675 return SDValue();
11676
11677 // For any constants that differ by 1, we can transform the select into
11678 // an extend and add.
11679 const APInt &C1Val = C1->getAPIntValue();
11680 const APInt &C2Val = C2->getAPIntValue();
11681
11682 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11683 if (C1Val - 1 == C2Val) {
11684 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11685 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11686 }
11687
11688 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11689 if (C1Val + 1 == C2Val) {
11690 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11691 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11692 }
11693
11694 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11695 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11696 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11697 SDValue ShAmtC =
11698 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11699 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11700 }
11701
11702 // select Cond, -1, C --> or (sext Cond), C
11703 if (C1->isAllOnes()) {
11704 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11705 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11706 }
11707
11708 // select Cond, C, -1 --> or (sext (not Cond)), C
11709 if (C2->isAllOnes()) {
11710 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11711 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11712 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11713 }
11714
11715 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11716 return V;
11717
11718 return SDValue();
11719}
11720
11721template <class MatchContextClass>
11722 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
11723 SelectionDAG &DAG) {
11724 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11725 N->getOpcode() == ISD::VP_SELECT) &&
11726 "Expected a (v)(vp.)select");
11727 SDValue Cond = N->getOperand(0);
11728 SDValue T = N->getOperand(1), F = N->getOperand(2);
11729 EVT VT = N->getValueType(0);
11730 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11731 MatchContextClass matcher(DAG, TLI, N);
11732
11733 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11734 return SDValue();
11735
11736 // select Cond, Cond, F --> or Cond, freeze(F)
11737 // select Cond, 1, F --> or Cond, freeze(F)
11738 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11739 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
11740
11741 // select Cond, T, Cond --> and Cond, freeze(T)
11742 // select Cond, T, 0 --> and Cond, freeze(T)
11743 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11744 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
11745
11746 // select Cond, T, 1 --> or (not Cond), freeze(T)
11747 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11748 SDValue NotCond =
11749 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11750 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
11751 }
11752
11753 // select Cond, 0, F --> and (not Cond), freeze(F)
11754 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11755 SDValue NotCond =
11756 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11757 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
11758 }
11759
11760 return SDValue();
11761}
11762
11763 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11764 SDValue N0 = N->getOperand(0);
11765 SDValue N1 = N->getOperand(1);
11766 SDValue N2 = N->getOperand(2);
11767 EVT VT = N->getValueType(0);
11768 unsigned EltSizeInBits = VT.getScalarSizeInBits();
11769
11770 SDValue Cond0, Cond1;
11771 ISD::CondCode CC;
11772 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
11773 m_CondCode(CC)))) ||
11774 VT != Cond0.getValueType())
11775 return SDValue();
11776
11777 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11778 // compare is inverted from that pattern ("Cond0 s> -1").
11779 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11780 ; // This is the pattern we are looking for.
11781 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11782 std::swap(N1, N2);
11783 else
11784 return SDValue();
11785
11786 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11787 if (isNullOrNullSplat(N2)) {
11788 SDLoc DL(N);
11789 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11790 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11791 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
11792 }
11793
11794 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11795 if (isAllOnesOrAllOnesSplat(N1)) {
11796 SDLoc DL(N);
11797 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11798 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11799 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
11800 }
11801
11802 // If we have to invert the sign bit mask, only do that transform if the
11803 // target has a bitwise 'and not' instruction (the invert is free).
11804 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11805 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11806 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11807 SDLoc DL(N);
11808 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11809 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11810 SDValue Not = DAG.getNOT(DL, Sra, VT);
11811 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
11812 }
11813
11814 // TODO: There's another pattern in this family, but it may require
11815 // implementing hasOrNot() to check for profitability:
11816 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11817
11818 return SDValue();
11819}
11820
11821// Match SELECTs with absolute difference patterns.
11822// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11823// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11824// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11825// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
11826SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
11827 SDValue False, ISD::CondCode CC,
11828 const SDLoc &DL) {
11829 bool IsSigned = isSignedIntSetCC(CC);
11830 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
11831 EVT VT = LHS.getValueType();
11832
11833 if (LegalOperations && !hasOperation(ABDOpc, VT))
11834 return SDValue();
11835
11836 switch (CC) {
11837 case ISD::SETGT:
11838 case ISD::SETGE:
11839 case ISD::SETUGT:
11840 case ISD::SETUGE:
11841 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11842 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
11843 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11844 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11845 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11846 hasOperation(ABDOpc, VT))
11847 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11848 break;
11849 case ISD::SETLT:
11850 case ISD::SETLE:
11851 case ISD::SETULT:
11852 case ISD::SETULE:
11853 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11854 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
11855 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11856 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11857 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11858 hasOperation(ABDOpc, VT))
11859 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11860 break;
11861 default:
11862 break;
11863 }
11864
11865 return SDValue();
11866}
11867
11868SDValue DAGCombiner::visitSELECT(SDNode *N) {
11869 SDValue N0 = N->getOperand(0);
11870 SDValue N1 = N->getOperand(1);
11871 SDValue N2 = N->getOperand(2);
11872 EVT VT = N->getValueType(0);
11873 EVT VT0 = N0.getValueType();
11874 SDLoc DL(N);
11875 SDNodeFlags Flags = N->getFlags();
11876
11877 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11878 return V;
11879
11880 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
11881 return V;
11882
11883 // select (not Cond), N1, N2 -> select Cond, N2, N1
11884 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11885 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11886 SelectOp->setFlags(Flags);
11887 return SelectOp;
11888 }
11889
11890 if (SDValue V = foldSelectOfConstants(N))
11891 return V;
11892
11893 // If we can fold this based on the true/false value, do so.
11894 if (SimplifySelectOps(N, N1, N2))
11895 return SDValue(N, 0); // Don't revisit N.
11896
11897 if (VT0 == MVT::i1) {
11898 // The code in this block deals with the following 2 equivalences:
11899 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11900 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11901 // The target can specify its preferred form with the
11902 // shouldNormalizeToSelectSequence() callback. However, we always transform
11903 // to the right-hand form if the inner select already exists in the DAG,
11904 // and we always transform to the left-hand form if we know that we can
11905 // further optimize the combination of the conditions.
11906 bool normalizeToSequence =
11907 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11908 // select (and Cond0, Cond1), X, Y
11909 // -> select Cond0, (select Cond1, X, Y), Y
11910 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11911 SDValue Cond0 = N0->getOperand(0);
11912 SDValue Cond1 = N0->getOperand(1);
11913 SDValue InnerSelect =
11914 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11915 if (normalizeToSequence || !InnerSelect.use_empty())
11916 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11917 InnerSelect, N2, Flags);
11918 // Cleanup on failure.
11919 if (InnerSelect.use_empty())
11920 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11921 }
11922 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11923 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11924 SDValue Cond0 = N0->getOperand(0);
11925 SDValue Cond1 = N0->getOperand(1);
11926 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11927 Cond1, N1, N2, Flags);
11928 if (normalizeToSequence || !InnerSelect.use_empty())
11929 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11930 InnerSelect, Flags);
11931 // Cleanup on failure.
11932 if (InnerSelect.use_empty())
11933 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11934 }
11935
11936 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11937 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11938 SDValue N1_0 = N1->getOperand(0);
11939 SDValue N1_1 = N1->getOperand(1);
11940 SDValue N1_2 = N1->getOperand(2);
11941 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11942 // Create the actual and node if we can generate good code for it.
11943 if (!normalizeToSequence) {
11944 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11945 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11946 N2, Flags);
11947 }
11948 // Otherwise see if we can optimize the "and" to a better pattern.
11949 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11950 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11951 N2, Flags);
11952 }
11953 }
11954 }
11955 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11956 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11957 SDValue N2_0 = N2->getOperand(0);
11958 SDValue N2_1 = N2->getOperand(1);
11959 SDValue N2_2 = N2->getOperand(2);
11960 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11961 // Create the actual or node if we can generate good code for it.
11962 if (!normalizeToSequence) {
11963 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11964 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11965 N2_2, Flags);
11966 }
11967 // Otherwise see if we can optimize to a better pattern.
11968 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11969 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11970 N2_2, Flags);
11971 }
11972 }
11973
11974 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11975 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11976 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
11977 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11978 N2.getOperand(1) == N1.getOperand(0) &&
11979 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11980 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
11981
11982 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11983 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11984 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
11985 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11986 N2.getOperand(1) == N1.getOperand(0) &&
11987 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11988 return DAG.getNegative(
11989 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
11990 DL, VT);
11991 }
11992
11993 // Fold selects based on a setcc into other things, such as min/max/abs.
11994 if (N0.getOpcode() == ISD::SETCC) {
11995 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11996 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11997
11998 // select (fcmp lt x, y), x, y -> fminnum x, y
11999 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12000 //
12001 // This is OK if we don't care what happens if either operand is a NaN.
12002 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12003 if (SDValue FMinMax =
12004 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12005 return FMinMax;
12006
12007 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12008 // This is conservatively limited to pre-legal-operations to give targets
12009 // a chance to reverse the transform if they want to do that. Also, it is
12010 // unlikely that the pattern would be formed late, so it's probably not
12011 // worth going through the other checks.
12012 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12013 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12014 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12015 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12016 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12017 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12018 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12019 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12020 //
12021 // The IR equivalent of this transform would have this form:
12022 // %a = add %x, C
12023 // %c = icmp ugt %x, ~C
12024 // %r = select %c, -1, %a
12025 // =>
12026 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12027 // %u0 = extractvalue %u, 0
12028 // %u1 = extractvalue %u, 1
12029 // %r = select %u1, -1, %u0
12030 SDVTList VTs = DAG.getVTList(VT, VT0);
12031 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12032 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12033 }
12034 }
12035
12036 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12037 (!LegalOperations &&
12038 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12039 // Any flags available in a select/setcc fold will be on the setcc as they
12040 // migrated from fcmp
12041 Flags = N0->getFlags();
12042 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
12043 N2, N0.getOperand(2));
12044 SelectNode->setFlags(Flags);
12045 return SelectNode;
12046 }
12047
12048 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12049 return ABD;
12050
12051 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12052 return NewSel;
12053 }
12054
12055 if (!VT.isVector())
12056 if (SDValue BinOp = foldSelectOfBinops(N))
12057 return BinOp;
12058
12059 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12060 return R;
12061
12062 return SDValue();
12063}
12064
12065// This function assumes all the vselect's arguments are CONCAT_VECTOR
12066// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
12067 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12068 SDLoc DL(N);
12069 SDValue Cond = N->getOperand(0);
12070 SDValue LHS = N->getOperand(1);
12071 SDValue RHS = N->getOperand(2);
12072 EVT VT = N->getValueType(0);
12073 int NumElems = VT.getVectorNumElements();
12074 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12075 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12076 Cond.getOpcode() == ISD::BUILD_VECTOR);
12077
12078 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12079 // binary ones here.
12080 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12081 return SDValue();
12082
12083 // We're sure we have an even number of elements due to the
12084 // concat_vectors we have as arguments to vselect.
12085 // Skip BV elements until we find one that's not an UNDEF.
12086 // After we find a non-UNDEF element, keep looping until we get to half the
12087 // length of the BV and see if all the non-undef nodes are the same.
12088 ConstantSDNode *BottomHalf = nullptr;
12089 for (int i = 0; i < NumElems / 2; ++i) {
12090 if (Cond->getOperand(i)->isUndef())
12091 continue;
12092
12093 if (BottomHalf == nullptr)
12094 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12095 else if (Cond->getOperand(i).getNode() != BottomHalf)
12096 return SDValue();
12097 }
12098
12099 // Do the same for the second half of the BuildVector
12100 ConstantSDNode *TopHalf = nullptr;
12101 for (int i = NumElems / 2; i < NumElems; ++i) {
12102 if (Cond->getOperand(i)->isUndef())
12103 continue;
12104
12105 if (TopHalf == nullptr)
12106 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12107 else if (Cond->getOperand(i).getNode() != TopHalf)
12108 return SDValue();
12109 }
12110
12111 assert(TopHalf && BottomHalf &&
12112 "One half of the selector was all UNDEFs and the other was all the "
12113 "same value. This should have been addressed before this function.");
12114 return DAG.getNode(
12115 ISD::CONCAT_VECTORS, DL, VT,
12116 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12117 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12118}
12119
12120bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12121 SelectionDAG &DAG, const SDLoc &DL) {
12122
12123 // Only perform the transformation when existing operands can be reused.
12124 if (IndexIsScaled)
12125 return false;
12126
12127 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12128 return false;
12129
12130 EVT VT = BasePtr.getValueType();
12131
12132 if (SDValue SplatVal = DAG.getSplatValue(Index);
12133 SplatVal && !isNullConstant(SplatVal) &&
12134 SplatVal.getValueType() == VT) {
12135 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12136 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12137 return true;
12138 }
12139
12140 if (Index.getOpcode() != ISD::ADD)
12141 return false;
12142
12143 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12144 SplatVal && SplatVal.getValueType() == VT) {
12145 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12146 Index = Index.getOperand(1);
12147 return true;
12148 }
12149 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12150 SplatVal && SplatVal.getValueType() == VT) {
12151 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12152 Index = Index.getOperand(0);
12153 return true;
12154 }
12155 return false;
12156}
12157
12158// Fold sext/zext of index into index type.
12159bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12160 SelectionDAG &DAG) {
12161 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12162
12163 // It's always safe to look through zero extends.
12164 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12165 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12166 IndexType = ISD::UNSIGNED_SCALED;
12167 Index = Index.getOperand(0);
12168 return true;
12169 }
12170 if (ISD::isIndexTypeSigned(IndexType)) {
12171 IndexType = ISD::UNSIGNED_SCALED;
12172 return true;
12173 }
12174 }
12175
12176 // It's only safe to look through sign extends when Index is signed.
12177 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12178 ISD::isIndexTypeSigned(IndexType) &&
12179 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12180 Index = Index.getOperand(0);
12181 return true;
12182 }
12183
12184 return false;
12185}
12186
12187SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12188 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12189 SDValue Mask = MSC->getMask();
12190 SDValue Chain = MSC->getChain();
12191 SDValue Index = MSC->getIndex();
12192 SDValue Scale = MSC->getScale();
12193 SDValue StoreVal = MSC->getValue();
12194 SDValue BasePtr = MSC->getBasePtr();
12195 SDValue VL = MSC->getVectorLength();
12196 ISD::MemIndexType IndexType = MSC->getIndexType();
12197 SDLoc DL(N);
12198
12199 // Zap scatters with a zero mask.
12200 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12201 return Chain;
12202
12203 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12204 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12205 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12206 DL, Ops, MSC->getMemOperand(), IndexType);
12207 }
12208
12209 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12210 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12211 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12212 DL, Ops, MSC->getMemOperand(), IndexType);
12213 }
12214
12215 return SDValue();
12216}
12217
12218SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12219 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12220 SDValue Mask = MSC->getMask();
12221 SDValue Chain = MSC->getChain();
12222 SDValue Index = MSC->getIndex();
12223 SDValue Scale = MSC->getScale();
12224 SDValue StoreVal = MSC->getValue();
12225 SDValue BasePtr = MSC->getBasePtr();
12226 ISD::MemIndexType IndexType = MSC->getIndexType();
12227 SDLoc DL(N);
12228
12229 // Zap scatters with a zero mask.
12230 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12231 return Chain;
12232
12233 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12234 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12235 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12236 DL, Ops, MSC->getMemOperand(), IndexType,
12237 MSC->isTruncatingStore());
12238 }
12239
12240 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12241 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12242 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12243 DL, Ops, MSC->getMemOperand(), IndexType,
12244 MSC->isTruncatingStore());
12245 }
12246
12247 return SDValue();
12248}
12249
12250SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12251 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12252 SDValue Mask = MST->getMask();
12253 SDValue Chain = MST->getChain();
12254 SDValue Value = MST->getValue();
12255 SDValue Ptr = MST->getBasePtr();
12256 SDLoc DL(N);
12257
12258 // Zap masked stores with a zero mask.
12259 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12260 return Chain;
12261
12262 // Remove a masked store if base pointers and masks are equal.
12263 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12264 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12265 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12266 !MST->getBasePtr().isUndef() &&
12267 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12268 MST1->getMemoryVT().getStoreSize()) ||
12269 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12270 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12271 MST->getMemoryVT().getStoreSize())) {
12272 CombineTo(MST1, MST1->getChain());
12273 if (N->getOpcode() != ISD::DELETED_NODE)
12274 AddToWorklist(N);
12275 return SDValue(N, 0);
12276 }
12277 }
12278
12279  // If this is a masked store with an all-ones mask, we can use an unmasked store.
12280 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12281 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12282 !MST->isCompressingStore() && !MST->isTruncatingStore())
12283 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12284 MST->getBasePtr(), MST->getPointerInfo(),
12285 MST->getOriginalAlign(),
12286 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12287
12288 // Try transforming N to an indexed store.
12289 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12290 return SDValue(N, 0);
12291
12292 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12293 Value.getValueType().isInteger() &&
12294 (!isa<ConstantSDNode>(Value) ||
12295 !cast<ConstantSDNode>(Value)->isOpaque())) {
12296 APInt TruncDemandedBits =
12297 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12298                             MST->getMemoryVT().getScalarSizeInBits());
12299
12300 // See if we can simplify the operation with
12301 // SimplifyDemandedBits, which only works if the value has a single use.
12302 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12303      // Re-visit the store if anything changed and the store hasn't been merged
12304      // with another node (N is deleted). SimplifyDemandedBits will add Value's
12305      // node back to the worklist if necessary, but we also need to re-visit
12306      // the Store node itself.
12307 if (N->getOpcode() != ISD::DELETED_NODE)
12308 AddToWorklist(N);
12309 return SDValue(N, 0);
12310 }
12311 }
12312
12313 // If this is a TRUNC followed by a masked store, fold this into a masked
12314 // truncating store. We can do this even if this is already a masked
12315 // truncstore.
12316  // TODO: Try to combine to a masked compress store if possible.
12317 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12318 MST->isUnindexed() && !MST->isCompressingStore() &&
12319 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12320 MST->getMemoryVT(), LegalOperations)) {
12321 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12322 Value.getOperand(0).getValueType());
12323 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12324 MST->getOffset(), Mask, MST->getMemoryVT(),
12325 MST->getMemOperand(), MST->getAddressingMode(),
12326 /*IsTruncating=*/true);
12327 }
12328
12329 return SDValue();
12330}
12331
12332SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12333 auto *SST = cast<VPStridedStoreSDNode>(N);
12334 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12335 // Combine strided stores with unit-stride to a regular VP store.
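  // A stride equal to the element store size (in bytes) means the elements are
  // written to consecutive addresses, which is exactly what a regular VP store
  // does.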
12336 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12337 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12338 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12339 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12340 SST->getVectorLength(), SST->getMemoryVT(),
12341 SST->getMemOperand(), SST->getAddressingMode(),
12342 SST->isTruncatingStore(), SST->isCompressingStore());
12343 }
12344 return SDValue();
12345}
12346
12347SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12348 SDLoc DL(N);
12349 SDValue Vec = N->getOperand(0);
12350 SDValue Mask = N->getOperand(1);
12351 SDValue Passthru = N->getOperand(2);
12352 EVT VecVT = Vec.getValueType();
12353
12354 bool HasPassthru = !Passthru.isUndef();
12355
12356 APInt SplatVal;
12357 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12358 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12359
12360 if (Vec.isUndef() || Mask.isUndef())
12361 return Passthru;
12362
12363 // No need for potentially expensive compress if the mask is constant.
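  // For a constant mask the result can be built directly; for example,
  // compressing <a, b, c, d> with mask <1, 0, 1, 0> and passthru <p, p, p, p>
  // yields <a, c, p, p>.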
12364  if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12365    SmallVector<SDValue> Ops;
12366    EVT ScalarVT = VecVT.getVectorElementType();
12367 unsigned NumSelected = 0;
12368 unsigned NumElmts = VecVT.getVectorNumElements();
12369 for (unsigned I = 0; I < NumElmts; ++I) {
12370 SDValue MaskI = Mask.getOperand(I);
12371 // We treat undef mask entries as "false".
12372 if (MaskI.isUndef())
12373 continue;
12374
12375 if (TLI.isConstTrueVal(MaskI)) {
12376 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12377 DAG.getVectorIdxConstant(I, DL));
12378 Ops.push_back(VecI);
12379 NumSelected++;
12380 }
12381 }
12382 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12383 SDValue Val =
12384 HasPassthru
12385 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12386 DAG.getVectorIdxConstant(Rest, DL))
12387 : DAG.getUNDEF(ScalarVT);
12388 Ops.push_back(Val);
12389 }
12390 return DAG.getBuildVector(VecVT, DL, Ops);
12391 }
12392
12393 return SDValue();
12394}
12395
12396SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12397 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12398 SDValue Mask = MGT->getMask();
12399 SDValue Chain = MGT->getChain();
12400 SDValue Index = MGT->getIndex();
12401 SDValue Scale = MGT->getScale();
12402 SDValue BasePtr = MGT->getBasePtr();
12403 SDValue VL = MGT->getVectorLength();
12404 ISD::MemIndexType IndexType = MGT->getIndexType();
12405 SDLoc DL(N);
12406
12407 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12408 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12409 return DAG.getGatherVP(
12410 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12411 Ops, MGT->getMemOperand(), IndexType);
12412 }
12413
12414 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12415 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12416 return DAG.getGatherVP(
12417 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12418 Ops, MGT->getMemOperand(), IndexType);
12419 }
12420
12421 return SDValue();
12422}
12423
12424SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12425 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12426 SDValue Mask = MGT->getMask();
12427 SDValue Chain = MGT->getChain();
12428 SDValue Index = MGT->getIndex();
12429 SDValue Scale = MGT->getScale();
12430 SDValue PassThru = MGT->getPassThru();
12431 SDValue BasePtr = MGT->getBasePtr();
12432 ISD::MemIndexType IndexType = MGT->getIndexType();
12433 SDLoc DL(N);
12434
12435 // Zap gathers with a zero mask.
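  // With a known all-zero mask no lanes are loaded, so the result is just the
  // pass-through value and the incoming chain.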
12436  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12437    return CombineTo(N, PassThru, MGT->getChain());
12438
12439 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12440 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12441 return DAG.getMaskedGather(
12442 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12443 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12444 }
12445
12446 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12447 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12448 return DAG.getMaskedGather(
12449 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12450 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12451 }
12452
12453 return SDValue();
12454}
12455
12456SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12457 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12458 SDValue Mask = MLD->getMask();
12459 SDLoc DL(N);
12460
12461 // Zap masked loads with a zero mask.
12462  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12463    return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12464
12465  // If this is a masked load with an all-ones mask, we can use an unmasked load.
12466 // FIXME: Can we do this for indexed, expanding, or extending loads?
12467 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12468 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12469 SDValue NewLd = DAG.getLoad(
12470 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12471 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12472 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12473 return CombineTo(N, NewLd, NewLd.getValue(1));
12474 }
12475
12476 // Try transforming N to an indexed load.
12477 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12478 return SDValue(N, 0);
12479
12480 return SDValue();
12481}
12482
12483SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12484 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12485 SDValue Chain = HG->getChain();
12486 SDValue Inc = HG->getInc();
12487 SDValue Mask = HG->getMask();
12488 SDValue BasePtr = HG->getBasePtr();
12489 SDValue Index = HG->getIndex();
12490 SDLoc DL(HG);
12491
12492 EVT MemVT = HG->getMemoryVT();
12493 MachineMemOperand *MMO = HG->getMemOperand();
12494 ISD::MemIndexType IndexType = HG->getIndexType();
12495
12496  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12497    return Chain;
12498
12499 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12500 HG->getScale(), HG->getIntID()};
12501 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12502 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12503 MMO, IndexType);
12504
12505 EVT DataVT = Index.getValueType();
12506 if (refineIndexType(Index, IndexType, DataVT, DAG))
12507 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12508 MMO, IndexType);
12509 return SDValue();
12510}
12511
12512SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12513 auto *SLD = cast<VPStridedLoadSDNode>(N);
12514 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12515 // Combine strided loads with unit-stride to a regular VP load.
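  // As with strided stores, a stride equal to the element store size means the
  // elements are read from consecutive addresses, i.e. a plain VP load.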
12516 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12517 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12518 SDValue NewLd = DAG.getLoadVP(
12519 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12520 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12521 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12522 SLD->getMemOperand(), SLD->isExpandingLoad());
12523 return CombineTo(N, NewLd, NewLd.getValue(1));
12524 }
12525 return SDValue();
12526}
12527
12528/// A vector select of 2 constant vectors can be simplified to math/logic to
12529/// avoid a variable select instruction and possibly avoid constant loads.
12530SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12531 SDValue Cond = N->getOperand(0);
12532 SDValue N1 = N->getOperand(1);
12533 SDValue N2 = N->getOperand(2);
12534 EVT VT = N->getValueType(0);
12535 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12536      !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12537      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12538      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12539    return SDValue();
12540
12541 // Check if we can use the condition value to increment/decrement a single
12542 // constant value. This simplifies a select to an add and removes a constant
12543 // load/materialization from the general case.
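  // For example, vselect <4 x i1> Cond, <5,5,5,5>, <4,4,4,4> becomes
  // add (zext Cond), <4,4,4,4>, since each true lane contributes exactly +1.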
12544 bool AllAddOne = true;
12545 bool AllSubOne = true;
12546 unsigned Elts = VT.getVectorNumElements();
12547 for (unsigned i = 0; i != Elts; ++i) {
12548 SDValue N1Elt = N1.getOperand(i);
12549 SDValue N2Elt = N2.getOperand(i);
12550 if (N1Elt.isUndef() || N2Elt.isUndef())
12551 continue;
12552 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12553 AllAddOne = false;
12554 AllSubOne = false;
12555 break;
12556 }
12557
12558 const APInt &C1 = N1Elt->getAsAPIntVal();
12559 const APInt &C2 = N2Elt->getAsAPIntVal();
12560 if (C1 != C2 + 1)
12561 AllAddOne = false;
12562 if (C1 != C2 - 1)
12563 AllSubOne = false;
12564 }
12565
12566 // Further simplifications for the extra-special cases where the constants are
12567 // all 0 or all -1 should be implemented as folds of these patterns.
12568 SDLoc DL(N);
12569 if (AllAddOne || AllSubOne) {
12570 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12571 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12572 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12573 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12574 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12575 }
12576
12577 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12578 APInt Pow2C;
12579 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12580 isNullOrNullSplat(N2)) {
12581 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12582 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12583 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12584 }
12585
12586  if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12587    return V;
12588
12589 // The general case for select-of-constants:
12590 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12591 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12592 // leave that to a machine-specific pass.
12593 return SDValue();
12594}
12595
12596SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12597 SDValue N0 = N->getOperand(0);
12598 SDValue N1 = N->getOperand(1);
12599 SDValue N2 = N->getOperand(2);
12600 SDLoc DL(N);
12601
12602 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12603 return V;
12604
12605 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
12606 return V;
12607
12608 return SDValue();
12609}
12610
12611SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12612 SDValue N0 = N->getOperand(0);
12613 SDValue N1 = N->getOperand(1);
12614 SDValue N2 = N->getOperand(2);
12615 EVT VT = N->getValueType(0);
12616 SDLoc DL(N);
12617
12618 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12619 return V;
12620
12621 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12622 return V;
12623
12624 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12625 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12626 return DAG.getSelect(DL, VT, F, N2, N1);
12627
12628 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12629 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12630      DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12631      N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12632      TLI.getBooleanContents(N0.getValueType()) ==
12633          TargetLowering::ZeroOrNegativeOneBooleanContent) {
12634 return DAG.getNode(
12635 ISD::ADD, DL, N1.getValueType(), N2,
12636 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12637 }
12638
12639 // Canonicalize integer abs.
12640 // vselect (setg[te] X, 0), X, -X ->
12641 // vselect (setgt X, -1), X, -X ->
12642 // vselect (setl[te] X, 0), -X, X ->
12643 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
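  // For example, for i32: Y = X >> 31 (arithmetic) is 0 or -1; (X + Y) ^ Y is
  // X when X >= 0, and (X - 1) ^ -1 == -X when X < 0, i.e. |X|.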
12644 if (N0.getOpcode() == ISD::SETCC) {
12645 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12646 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12647 bool isAbs = false;
12648 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12649
12650 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12651 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12652 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12653      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12654    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12655             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12656      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12657
12658 if (isAbs) {
12659      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12660        return DAG.getNode(ISD::ABS, DL, VT, LHS);
12661
12662 SDValue Shift = DAG.getNode(
12663 ISD::SRA, DL, VT, LHS,
12664 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12665 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12666 AddToWorklist(Shift.getNode());
12667 AddToWorklist(Add.getNode());
12668 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12669 }
12670
12671 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12672 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12673 //
12674 // This is OK if we don't care about what happens if either operand is a
12675 // NaN.
12676 //
12677 if (N0.hasOneUse() &&
12678 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12679 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12680 return FMinMax;
12681 }
12682
12683 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12684 return S;
12685 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12686 return S;
12687
12688 // If this select has a condition (setcc) with narrower operands than the
12689 // select, try to widen the compare to match the select width.
12690 // TODO: This should be extended to handle any constant.
12691 // TODO: This could be extended to handle non-loading patterns, but that
12692 // requires thorough testing to avoid regressions.
12693 if (isNullOrNullSplat(RHS)) {
12694 EVT NarrowVT = LHS.getValueType();
12695    EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12696    EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12697 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12698 unsigned WideWidth = WideVT.getScalarSizeInBits();
12699 bool IsSigned = isSignedIntSetCC(CC);
12700 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12701 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12702 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12703 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12704 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12705 // Both compare operands can be widened for free. The LHS can use an
12706 // extended load, and the RHS is a constant:
12707 // vselect (ext (setcc load(X), C)), N1, N2 -->
12708 // vselect (setcc extload(X), C'), N1, N2
12709 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12710 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12711 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12712 EVT WideSetCCVT = getSetCCResultType(WideVT);
12713 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12714 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12715 }
12716 }
12717
12718 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
12719 return ABD;
12720
12721 // Match VSELECTs into add with unsigned saturation.
12722 if (hasOperation(ISD::UADDSAT, VT)) {
12723 // Check if one of the arms of the VSELECT is vector with all bits set.
12724 // If it's on the left side invert the predicate to simplify logic below.
12725 SDValue Other;
12726 ISD::CondCode SatCC = CC;
12727    if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12728      Other = N2;
12729 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12730 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12731 Other = N1;
12732 }
12733
12734 if (Other && Other.getOpcode() == ISD::ADD) {
12735 SDValue CondLHS = LHS, CondRHS = RHS;
12736 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12737
12738 // Canonicalize condition operands.
12739 if (SatCC == ISD::SETUGE) {
12740 std::swap(CondLHS, CondRHS);
12741 SatCC = ISD::SETULE;
12742 }
12743
12744 // We can test against either of the addition operands.
12745 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12746 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12747 if (SatCC == ISD::SETULE && Other == CondRHS &&
12748 (OpLHS == CondLHS || OpRHS == CondLHS))
12749 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12750
12751 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12752 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12753 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12754 CondLHS == OpLHS) {
12755 // If the RHS is a constant we have to reverse the const
12756 // canonicalization.
12757 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12758 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12759 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12760 };
12761 if (SatCC == ISD::SETULE &&
12762 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12763 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12764 }
12765 }
12766 }
12767
12768 // Match VSELECTs into sub with unsigned saturation.
12769 if (hasOperation(ISD::USUBSAT, VT)) {
12770 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12771 // the left side invert the predicate to simplify logic below.
12772 SDValue Other;
12773 ISD::CondCode SatCC = CC;
12774    if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12775      Other = N2;
12776      SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12777    } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12778      Other = N1;
12779 }
12780
12781 // zext(x) >= y ? trunc(zext(x) - y) : 0
12782 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12783 // zext(x) > y ? trunc(zext(x) - y) : 0
12784 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12785 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12786 Other.getOperand(0).getOpcode() == ISD::SUB &&
12787 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12788 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12789 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12790 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12791 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12792 DAG, DL))
12793 return R;
12794 }
12795
12796 if (Other && Other.getNumOperands() == 2) {
12797 SDValue CondRHS = RHS;
12798 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12799
12800 if (OpLHS == LHS) {
12801 // Look for a general sub with unsigned saturation first.
12802 // x >= y ? x-y : 0 --> usubsat x, y
12803 // x > y ? x-y : 0 --> usubsat x, y
12804 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12805 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12806 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12807
12808 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12809 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12810 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12811 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12812 // If the RHS is a constant we have to reverse the const
12813 // canonicalization.
12814 // x > C-1 ? x+-C : 0 --> usubsat x, C
12815 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12816 return (!Op && !Cond) ||
12817 (Op && Cond &&
12818 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12819 };
12820 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12821 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12822 /*AllowUndefs*/ true)) {
12823 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12824 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12825 }
12826
12827 // Another special case: If C was a sign bit, the sub has been
12828 // canonicalized into a xor.
12829 // FIXME: Would it be better to use computeKnownBits to
12830 // determine whether it's safe to decanonicalize the xor?
12831 // x s< 0 ? x^C : 0 --> usubsat x, C
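              // When C is the sign bit, x ^ C == x - C for any x with the sign
              // bit set, and "x s< 0" is the same as "x u>= C", so this matches
              // the usubsat pattern above.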
12832 APInt SplatValue;
12833 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12834 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12835              ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12836              SplatValue.isSignMask()) {
12837 // Note that we have to rebuild the RHS constant here to
12838 // ensure we don't rely on particular values of undef lanes.
12839 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12840 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12841 }
12842 }
12843 }
12844 }
12845 }
12846 }
12847 }
12848
12849 if (SimplifySelectOps(N, N1, N2))
12850 return SDValue(N, 0); // Don't revisit N.
12851
12852 // Fold (vselect all_ones, N1, N2) -> N1
12853  if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12854    return N1;
12855 // Fold (vselect all_zeros, N1, N2) -> N2
12856  if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12857    return N2;
12858
12859  // The ConvertSelectToConcatVector function assumes both the above
12860  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12861  // and addressed.
12862 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12863      N2.getOpcode() == ISD::CONCAT_VECTORS &&
12864      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12865    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12866 return CV;
12867 }
12868
12869 if (SDValue V = foldVSelectOfConstants(N))
12870 return V;
12871
12872 if (hasOperation(ISD::SRA, VT))
12873    if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12874      return V;
12875
12876  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12877    return SDValue(N, 0);
12878
12879 return SDValue();
12880}
12881
12882SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12883 SDValue N0 = N->getOperand(0);
12884 SDValue N1 = N->getOperand(1);
12885 SDValue N2 = N->getOperand(2);
12886 SDValue N3 = N->getOperand(3);
12887 SDValue N4 = N->getOperand(4);
12888 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12889 SDLoc DL(N);
12890
12891 // fold select_cc lhs, rhs, x, x, cc -> x
12892 if (N2 == N3)
12893 return N2;
12894
12895 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12896 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12897 isNullConstant(N1))
12898 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
12899
12900 // Determine if the condition we're dealing with is constant
12901 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12902 CC, DL, false)) {
12903 AddToWorklist(SCC.getNode());
12904
12905 // cond always true -> true val
12906 // cond always false -> false val
12907 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12908 return SCCC->isZero() ? N3 : N2;
12909
12910    // When the condition is UNDEF, just return the first operand. This is
12911    // coherent with DAG creation; no setcc node is created in this case.
12912 if (SCC->isUndef())
12913 return N2;
12914
12915 // Fold to a simpler select_cc
12916 if (SCC.getOpcode() == ISD::SETCC) {
12917 SDValue SelectOp =
12918 DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
12919 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12920 SelectOp->setFlags(SCC->getFlags());
12921 return SelectOp;
12922 }
12923 }
12924
12925 // If we can fold this based on the true/false value, do so.
12926 if (SimplifySelectOps(N, N2, N3))
12927 return SDValue(N, 0); // Don't revisit N.
12928
12929 // fold select_cc into other things, such as min/max/abs
12930 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
12931}
12932
12933SDValue DAGCombiner::visitSETCC(SDNode *N) {
12934  // setcc is very commonly used as an argument to brcond. This pattern
12935  // also lends itself to numerous combines and, as a result, it is desirable
12936  // to keep the argument to a brcond as a setcc as much as possible.
12937 bool PreferSetCC =
12938 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
12939
12940 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12941 EVT VT = N->getValueType(0);
12942 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12943 SDLoc DL(N);
12944
12945 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
12946 // If we prefer to have a setcc, and we don't, we'll try our best to
12947 // recreate one using rebuildSetCC.
12948 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12949 SDValue NewSetCC = rebuildSetCC(Combined);
12950
12951 // We don't have anything interesting to combine to.
12952 if (NewSetCC.getNode() == N)
12953 return SDValue();
12954
12955 if (NewSetCC)
12956 return NewSetCC;
12957 }
12958 return Combined;
12959 }
12960
12961 // Optimize
12962 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12963 // or
12964 // 2) (icmp eq/ne X, (rotate X, C1))
12965 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12966  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12967  // Then:
12968  // If C1 is a power of 2, then the rotate and shift+and versions are
12969  // equivalent, so we can interchange them depending on target preference.
12970  // Otherwise, if we have the shift+and version we can interchange srl/shl,
12971  // which in turn affects the constant C0. We can use this to get better
12972  // constants again determined by target preference.
12973 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12974 auto IsAndWithShift = [](SDValue A, SDValue B) {
12975 return A.getOpcode() == ISD::AND &&
12976 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12977 A.getOperand(0) == B.getOperand(0);
12978 };
12979 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12980 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12981 B.getOperand(0) == A;
12982 };
12983 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12984 bool IsRotate = false;
12985
12986 // Find either shift+and or rotate pattern.
12987 if (IsAndWithShift(N0, N1)) {
12988 AndOrOp = N0;
12989 ShiftOrRotate = N1;
12990 } else if (IsAndWithShift(N1, N0)) {
12991 AndOrOp = N1;
12992 ShiftOrRotate = N0;
12993 } else if (IsRotateWithOp(N0, N1)) {
12994 IsRotate = true;
12995 AndOrOp = N0;
12996 ShiftOrRotate = N1;
12997 } else if (IsRotateWithOp(N1, N0)) {
12998 IsRotate = true;
12999 AndOrOp = N1;
13000 ShiftOrRotate = N0;
13001 }
13002
13003 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13004 (IsRotate || AndOrOp.hasOneUse())) {
13005 EVT OpVT = N0.getValueType();
13006      // Get the constant shift/rotate amount and possibly the mask (if it's the
13007      // shift+and variant).
13008 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13009 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13010 /*AllowTrunc*/ false);
13011 if (CNode == nullptr)
13012 return std::nullopt;
13013 return CNode->getAPIntValue();
13014 };
13015 std::optional<APInt> AndCMask =
13016 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13017 std::optional<APInt> ShiftCAmt =
13018 GetAPIntValue(ShiftOrRotate.getOperand(1));
13019 unsigned NumBits = OpVT.getScalarSizeInBits();
13020
13021 // We found constants.
13022 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13023 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13024 // Check that the constants meet the constraints.
13025 bool CanTransform = IsRotate;
13026 if (!CanTransform) {
13027          // Check that the mask and shift complement each other.
13028 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13029 // Check that we are comparing all bits
13030 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13031 // Check that the and mask is correct for the shift
13032 CanTransform &=
13033 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13034 }
13035
13036 // See if target prefers another shift/rotate opcode.
13037 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13038 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13039 // Transform is valid and we have a new preference.
13040 if (CanTransform && NewShiftOpc != ShiftOpc) {
13041 SDValue NewShiftOrRotate =
13042 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13043 ShiftOrRotate.getOperand(1));
13044 SDValue NewAndOrOp = SDValue();
13045
13046 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13047 APInt NewMask =
13048 NewShiftOpc == ISD::SHL
13049 ? APInt::getHighBitsSet(NumBits,
13050 NumBits - ShiftCAmt->getZExtValue())
13051 : APInt::getLowBitsSet(NumBits,
13052 NumBits - ShiftCAmt->getZExtValue());
13053 NewAndOrOp =
13054 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13055 DAG.getConstant(NewMask, DL, OpVT));
13056 } else {
13057 NewAndOrOp = ShiftOrRotate.getOperand(0);
13058 }
13059
13060 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13061 }
13062 }
13063 }
13064 }
13065 return SDValue();
13066}
13067
13068SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13069 SDValue LHS = N->getOperand(0);
13070 SDValue RHS = N->getOperand(1);
13071 SDValue Carry = N->getOperand(2);
13072 SDValue Cond = N->getOperand(3);
13073
13074 // If Carry is false, fold to a regular SETCC.
13075 if (isNullConstant(Carry))
13076 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13077
13078 return SDValue();
13079}
13080
13081/// Check if N satisfies:
13082/// N is used once.
13083/// N is a Load.
13084///   The load is compatible with ExtOpcode. That means:
13085///     If the load has explicit zero/sign extension, ExtOpcode must have the
13086///     same extension.
13087///     Otherwise, any extension is compatible.
13088static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13089 if (!N.hasOneUse())
13090 return false;
13091
13092 if (!isa<LoadSDNode>(N))
13093 return false;
13094
13095 LoadSDNode *Load = cast<LoadSDNode>(N);
13096 ISD::LoadExtType LoadExt = Load->getExtensionType();
13097 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13098 return true;
13099
13100 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13101 // extension.
13102 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13103 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13104 return false;
13105
13106 return true;
13107}
13108
13109/// Fold
13110/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13111/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13112/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13113/// This function is called by the DAGCombiner when visiting sext/zext/aext
13114/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13115static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13116                                         SelectionDAG &DAG, const SDLoc &DL,
13117 CombineLevel Level) {
13118 unsigned Opcode = N->getOpcode();
13119 SDValue N0 = N->getOperand(0);
13120 EVT VT = N->getValueType(0);
13121 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13122 Opcode == ISD::ANY_EXTEND) &&
13123 "Expected EXTEND dag node in input!");
13124
13125 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13126 !N0.hasOneUse())
13127 return SDValue();
13128
13129 SDValue Op1 = N0->getOperand(1);
13130 SDValue Op2 = N0->getOperand(2);
13131 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13132 return SDValue();
13133
13134 auto ExtLoadOpcode = ISD::EXTLOAD;
13135 if (Opcode == ISD::SIGN_EXTEND)
13136 ExtLoadOpcode = ISD::SEXTLOAD;
13137 else if (Opcode == ISD::ZERO_EXTEND)
13138 ExtLoadOpcode = ISD::ZEXTLOAD;
13139
13140  // An illegal VSELECT may cause ISel to fail if it appears after legalization
13141  // (DAG Combine2), so we conservatively check the OperationAction.
13142 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13143 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13144 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13145 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13146 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13147       TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13148    return SDValue();
13149
13150 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13151 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13152 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13153}
13154
13155/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13156/// a build_vector of constants.
13157/// This function is called by the DAGCombiner when visiting sext/zext/aext
13158/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13159/// Vector extends are not folded if operations are legal; this is to
13160/// avoid introducing illegal build_vector dag nodes.
13161static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13162                                         const TargetLowering &TLI,
13163 SelectionDAG &DAG, bool LegalTypes) {
13164 unsigned Opcode = N->getOpcode();
13165 SDValue N0 = N->getOperand(0);
13166 EVT VT = N->getValueType(0);
13167
13168 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13169 "Expected EXTEND dag node in input!");
13170
13171 // fold (sext c1) -> c1
13172 // fold (zext c1) -> c1
13173 // fold (aext c1) -> c1
13174 if (isa<ConstantSDNode>(N0))
13175 return DAG.getNode(Opcode, DL, VT, N0);
13176
13177 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13178 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13179 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13180 if (N0->getOpcode() == ISD::SELECT) {
13181 SDValue Op1 = N0->getOperand(1);
13182 SDValue Op2 = N0->getOperand(2);
13183 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13184 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13185 // For any_extend, choose sign extension of the constants to allow a
13186      // possible further transform to sign_extend_inreg, i.e.
13187 //
13188 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13189 // t2: i64 = any_extend t1
13190 // -->
13191 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13192 // -->
13193 // t4: i64 = sign_extend_inreg t3
13194 unsigned FoldOpc = Opcode;
13195 if (FoldOpc == ISD::ANY_EXTEND)
13196 FoldOpc = ISD::SIGN_EXTEND;
13197 return DAG.getSelect(DL, VT, N0->getOperand(0),
13198 DAG.getNode(FoldOpc, DL, VT, Op1),
13199 DAG.getNode(FoldOpc, DL, VT, Op2));
13200 }
13201 }
13202
13203 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13204 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13205 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13206 EVT SVT = VT.getScalarType();
13207 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13208        ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13209    return SDValue();
13210
13211 // We can fold this node into a build_vector.
13212 unsigned VTBits = SVT.getSizeInBits();
13213 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13214  SmallVector<SDValue, 8> Elts;
13215  unsigned NumElts = VT.getVectorNumElements();
13216
13217 for (unsigned i = 0; i != NumElts; ++i) {
13218 SDValue Op = N0.getOperand(i);
13219 if (Op.isUndef()) {
13220 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13221 Elts.push_back(DAG.getUNDEF(SVT));
13222 else
13223 Elts.push_back(DAG.getConstant(0, DL, SVT));
13224 continue;
13225 }
13226
13227 SDLoc DL(Op);
13228 // Get the constant value and if needed trunc it to the size of the type.
13229 // Nodes like build_vector might have constants wider than the scalar type.
13230 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13231 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13232 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13233 else
13234 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13235 }
13236
13237 return DAG.getBuildVector(VT, DL, Elts);
13238}
13239
13240// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13241// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13242// transformation. Returns true if the extensions are possible and the
13243// above-mentioned transformation is profitable.
13244static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13245                                    unsigned ExtOpc,
13246 SmallVectorImpl<SDNode *> &ExtendNodes,
13247 const TargetLowering &TLI) {
13248 bool HasCopyToRegUses = false;
13249 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13250 for (SDUse &Use : N0->uses()) {
13251 SDNode *User = Use.getUser();
13252 if (User == N)
13253 continue;
13254 if (Use.getResNo() != N0.getResNo())
13255 continue;
13256 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13257 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13258 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13259 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13260 // Sign bits will be lost after a zext.
13261 return false;
13262 bool Add = false;
13263 for (unsigned i = 0; i != 2; ++i) {
13264 SDValue UseOp = User->getOperand(i);
13265 if (UseOp == N0)
13266 continue;
13267 if (!isa<ConstantSDNode>(UseOp))
13268 return false;
13269 Add = true;
13270 }
13271 if (Add)
13272 ExtendNodes.push_back(User);
13273 continue;
13274 }
13275 // If truncates aren't free and there are users we can't
13276 // extend, it isn't worthwhile.
13277 if (!isTruncFree)
13278 return false;
13279 // Remember if this value is live-out.
13280 if (User->getOpcode() == ISD::CopyToReg)
13281 HasCopyToRegUses = true;
13282 }
13283
13284 if (HasCopyToRegUses) {
13285 bool BothLiveOut = false;
13286 for (SDUse &Use : N->uses()) {
13287 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13288 BothLiveOut = true;
13289 break;
13290 }
13291 }
13292 if (BothLiveOut)
13293 // Both unextended and extended values are live out. There had better be
13294 // a good reason for the transformation.
13295 return !ExtendNodes.empty();
13296 }
13297 return true;
13298}
13299
13300void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13301 SDValue OrigLoad, SDValue ExtLoad,
13302 ISD::NodeType ExtType) {
13303 // Extend SetCC uses if necessary.
13304 SDLoc DL(ExtLoad);
13305 for (SDNode *SetCC : SetCCs) {
13306    SmallVector<SDValue, 4> Ops;
13307
13308 for (unsigned j = 0; j != 2; ++j) {
13309 SDValue SOp = SetCC->getOperand(j);
13310 if (SOp == OrigLoad)
13311 Ops.push_back(ExtLoad);
13312 else
13313 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13314 }
13315
13316 Ops.push_back(SetCC->getOperand(2));
13317 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13318 }
13319}
13320
13321// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13322SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13323 SDValue N0 = N->getOperand(0);
13324 EVT DstVT = N->getValueType(0);
13325 EVT SrcVT = N0.getValueType();
13326
13327 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13328 N->getOpcode() == ISD::ZERO_EXTEND) &&
13329 "Unexpected node type (not an extend)!");
13330
13331 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13332 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13333 // (v8i32 (sext (v8i16 (load x))))
13334 // into:
13335 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13336 // (v4i32 (sextload (x + 16)))))
13337 // Where uses of the original load, i.e.:
13338 // (v8i16 (load x))
13339 // are replaced with:
13340 // (v8i16 (truncate
13341 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13342 // (v4i32 (sextload (x + 16)))))))
13343 //
13344 // This combine is only applicable to illegal, but splittable, vectors.
13345 // All legal types, and illegal non-vector types, are handled elsewhere.
13346 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13347 //
13348 if (N0->getOpcode() != ISD::LOAD)
13349 return SDValue();
13350
13351 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13352
13353 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13354 !N0.hasOneUse() || !LN0->isSimple() ||
13355 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13356      !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13357    return SDValue();
13358
13359  SmallVector<SDNode *, 4> SetCCs;
13360  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13361 return SDValue();
13362
13363 ISD::LoadExtType ExtType =
13364 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13365
13366 // Try to split the vector types to get down to legal types.
13367 EVT SplitSrcVT = SrcVT;
13368 EVT SplitDstVT = DstVT;
13369 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13370 SplitSrcVT.getVectorNumElements() > 1) {
13371 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13372 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13373 }
13374
13375 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13376 return SDValue();
13377
13378 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13379
13380 SDLoc DL(N);
13381 const unsigned NumSplits =
13382 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13383 const unsigned Stride = SplitSrcVT.getStoreSize();
13384  SmallVector<SDValue, 4> Loads;
13385  SmallVector<SDValue, 4> Chains;
13386
13387 SDValue BasePtr = LN0->getBasePtr();
13388 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13389 const unsigned Offset = Idx * Stride;
13390
13391 SDValue SplitLoad =
13392 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13393 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13394 SplitSrcVT, LN0->getOriginalAlign(),
13395 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13396
13397 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13398
13399 Loads.push_back(SplitLoad.getValue(0));
13400 Chains.push_back(SplitLoad.getValue(1));
13401 }
13402
13403 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13404 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13405
13406 // Simplify TF.
13407 AddToWorklist(NewChain.getNode());
13408
13409 CombineTo(N, NewValue);
13410
13411 // Replace uses of the original load (before extension)
13412 // with a truncate of the concatenated sextloaded vectors.
13413 SDValue Trunc =
13414 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13415 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13416 CombineTo(N0.getNode(), Trunc, NewChain);
13417 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13418}
13419
13420// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13421// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13422SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13423 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13424 EVT VT = N->getValueType(0);
13425 EVT OrigVT = N->getOperand(0).getValueType();
13426 if (TLI.isZExtFree(OrigVT, VT))
13427 return SDValue();
13428
13429 // and/or/xor
13430 SDValue N0 = N->getOperand(0);
13431 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13432 N0.getOperand(1).getOpcode() != ISD::Constant ||
13433 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13434 return SDValue();
13435
13436 // shl/shr
13437 SDValue N1 = N0->getOperand(0);
13438 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13439 N1.getOperand(1).getOpcode() != ISD::Constant ||
13440 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13441 return SDValue();
13442
13443 // load
13444 if (!isa<LoadSDNode>(N1.getOperand(0)))
13445 return SDValue();
13446 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13447 EVT MemVT = Load->getMemoryVT();
13448 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13449 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13450 return SDValue();
13451
13452
13453 // If the shift op is SHL, the logic op must be AND, otherwise the result
13454 // will be wrong.
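  // The zero-extended AND mask has no bits set above the original value width,
  // so it clears the extra high bits that the widened SHL keeps but the narrow
  // SHL would have discarded; OR/XOR would leave those bits visible.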
13455 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13456 return SDValue();
13457
13458 if (!N0.hasOneUse() || !N1.hasOneUse())
13459 return SDValue();
13460
13462 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13463 ISD::ZERO_EXTEND, SetCCs, TLI))
13464 return SDValue();
13465
13466 // Actually do the transformation.
13467 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13468 Load->getChain(), Load->getBasePtr(),
13469 Load->getMemoryVT(), Load->getMemOperand());
13470
13471 SDLoc DL1(N1);
13472 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13473 N1.getOperand(1));
13474
13475  APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13476  SDLoc DL0(N0);
13477 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13478 DAG.getConstant(Mask, DL0, VT));
13479
13480 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13481 CombineTo(N, And);
13482 if (SDValue(Load, 0).hasOneUse()) {
13483 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13484 } else {
13485 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13486 Load->getValueType(0), ExtLoad);
13487 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13488 }
13489
13490 // N0 is dead at this point.
13491 recursivelyDeleteUnusedNodes(N0.getNode());
13492
13493 return SDValue(N,0); // Return N so it doesn't get rechecked!
13494}
13495
13496/// If we're narrowing or widening the result of a vector select and the final
13497/// size is the same size as a setcc (compare) feeding the select, then try to
13498/// apply the cast operation to the select's operands because matching vector
13499/// sizes for a select condition and other operands should be more efficient.
13500SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13501 unsigned CastOpcode = Cast->getOpcode();
13502 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13503 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13504 CastOpcode == ISD::FP_ROUND) &&
13505 "Unexpected opcode for vector select narrowing/widening");
13506
13507 // We only do this transform before legal ops because the pattern may be
13508 // obfuscated by target-specific operations after legalization. Do not create
13509 // an illegal select op, however, because that may be difficult to lower.
13510 EVT VT = Cast->getValueType(0);
13511 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13512 return SDValue();
13513
13514 SDValue VSel = Cast->getOperand(0);
13515 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13516 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13517 return SDValue();
13518
13519 // Does the setcc have the same vector size as the casted select?
13520 SDValue SetCC = VSel.getOperand(0);
13521 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13522 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13523 return SDValue();
13524
13525 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13526 SDValue A = VSel.getOperand(1);
13527 SDValue B = VSel.getOperand(2);
13528 SDValue CastA, CastB;
13529 SDLoc DL(Cast);
13530 if (CastOpcode == ISD::FP_ROUND) {
13531 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13532 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13533 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13534 } else {
13535 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13536 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13537 }
13538 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13539}
13540
13541// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13542// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13543static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13544                                     const TargetLowering &TLI, EVT VT,
13545 bool LegalOperations, SDNode *N,
13546 SDValue N0, ISD::LoadExtType ExtLoadType) {
13547 SDNode *N0Node = N0.getNode();
13548 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13549 : ISD::isZEXTLoad(N0Node);
13550 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13551 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13552 return SDValue();
13553
13554 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13555 EVT MemVT = LN0->getMemoryVT();
13556 if ((LegalOperations || !LN0->isSimple() ||
13557 VT.isVector()) &&
13558 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13559 return SDValue();
13560
13561 SDValue ExtLoad =
13562 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13563 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13564 Combiner.CombineTo(N, ExtLoad);
13565 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13566 if (LN0->use_empty())
13567 Combiner.recursivelyDeleteUnusedNodes(LN0);
13568 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13569}
13570
13571// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13572// Only generate vector extloads when 1) they're legal, and 2) they are
13573// deemed desirable by the target. NonNegZExt can be set to true if a zero
13574// extend has the nonneg flag to allow use of sextload if profitable.
13575static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13576                                  const TargetLowering &TLI, EVT VT,
13577 bool LegalOperations, SDNode *N, SDValue N0,
13578 ISD::LoadExtType ExtLoadType,
13579 ISD::NodeType ExtOpc,
13580 bool NonNegZExt = false) {
13581  if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13582    return {};
13583
13584 // If this is zext nneg, see if it would make sense to treat it as a sext.
13585 if (NonNegZExt) {
13586 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13587 "Unexpected load type or opcode");
13588 for (SDNode *User : N0->users()) {
13589 if (User->getOpcode() == ISD::SETCC) {
13590 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13591        if (ISD::isSignedIntSetCC(CC)) {
13592          ExtLoadType = ISD::SEXTLOAD;
13593 ExtOpc = ISD::SIGN_EXTEND;
13594 break;
13595 }
13596 }
13597 }
13598 }
13599
13600  // TODO: isFixedLengthVector() should be removed, with any negative effects on
13601  // code generation being the result of that target's implementation of
13602  // isVectorLoadExtDesirable().
13603 if ((LegalOperations || VT.isFixedLengthVector() ||
13604 !cast<LoadSDNode>(N0)->isSimple()) &&
13605 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13606 return {};
13607
13608 bool DoXform = true;
13609  SmallVector<SDNode *, 4> SetCCs;
13610  if (!N0.hasOneUse())
13611 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13612 if (VT.isVector())
13613 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13614 if (!DoXform)
13615 return {};
13616
13617 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13618 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13619 LN0->getBasePtr(), N0.getValueType(),
13620 LN0->getMemOperand());
13621 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13622 // If the load value is used only by N, replace it via CombineTo N.
13623 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13624 Combiner.CombineTo(N, ExtLoad);
13625 if (NoReplaceTrunc) {
13626 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13627 Combiner.recursivelyDeleteUnusedNodes(LN0);
13628 } else {
13629 SDValue Trunc =
13630 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13631 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13632 }
13633 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13634}
13635
13636static SDValue
13637tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13638                         bool LegalOperations, SDNode *N, SDValue N0,
13639 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13640 if (!N0.hasOneUse())
13641 return SDValue();
13642
13643 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13644 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13645 return SDValue();
13646
13647 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13648 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13649 return SDValue();
13650
13651 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13652 return SDValue();
13653
13654 SDLoc dl(Ld);
13655 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13656 SDValue NewLoad = DAG.getMaskedLoad(
13657 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13658 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13659 ExtLoadType, Ld->isExpandingLoad());
13660 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13661 return NewLoad;
13662}
13663
13664// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13665static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13666                                        const TargetLowering &TLI, EVT VT,
13667 SDValue N0,
13668 ISD::LoadExtType ExtLoadType) {
13669 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13670 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13671 return {};
13672 EVT MemoryVT = ALoad->getMemoryVT();
13673 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13674 return {};
13675 // Can't fold into ALoad if it is already extending differently.
13676 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13677 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13678 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13679 return {};
13680
13681 EVT OrigVT = ALoad->getValueType(0);
13682 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13683 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13684 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13685 ALoad->getBasePtr(), ALoad->getMemOperand()));
13686 NewALoad->setExtensionType(ExtLoadType);
13687  DAG.ReplaceAllUsesOfValueWith(
13688      SDValue(ALoad, 0),
13689 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13690 // Update the chain uses.
13691 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13692 return SDValue(NewALoad, 0);
13693}
13694
13695static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13696                                       bool LegalOperations) {
13697 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13698 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13699
13700 SDValue SetCC = N->getOperand(0);
13701 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13702 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13703 return SDValue();
13704
13705 SDValue X = SetCC.getOperand(0);
13706 SDValue Ones = SetCC.getOperand(1);
13707 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13708 EVT VT = N->getValueType(0);
13709 EVT XVT = X.getValueType();
13710 // setge X, C is canonicalized to setgt, so we do not need to match that
13711 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13712 // not require the 'not' op.
13713 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13714 // Invert and smear/shift the sign bit:
13715 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13716 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13717 SDLoc DL(N);
13718 unsigned ShCt = VT.getSizeInBits() - 1;
13719 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13720 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13721 SDValue NotX = DAG.getNOT(DL, X, VT);
13722 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13723 auto ShiftOpcode =
13724 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13725 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13726 }
13727 }
13728 return SDValue();
13729}
13730
13731SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13732 SDValue N0 = N->getOperand(0);
13733 if (N0.getOpcode() != ISD::SETCC)
13734 return SDValue();
13735
13736 SDValue N00 = N0.getOperand(0);
13737 SDValue N01 = N0.getOperand(1);
13738 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13739 EVT VT = N->getValueType(0);
13740 EVT N00VT = N00.getValueType();
13741 SDLoc DL(N);
13742
13743 // Propagate fast-math-flags.
13744 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13745
13746 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13747 // the same size as the compared operands. Try to optimize sext(setcc())
13748 // if this is the case.
13749 if (VT.isVector() && !LegalOperations &&
13750 TLI.getBooleanContents(N00VT) ==
13751 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13752 EVT SVT = getSetCCResultType(N00VT);
13753
13754 // If we already have the desired type, don't change it.
13755 if (SVT != N0.getValueType()) {
13756 // We know that the # elements of the results is the same as the
13757 // # elements of the compare (and the # elements of the compare result
13758 // for that matter). Check to see that they are the same size. If so,
13759 // we know that the element size of the sext'd result matches the
13760 // element size of the compare operands.
13761 if (VT.getSizeInBits() == SVT.getSizeInBits())
13762 return DAG.getSetCC(DL, VT, N00, N01, CC);
13763
13764 // If the desired elements are smaller or larger than the source
13765 // elements, we can use a matching integer vector type and then
13766 // truncate/sign extend.
13767 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13768 if (SVT == MatchingVecType) {
13769 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13770 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13771 }
13772 }
13773
13774 // Try to eliminate the sext of a setcc by zexting the compare operands.
13775 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13776 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13777 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13778 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13779 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13780
13781 // We have an unsupported narrow vector compare op that would be legal
13782 // if extended to the destination type. See if the compare operands
13783 // can be freely extended to the destination type.
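// For example, a v8i16 setcc that is not legal for the target can instead be
// done as a v8i32 setcc by {z/s}ext'ing both compare operands, when those
// operands are constants or simple loads that fold into extending loads.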
13784 auto IsFreeToExtend = [&](SDValue V) {
13785 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13786 return true;
13787 // Match a simple, non-extended load that can be converted to a
13788 // legal {z/s}ext-load.
13789 // TODO: Allow widening of an existing {z/s}ext-load?
13790 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13791 ISD::isUNINDEXEDLoad(V.getNode()) &&
13792 cast<LoadSDNode>(V)->isSimple() &&
13793 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13794 return false;
13795
13796 // Non-chain users of this value must either be the setcc in this
13797 // sequence or extends that can be folded into the new {z/s}ext-load.
13798 for (SDUse &Use : V->uses()) {
13799 // Skip uses of the chain and the setcc.
13800 SDNode *User = Use.getUser();
13801 if (Use.getResNo() != 0 || User == N0.getNode())
13802 continue;
13803 // Extra users must have exactly the same cast we are about to create.
13804 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13805 // is enhanced similarly.
13806 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13807 return false;
13808 }
13809 return true;
13810 };
13811
13812 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13813 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13814 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13815 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13816 }
13817 }
13818 }
13819
13820 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13821 // Here, T can be 1 or -1, depending on the type of the setcc and
13822 // getBooleanContents().
13823 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13824
13825 // To determine the "true" side of the select, we need to know the high bit
13826 // of the value returned by the setcc if it evaluates to true.
13827 // If the type of the setcc is i1, then the true case of the select is just
13828 // sext(i1 1), that is, -1.
13829 // If the type of the setcc is larger (say, i8) then the value of the high
13830 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13831 // of the appropriate width.
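// For example, if the compare type reports ZeroOrNegativeOneBooleanContent,
// the "true" value requested below is all-ones (-1); with
// ZeroOrOneBooleanContent it is +1.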
13832 SDValue ExtTrueVal = (SetCCWidth == 1)
13833 ? DAG.getAllOnesConstant(DL, VT)
13834 : DAG.getBoolConstant(true, DL, VT, N00VT);
13835 SDValue Zero = DAG.getConstant(0, DL, VT);
13836 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13837 return SCC;
13838
13839 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13840 EVT SetCCVT = getSetCCResultType(N00VT);
13841 // Don't do this transform for i1 because there's a select transform
13842 // that would reverse it.
13843 // TODO: We should not do this transform at all without a target hook
13844 // because a sext is likely cheaper than a select?
13845 if (SetCCVT.getScalarSizeInBits() != 1 &&
13846 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13847 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13848 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13849 }
13850 }
13851
13852 return SDValue();
13853}
13854
13855SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13856 SDValue N0 = N->getOperand(0);
13857 EVT VT = N->getValueType(0);
13858 SDLoc DL(N);
13859
13860 if (VT.isVector())
13861 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13862 return FoldedVOp;
13863
13864 // sext(undef) = 0 because the top bits will all be the same.
13865 if (N0.isUndef())
13866 return DAG.getConstant(0, DL, VT);
13867
13868 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13869 return Res;
13870
13871 // fold (sext (sext x)) -> (sext x)
13872 // fold (sext (aext x)) -> (sext x)
13873 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13874 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13875
13876 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13877 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13878 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13879 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13880 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13881 N0.getOperand(0));
13882
13883 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13884 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13885 SDValue N00 = N0.getOperand(0);
13886 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13887 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13888 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13889 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13890 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13891 }
13892 }
13893
13894 if (N0.getOpcode() == ISD::TRUNCATE) {
13895 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13896 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13897 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13898 SDNode *oye = N0.getOperand(0).getNode();
13899 if (NarrowLoad.getNode() != N0.getNode()) {
13900 CombineTo(N0.getNode(), NarrowLoad);
13901 // CombineTo deleted the truncate, if needed, but not what's under it.
13902 AddToWorklist(oye);
13903 }
13904 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13905 }
13906
13907 // See if the value being truncated is already sign extended. If so, just
13908 // eliminate the trunc/sext pair.
13909 SDValue Op = N0.getOperand(0);
13910 unsigned OpBits = Op.getScalarValueSizeInBits();
13911 unsigned MidBits = N0.getScalarValueSizeInBits();
13912 unsigned DestBits = VT.getScalarSizeInBits();
13913
13914 if (N0->getFlags().hasNoSignedWrap() ||
13915 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13916 if (OpBits == DestBits) {
13917 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13918 // bits, Op is already the correct result.
13919 return Op;
13920 }
13921
13922 if (OpBits < DestBits) {
13923 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13924 // bits, just sext from i32.
13925 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13926 }
13927
13928 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13929 // bits, just truncate to i32.
13930 SDNodeFlags Flags;
13931 Flags.setNoSignedWrap(true);
13932 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13933 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
13934 }
13935
13936 // fold (sext (truncate x)) -> (sextinreg x).
13937 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13938 N0.getValueType())) {
13939 if (OpBits < DestBits)
13940 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13941 else if (OpBits > DestBits)
13942 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13943 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13944 DAG.getValueType(N0.getValueType()));
13945 }
13946 }
13947
13948 // Try to simplify (sext (load x)).
13949 if (SDValue foldedExt =
13950 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13951 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13952 return foldedExt;
13953
13954 if (SDValue foldedExt =
13955 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13956 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13957 return foldedExt;
13958
13959 // fold (sext (load x)) to multiple smaller sextloads.
13960 // Only on illegal but splittable vectors.
13961 if (SDValue ExtLoad = CombineExtLoad(N))
13962 return ExtLoad;
13963
13964 // Try to simplify (sext (sextload x)).
13965 if (SDValue foldedExt = tryToFoldExtOfExtload(
13966 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13967 return foldedExt;
13968
13969 // Try to simplify (sext (atomic_load x)).
13970 if (SDValue foldedExt =
13971 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13972 return foldedExt;
13973
13974 // fold (sext (and/or/xor (load x), cst)) ->
13975 // (and/or/xor (sextload x), (sext cst))
13976 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13977 isa<LoadSDNode>(N0.getOperand(0)) &&
13978 N0.getOperand(1).getOpcode() == ISD::Constant &&
13979 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13980 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13981 EVT MemVT = LN00->getMemoryVT();
13982 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13983 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13984 SmallVector<SDNode *, 4> SetCCs;
13985 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13986 ISD::SIGN_EXTEND, SetCCs, TLI);
13987 if (DoXform) {
13988 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13989 LN00->getChain(), LN00->getBasePtr(),
13990 LN00->getMemoryVT(),
13991 LN00->getMemOperand());
13992 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13993 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13994 ExtLoad, DAG.getConstant(Mask, DL, VT));
13995 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13996 bool NoReplaceTruncAnd = !N0.hasOneUse();
13997 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13998 CombineTo(N, And);
13999 // If N0 has multiple uses, change other uses as well.
14000 if (NoReplaceTruncAnd) {
14001 SDValue TruncAnd =
14002 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14003 CombineTo(N0.getNode(), TruncAnd);
14004 }
14005 if (NoReplaceTrunc) {
14006 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14007 } else {
14008 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14009 LN00->getValueType(0), ExtLoad);
14010 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14011 }
14012 return SDValue(N,0); // Return N so it doesn't get rechecked!
14013 }
14014 }
14015 }
14016
14017 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14018 return V;
14019
14020 if (SDValue V = foldSextSetcc(N))
14021 return V;
14022
14023 // fold (sext x) -> (zext x) if the sign bit is known zero.
14024 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14025 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14026 DAG.SignBitIsZero(N0))
14027 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14028
14029 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14030 return NewVSel;
14031
14032 // Eliminate this sign extend by doing a negation in the destination type:
14033 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14034 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14035 isNullOrNullSplat(N0.getOperand(0)) &&
14036 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14037 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14038 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14039 return DAG.getNegative(Zext, DL, VT);
14040 }
14041 // Eliminate this sign extend by doing a decrement in the destination type:
14042 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14043 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14044 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14045 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14046 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14047 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14048 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14049 }
14050
14051 // fold sext (not i1 X) -> add (zext i1 X), -1
14052 // TODO: This could be extended to handle bool vectors.
14053 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14054 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14055 TLI.isOperationLegal(ISD::ADD, VT)))) {
14056 // If we can eliminate the 'not', the sext form should be better
14057 if (SDValue NewXor = visitXOR(N0.getNode())) {
14058 // Returning N0 is a form of in-visit replacement that may have
14059 // invalidated N0.
14060 if (NewXor.getNode() == N0.getNode()) {
14061 // Return SDValue here as the xor should have already been replaced in
14062 // this sext.
14063 return SDValue();
14064 }
14065
14066 // Return a new sext with the new xor.
14067 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14068 }
14069
14070 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14071 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14072 }
14073
14074 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14075 return Res;
14076
14077 return SDValue();
14078}
14079
14080/// Given an extending node with a pop-count operand, if the target does not
14081/// support a pop-count in the narrow source type but does support it in the
14082/// destination type, widen the pop-count to the destination type.
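/// For example, if the target has no i16 CTPOP but does have i32 CTPOP,
/// (i32 zext (ctpop i16 X)) becomes (i32 ctpop (i32 zext X)); zero-extending
/// the input adds no set bits, so the count is unchanged.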
14083static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14084 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14085 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14086 "Expected extend op");
14087
14088 SDValue CtPop = Extend->getOperand(0);
14089 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14090 return SDValue();
14091
14092 EVT VT = Extend->getValueType(0);
14093 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14094 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14095 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14096 return SDValue();
14097
14098 // zext (ctpop X) --> ctpop (zext X)
14099 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14100 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14101}
14102
14103// If we have (zext (abs X)) where X is a type that will be promoted by type
14104// legalization, convert to (abs (sext X)). But don't extend past a legal type.
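// For example, if i8 is promoted to i32 by type legalization, (zext (abs i8 X))
// becomes (abs (i32 sign_extend X)), followed by a zext or trunc if the
// destination type is not i32, so the ABS is formed directly in the type it
// would be promoted to anyway.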
14105static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14106 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14107
14108 EVT VT = Extend->getValueType(0);
14109 if (VT.isVector())
14110 return SDValue();
14111
14112 SDValue Abs = Extend->getOperand(0);
14113 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14114 return SDValue();
14115
14116 EVT AbsVT = Abs.getValueType();
14117 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14118 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14119 TargetLowering::TypePromoteInteger)
14120 return SDValue();
14121
14122 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14123
14124 SDValue SExt =
14125 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14126 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14127 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14128}
14129
14130SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14131 SDValue N0 = N->getOperand(0);
14132 EVT VT = N->getValueType(0);
14133 SDLoc DL(N);
14134
14135 if (VT.isVector())
14136 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14137 return FoldedVOp;
14138
14139 // zext(undef) = 0
14140 if (N0.isUndef())
14141 return DAG.getConstant(0, DL, VT);
14142
14143 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14144 return Res;
14145
14146 // fold (zext (zext x)) -> (zext x)
14147 // fold (zext (aext x)) -> (zext x)
14148 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14149 SDNodeFlags Flags;
14150 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14151 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14152 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14153 }
14154
14155 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14156 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14157 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14158 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14159 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14160
14161 // fold (zext (truncate x)) -> (zext x) or
14162 // (zext (truncate x)) -> (truncate x)
14163 // This is valid when the truncated bits of x are already zero.
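// For example, if x is an i64 value whose bits above bit 7 are known zero,
// (i32 zext (i8 trunc x)) is simply (i32 trunc x): no masking is needed.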
14164 SDValue Op;
14165 KnownBits Known;
14166 if (isTruncateOf(DAG, N0, Op, Known)) {
14167 APInt TruncatedBits =
14168 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14169 APInt(Op.getScalarValueSizeInBits(), 0) :
14170 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14171 N0.getScalarValueSizeInBits(),
14172 std::min(Op.getScalarValueSizeInBits(),
14173 VT.getScalarSizeInBits()));
14174 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14175 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14176 DAG.salvageDebugInfo(*N0.getNode());
14177
14178 return ZExtOrTrunc;
14179 }
14180 }
14181
14182 // fold (zext (truncate x)) -> (and x, mask)
14183 if (N0.getOpcode() == ISD::TRUNCATE) {
14184 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14185 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14186 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14187 SDNode *oye = N0.getOperand(0).getNode();
14188 if (NarrowLoad.getNode() != N0.getNode()) {
14189 CombineTo(N0.getNode(), NarrowLoad);
14190 // CombineTo deleted the truncate, if needed, but not what's under it.
14191 AddToWorklist(oye);
14192 }
14193 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14194 }
14195
14196 EVT SrcVT = N0.getOperand(0).getValueType();
14197 EVT MinVT = N0.getValueType();
14198
14199 if (N->getFlags().hasNonNeg()) {
14200 SDValue Op = N0.getOperand(0);
14201 unsigned OpBits = SrcVT.getScalarSizeInBits();
14202 unsigned MidBits = MinVT.getScalarSizeInBits();
14203 unsigned DestBits = VT.getScalarSizeInBits();
14204
14205 if (N0->getFlags().hasNoSignedWrap() ||
14206 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14207 if (OpBits == DestBits) {
14208 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14209 // bits, Op is already the correct result.
14210 return Op;
14211 }
14212
14213 if (OpBits < DestBits) {
14214 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14215 // bits, just sext from i32.
14216 // FIXME: This can probably be ZERO_EXTEND nneg?
14217 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14218 }
14219
14220 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14221 // bits, just truncate to i32.
14222 SDNodeFlags Flags;
14223 Flags.setNoSignedWrap(true);
14224 Flags.setNoUnsignedWrap(true);
14225 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14226 }
14227 }
14228
14229 // Try to mask before the extension to avoid having to generate a larger mask,
14230 // possibly over several sub-vectors.
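// For example, for (v8i64 zext (v8i16 trunc (v8i32 X))), doing the masking AND
// in v8i32 and then zero-extending is cheaper than extending first and masking
// in v8i64, which may already have been split into several registers.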
14231 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14232 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14233 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
14234 SDValue Op = N0.getOperand(0);
14235 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14236 AddToWorklist(Op.getNode());
14237 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14238 // Transfer the debug info; the new node is equivalent to N0.
14239 DAG.transferDbgValues(N0, ZExtOrTrunc);
14240 return ZExtOrTrunc;
14241 }
14242 }
14243
14244 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14245 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14246 AddToWorklist(Op.getNode());
14247 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14248 // We may safely transfer the debug info describing the truncate node over
14249 // to the equivalent and operation.
14250 DAG.transferDbgValues(N0, And);
14251 return And;
14252 }
14253 }
14254
14255 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14256 // if either of the casts is not free.
14257 if (N0.getOpcode() == ISD::AND &&
14258 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14259 N0.getOperand(1).getOpcode() == ISD::Constant &&
14260 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14261 !TLI.isZExtFree(N0.getValueType(), VT))) {
14262 SDValue X = N0.getOperand(0).getOperand(0);
14263 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14264 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14265 return DAG.getNode(ISD::AND, DL, VT,
14266 X, DAG.getConstant(Mask, DL, VT));
14267 }
14268
14269 // Try to simplify (zext (load x)).
14270 if (SDValue foldedExt = tryToFoldExtOfLoad(
14271 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14272 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14273 return foldedExt;
14274
14275 if (SDValue foldedExt =
14276 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14277 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14278 return foldedExt;
14279
14280 // fold (zext (load x)) to multiple smaller zextloads.
14281 // Only on illegal but splittable vectors.
14282 if (SDValue ExtLoad = CombineExtLoad(N))
14283 return ExtLoad;
14284
14285 // Try to simplify (zext (atomic_load x)).
14286 if (SDValue foldedExt =
14287 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14288 return foldedExt;
14289
14290 // fold (zext (and/or/xor (load x), cst)) ->
14291 // (and/or/xor (zextload x), (zext cst))
14292 // Unless (and (load x) cst) will match as a zextload already and has
14293 // additional users, or the zext is already free.
14294 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14295 isa<LoadSDNode>(N0.getOperand(0)) &&
14296 N0.getOperand(1).getOpcode() == ISD::Constant &&
14297 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14298 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14299 EVT MemVT = LN00->getMemoryVT();
14300 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14301 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14302 bool DoXform = true;
14303 SmallVector<SDNode *, 4> SetCCs;
14304 if (!N0.hasOneUse()) {
14305 if (N0.getOpcode() == ISD::AND) {
14306 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14307 EVT LoadResultTy = AndC->getValueType(0);
14308 EVT ExtVT;
14309 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14310 DoXform = false;
14311 }
14312 }
14313 if (DoXform)
14314 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14315 ISD::ZERO_EXTEND, SetCCs, TLI);
14316 if (DoXform) {
14317 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14318 LN00->getChain(), LN00->getBasePtr(),
14319 LN00->getMemoryVT(),
14320 LN00->getMemOperand());
14321 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14322 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14323 ExtLoad, DAG.getConstant(Mask, DL, VT));
14324 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14325 bool NoReplaceTruncAnd = !N0.hasOneUse();
14326 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14327 CombineTo(N, And);
14328 // If N0 has multiple uses, change other uses as well.
14329 if (NoReplaceTruncAnd) {
14330 SDValue TruncAnd =
14331 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14332 CombineTo(N0.getNode(), TruncAnd);
14333 }
14334 if (NoReplaceTrunc) {
14335 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14336 } else {
14337 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14338 LN00->getValueType(0), ExtLoad);
14339 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14340 }
14341 return SDValue(N,0); // Return N so it doesn't get rechecked!
14342 }
14343 }
14344 }
14345
14346 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14347 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14348 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14349 return ZExtLoad;
14350
14351 // Try to simplify (zext (zextload x)).
14352 if (SDValue foldedExt = tryToFoldExtOfExtload(
14353 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14354 return foldedExt;
14355
14356 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14357 return V;
14358
14359 if (N0.getOpcode() == ISD::SETCC) {
14360 // Propagate fast-math-flags.
14361 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14362
14363 // Only do this before legalize for now.
14364 if (!LegalOperations && VT.isVector() &&
14365 N0.getValueType().getVectorElementType() == MVT::i1) {
14366 EVT N00VT = N0.getOperand(0).getValueType();
14367 if (getSetCCResultType(N00VT) == N0.getValueType())
14368 return SDValue();
14369
14370 // We know that the # elements of the results is the same as the #
14371 // elements of the compare (and the # elements of the compare result for
14372 // that matter). Check to see that they are the same size. If so, we know
14373 // that the element size of the sext'd result matches the element size of
14374 // the compare operands.
14375 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14376 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14377 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14378 N0.getOperand(1), N0.getOperand(2));
14379 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14380 }
14381
14382 // If the desired elements are smaller or larger than the source
14383 // elements we can use a matching integer vector type and then
14384 // truncate/any extend followed by zext_in_reg.
14385 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14386 SDValue VsetCC =
14387 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14388 N0.getOperand(1), N0.getOperand(2));
14389 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14390 N0.getValueType());
14391 }
14392
14393 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14394 EVT N0VT = N0.getValueType();
14395 EVT N00VT = N0.getOperand(0).getValueType();
14396 if (SDValue SCC = SimplifySelectCC(
14397 DL, N0.getOperand(0), N0.getOperand(1),
14398 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14399 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14400 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14401 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14402 }
14403
14404 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14405 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14406 !TLI.isZExtFree(N0, VT)) {
14407 SDValue ShVal = N0.getOperand(0);
14408 SDValue ShAmt = N0.getOperand(1);
14409 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14410 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14411 if (N0.getOpcode() == ISD::SHL) {
14412 // If the original shl may be shifting out bits, do not perform this
14413 // transformation.
14414 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14415 ShVal.getOperand(0).getValueSizeInBits();
14416 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14417 // If the shift is too large, then see if we can deduce that the
14418 // shift is safe anyway.
14419
14420 // Check if the bits being shifted out are known to be zero.
14421 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
14422 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
14423 return SDValue();
14424 }
14425 }
14426
14427 // Ensure that the shift amount is wide enough for the shifted value.
14428 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14429 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14430
14431 return DAG.getNode(N0.getOpcode(), DL, VT,
14432 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14433 }
14434 }
14435 }
14436
14437 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14438 return NewVSel;
14439
14440 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14441 return NewCtPop;
14442
14443 if (SDValue V = widenAbs(N, DAG))
14444 return V;
14445
14446 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14447 return Res;
14448
14449 // CSE zext nneg with sext if the zext is not free.
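// For example, if both (zext nneg X) and (sext X) exist in the DAG, the nneg
// flag guarantees X's sign bit is zero, so both nodes produce the same value
// and the existing sext can be reused instead of keeping two nodes.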
14450 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14451 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14452 if (CSENode)
14453 return SDValue(CSENode, 0);
14454 }
14455
14456 return SDValue();
14457}
14458
14459SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14460 SDValue N0 = N->getOperand(0);
14461 EVT VT = N->getValueType(0);
14462 SDLoc DL(N);
14463
14464 // aext(undef) = undef
14465 if (N0.isUndef())
14466 return DAG.getUNDEF(VT);
14467
14468 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14469 return Res;
14470
14471 // fold (aext (aext x)) -> (aext x)
14472 // fold (aext (zext x)) -> (zext x)
14473 // fold (aext (sext x)) -> (sext x)
14474 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14475 N0.getOpcode() == ISD::SIGN_EXTEND) {
14476 SDNodeFlags Flags;
14477 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14478 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14479 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14480 }
14481
14482 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14483 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14484 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14485 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14486 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14487 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14488 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14489
14490 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14491 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14492 if (N0.getOpcode() == ISD::TRUNCATE) {
14493 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14494 SDNode *oye = N0.getOperand(0).getNode();
14495 if (NarrowLoad.getNode() != N0.getNode()) {
14496 CombineTo(N0.getNode(), NarrowLoad);
14497 // CombineTo deleted the truncate, if needed, but not what's under it.
14498 AddToWorklist(oye);
14499 }
14500 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14501 }
14502 }
14503
14504 // fold (aext (truncate x))
14505 if (N0.getOpcode() == ISD::TRUNCATE)
14506 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14507
14508 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14509 // if the trunc is not free.
14510 if (N0.getOpcode() == ISD::AND &&
14511 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14512 N0.getOperand(1).getOpcode() == ISD::Constant &&
14513 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14514 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14515 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14516 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14517 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14518 }
14519
14520 // fold (aext (load x)) -> (aext (truncate (extload x)))
14521 // None of the supported targets knows how to perform load and any_ext
14522 // on vectors in one instruction, so attempt to fold to zext instead.
14523 if (VT.isVector()) {
14524 // Try to simplify (zext (load x)).
14525 if (SDValue foldedExt =
14526 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14527 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14528 return foldedExt;
14529 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14530 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14531 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14532 bool DoXform = true;
14533 SmallVector<SDNode *, 4> SetCCs;
14534 if (!N0.hasOneUse())
14535 DoXform =
14536 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14537 if (DoXform) {
14538 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14539 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14540 LN0->getBasePtr(), N0.getValueType(),
14541 LN0->getMemOperand());
14542 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14543 // If the load value is used only by N, replace it via CombineTo N.
14544 bool NoReplaceTrunc = N0.hasOneUse();
14545 CombineTo(N, ExtLoad);
14546 if (NoReplaceTrunc) {
14547 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14548 recursivelyDeleteUnusedNodes(LN0);
14549 } else {
14550 SDValue Trunc =
14551 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14552 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14553 }
14554 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14555 }
14556 }
14557
14558 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14559 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14560 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14561 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14562 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14563 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14564 ISD::LoadExtType ExtType = LN0->getExtensionType();
14565 EVT MemVT = LN0->getMemoryVT();
14566 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14567 SDValue ExtLoad =
14568 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14569 MemVT, LN0->getMemOperand());
14570 CombineTo(N, ExtLoad);
14571 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14572 recursivelyDeleteUnusedNodes(LN0);
14573 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14574 }
14575 }
14576
14577 if (N0.getOpcode() == ISD::SETCC) {
14578 // Propagate fast-math-flags.
14579 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14580
14581 // For vectors:
14582 // aext(setcc) -> vsetcc
14583 // aext(setcc) -> truncate(vsetcc)
14584 // aext(setcc) -> aext(vsetcc)
14585 // Only do this before legalize for now.
14586 if (VT.isVector() && !LegalOperations) {
14587 EVT N00VT = N0.getOperand(0).getValueType();
14588 if (getSetCCResultType(N00VT) == N0.getValueType())
14589 return SDValue();
14590
14591 // We know that the # elements of the results is the same as the
14592 // # elements of the compare (and the # elements of the compare result
14593 // for that matter). Check to see that they are the same size. If so,
14594 // we know that the element size of the sext'd result matches the
14595 // element size of the compare operands.
14596 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14597 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14598 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14599
14600 // If the desired elements are smaller or larger than the source
14601 // elements we can use a matching integer vector type and then
14602 // truncate/any extend
14603 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14604 SDValue VsetCC = DAG.getSetCC(
14605 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14606 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14607 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14608 }
14609
14610 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14611 if (SDValue SCC = SimplifySelectCC(
14612 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14613 DAG.getConstant(0, DL, VT),
14614 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14615 return SCC;
14616 }
14617
14618 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14619 return NewCtPop;
14620
14621 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14622 return Res;
14623
14624 return SDValue();
14625}
14626
14627SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14628 unsigned Opcode = N->getOpcode();
14629 SDValue N0 = N->getOperand(0);
14630 SDValue N1 = N->getOperand(1);
14631 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14632
14633 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14634 if (N0.getOpcode() == Opcode &&
14635 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14636 return N0;
14637
14638 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14639 N0.getOperand(0).getOpcode() == Opcode) {
14640 // We have an assert, truncate, assert sandwich. Make one stronger assert
14641 // by asserting on the smallest asserted type to the larger source type.
14642 // This eliminates the later assert:
14643 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14644 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14645 SDLoc DL(N);
14646 SDValue BigA = N0.getOperand(0);
14647 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14648 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14649 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14650 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14651 BigA.getOperand(0), MinAssertVTVal);
14652 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14653 }
14654
14655 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14656 // than X. Just move the AssertZext in front of the truncate and drop the
14657 // AssertSExt.
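// For example:
// (AssertZext (trunc (AssertSext X, i32) to i16), i8)
// --> (trunc (AssertZext X, i8) to i16)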
14658 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14659 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14660 Opcode == ISD::AssertZext) {
14661 SDValue BigA = N0.getOperand(0);
14662 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14663 if (AssertVT.bitsLT(BigA_AssertVT)) {
14664 SDLoc DL(N);
14665 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14666 BigA.getOperand(0), N1);
14667 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14668 }
14669 }
14670
14671 return SDValue();
14672}
14673
14674SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14675 SDLoc DL(N);
14676
14677 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14678 SDValue N0 = N->getOperand(0);
14679
14680 // Fold (assertalign (assertalign x, AL0), AL1) ->
14681 // (assertalign x, max(AL0, AL1))
14682 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14683 return DAG.getAssertAlign(DL, N0.getOperand(0),
14684 std::max(AL, AAN->getAlign()));
14685
14686 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14687 // this assert down to source operands so that those arithmetic ops could be
14688 // exposed to the DAG combining.
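// For example, in (assertalign (add X, 32), align 16) the constant 32 is
// already 16-byte aligned, so the assertion really constrains X and can be
// rewritten as (add (assertalign X, align 16), 32).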
14689 switch (N0.getOpcode()) {
14690 default:
14691 break;
14692 case ISD::ADD:
14693 case ISD::SUB: {
14694 unsigned AlignShift = Log2(AL);
14695 SDValue LHS = N0.getOperand(0);
14696 SDValue RHS = N0.getOperand(1);
14697 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14698 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14699 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14700 if (LHSAlignShift < AlignShift)
14701 LHS = DAG.getAssertAlign(DL, LHS, AL);
14702 if (RHSAlignShift < AlignShift)
14703 RHS = DAG.getAssertAlign(DL, RHS, AL);
14704 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14705 }
14706 break;
14707 }
14708 }
14709
14710 return SDValue();
14711}
14712
14713/// If the result of a load is shifted/masked/truncated to an effectively
14714/// narrower type, try to transform the load to a narrower type and/or
14715/// use an extending load.
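/// For example, on a little-endian target (i16 (trunc (srl (i32 load [p]), 16)))
/// can be narrowed to an i16 load from [p + 2].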
14716SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14717 unsigned Opc = N->getOpcode();
14718
14719 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14720 SDValue N0 = N->getOperand(0);
14721 EVT VT = N->getValueType(0);
14722 EVT ExtVT = VT;
14723
14724 // This transformation isn't valid for vector loads.
14725 if (VT.isVector())
14726 return SDValue();
14727
14728 // The ShAmt variable is used to indicate that we've consumed a right
14729 // shift. I.e. we want to narrow the width of the load by skipping the
14730 // ShAmt least significant bits.
14731 unsigned ShAmt = 0;
14732 // A special case is when the least significant bits from the load are masked
14733 // away, but using an AND rather than a right shift. ShiftedOffset is used to
14734 // indicate that the narrowed load should be left-shifted ShiftedOffset bits to
14735 // get the result.
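// For example, (and (i32 load [p]), 0xFF00) can be narrowed to a zero-extending
// i8 load (from [p + 1] on a little-endian target) followed by a shl by 8 to
// put the byte back into bits 8-15.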
14736 unsigned ShiftedOffset = 0;
14737 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14738 // extended to VT.
14739 if (Opc == ISD::SIGN_EXTEND_INREG) {
14740 ExtType = ISD::SEXTLOAD;
14741 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14742 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14743 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14744 // value, or it may be shifting a higher subword, half or byte into the
14745 // lowest bits.
14746
14747 // Only handle shift with constant shift amount, and the shiftee must be a
14748 // load.
14749 auto *LN = dyn_cast<LoadSDNode>(N0);
14750 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14751 if (!N1C || !LN)
14752 return SDValue();
14753 // If the shift amount is larger than the memory type then we're not
14754 // accessing any of the loaded bytes.
14755 ShAmt = N1C->getZExtValue();
14756 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14757 if (MemoryWidth <= ShAmt)
14758 return SDValue();
14759 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14760 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14761 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14762 // If original load is a SEXTLOAD then we can't simply replace it by a
14763 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14764 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14765 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14766 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14767 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14768 LN->getExtensionType() != ExtType)
14769 return SDValue();
14770 } else if (Opc == ISD::AND) {
14771 // An AND with a constant mask is the same as a truncate + zero-extend.
14772 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14773 if (!AndC)
14774 return SDValue();
14775
14776 const APInt &Mask = AndC->getAPIntValue();
14777 unsigned ActiveBits = 0;
14778 if (Mask.isMask()) {
14779 ActiveBits = Mask.countr_one();
14780 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14781 ShiftedOffset = ShAmt;
14782 } else {
14783 return SDValue();
14784 }
14785
14786 ExtType = ISD::ZEXTLOAD;
14787 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14788 }
14789
14790 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14791 // a right shift. Here we redo some of those checks, to possibly adjust the
14792 // ExtVT even further based on "a masking AND". We could also end up here for
14793 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14794 // need to be done here as well.
14795 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14796 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14797 // Bail out when the SRL has more than one use. This is done for historical
14798 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14799 // check below? And maybe it could be non-profitable to do the transform in
14800 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14801 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14802 if (!SRL.hasOneUse())
14803 return SDValue();
14804
14805 // Only handle shift with constant shift amount, and the shiftee must be a
14806 // load.
14807 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14808 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14809 if (!SRL1C || !LN)
14810 return SDValue();
14811
14812 // If the shift amount is larger than the input type then we're not
14813 // accessing any of the loaded bytes. If the load was a zextload/extload
14814 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14815 ShAmt = SRL1C->getZExtValue();
14816 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14817 if (ShAmt >= MemoryWidth)
14818 return SDValue();
14819
14820 // Because a SRL must be assumed to *need* to zero-extend the high bits
14821 // (as opposed to anyext the high bits), we can't combine the zextload
14822 // lowering of SRL and an sextload.
14823 if (LN->getExtensionType() == ISD::SEXTLOAD)
14824 return SDValue();
14825
14826 // Avoid reading outside the memory accessed by the original load (could
14827 // happen if we only adjust the load base pointer by ShAmt). Instead we
14828 // try to narrow the load even further. The typical scenario here is:
14829 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14830 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14831 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14832 // Don't replace sextload by zextload.
14833 if (ExtType == ISD::SEXTLOAD)
14834 return SDValue();
14835 // Narrow the load.
14836 ExtType = ISD::ZEXTLOAD;
14837 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14838 }
14839
14840 // If the SRL is only used by a masking AND, we may be able to adjust
14841 // the ExtVT to make the AND redundant.
14842 SDNode *Mask = *(SRL->user_begin());
14843 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14844 isa<ConstantSDNode>(Mask->getOperand(1))) {
14845 unsigned Offset, ActiveBits;
14846 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14847 if (ShiftMask.isMask()) {
14848 EVT MaskedVT =
14849 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14850 // If the mask is smaller, recompute the type.
14851 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14852 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14853 ExtVT = MaskedVT;
14854 } else if (ExtType == ISD::ZEXTLOAD &&
14855 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14856 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14857 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14858 // If the mask is shifted we can use a narrower load and a shl to insert
14859 // the trailing zeros.
14860 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14861 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14862 ExtVT = MaskedVT;
14863 ShAmt = Offset + ShAmt;
14864 ShiftedOffset = Offset;
14865 }
14866 }
14867 }
14868
14869 N0 = SRL.getOperand(0);
14870 }
14871
14872 // If the load is shifted left (and the result isn't shifted back right), we
14873 // can fold a truncate through the shift. The typical scenario is that N
14874 // points at a TRUNCATE here so the attempted fold is:
14875 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
14876 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14877 unsigned ShLeftAmt = 0;
14878 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14879 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
14880 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14881 ShLeftAmt = N01->getZExtValue();
14882 N0 = N0.getOperand(0);
14883 }
14884 }
14885
14886 // If we haven't found a load, we can't narrow it.
14887 if (!isa<LoadSDNode>(N0))
14888 return SDValue();
14889
14890 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14891 // Reducing the width of a volatile load is illegal. For atomics, we may be
14892 // able to reduce the width provided we never widen again. (see D66309)
14893 if (!LN0->isSimple() ||
14894 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14895 return SDValue();
14896
14897 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14898 unsigned LVTStoreBits =
14899 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14900 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14901 return LVTStoreBits - EVTStoreBits - ShAmt;
14902 };
14903
14904 // We need to adjust the pointer to the load by ShAmt bits in order to load
14905 // the correct bytes.
14906 unsigned PtrAdjustmentInBits =
14907 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14908
14909 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14910 SDLoc DL(LN0);
14911 // The original load itself didn't wrap, so an offset within it doesn't.
14912 SDValue NewPtr =
14913 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
14914 DL, SDNodeFlags::NoUnsignedWrap);
14915 AddToWorklist(NewPtr.getNode());
14916
14917 SDValue Load;
14918 if (ExtType == ISD::NON_EXTLOAD)
14919 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14920 LN0->getPointerInfo().getWithOffset(PtrOff),
14921 LN0->getOriginalAlign(),
14922 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14923 else
14924 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14925 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14926 LN0->getOriginalAlign(),
14927 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14928
14929 // Replace the old load's chain with the new load's chain.
14930 WorklistRemover DeadNodes(*this);
14931 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14932
14933 // Shift the result left, if we've swallowed a left shift.
14934 SDValue Result = Load;
14935 if (ShLeftAmt != 0) {
14936 // If the shift amount is as large as the result size (but, presumably,
14937 // no larger than the source) then the useful bits of the result are
14938 // zero; we can't simply return the shortened shift, because the result
14939 // of that operation is undefined.
14940 if (ShLeftAmt >= VT.getScalarSizeInBits())
14941 Result = DAG.getConstant(0, DL, VT);
14942 else
14943 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
14944 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
14945 }
14946
14947 if (ShiftedOffset != 0) {
14948 // We're using a shifted mask, so the load now has an offset. This means
14949 // that data has been loaded into lower bytes than it would have been
14950 // before, so we need to shl the loaded data into the correct position in the
14951 // register.
14952 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14953 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14954 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14955 }
14956
14957 // Return the new loaded value.
14958 return Result;
14959}
14960
14961SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14962 SDValue N0 = N->getOperand(0);
14963 SDValue N1 = N->getOperand(1);
14964 EVT VT = N->getValueType(0);
14965 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14966 unsigned VTBits = VT.getScalarSizeInBits();
14967 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14968 SDLoc DL(N);
14969
14970 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14971 if (N0.isUndef())
14972 return DAG.getConstant(0, DL, VT);
14973
14974 // fold (sext_in_reg c1) -> c1
14975 if (SDValue C =
14976 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
14977 return C;
14978
14979 // If the input is already sign extended, just drop the extension.
14980 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14981 return N0;
14982
14983 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14984 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14985 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14986 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
14987
14988 // fold (sext_in_reg (sext x)) -> (sext x)
14989 // fold (sext_in_reg (aext x)) -> (sext x)
14990 // if x is small enough or if we know that x has more than 1 sign bit and the
14991 // sign_extend_inreg is extending from one of them.
14992 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14993 SDValue N00 = N0.getOperand(0);
14994 unsigned N00Bits = N00.getScalarValueSizeInBits();
14995 if ((N00Bits <= ExtVTBits ||
14996 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14997 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14998 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
14999 }
15000
15001 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15002 // if x is small enough or if we know that x has more than 1 sign bit and the
15003 // sign_extend_inreg is extending from one of them.
15004 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15005 SDValue N00 = N0.getOperand(0);
15006 unsigned N00Bits = N00.getScalarValueSizeInBits();
15007 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
15008 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
15009 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15010 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
15011 if ((N00Bits == ExtVTBits ||
15012 (!IsZext && (N00Bits < ExtVTBits ||
15013 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15014 (!LegalOperations ||
15015 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15016 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15017 }
15018
15019 // fold (sext_in_reg (zext x)) -> (sext x)
15020 // iff we are extending the source sign bit.
15021 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15022 SDValue N00 = N0.getOperand(0);
15023 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15024 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15025 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15026 }
15027
15028 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15029 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15030 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15031
15032 // fold operands of sext_in_reg based on knowledge that the top bits are not
15033 // demanded.
15034 if (SimplifyDemandedBits(SDValue(N, 0)))
15035 return SDValue(N, 0);
15036
15037 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15038 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15039 if (SDValue NarrowLoad = reduceLoadWidth(N))
15040 return NarrowLoad;
15041
15042 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15043 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15044 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15045 if (N0.getOpcode() == ISD::SRL) {
15046 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15047 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15048 // We can turn this into an SRA iff the input to the SRL is already sign
15049 // extended enough.
15050 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15051 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15052 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15053 N0.getOperand(1));
15054 }
15055 }
15056
15057 // fold (sext_inreg (extload x)) -> (sextload x)
15058 // If sextload is not supported by target, we can only do the combine when
15059 // load has one use. Doing otherwise can block folding the extload with other
15060 // extends that the target does support.
15061 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15062 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15063 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15064 N0.hasOneUse()) ||
15065 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15066 auto *LN0 = cast<LoadSDNode>(N0);
15067 SDValue ExtLoad =
15068 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15069 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15070 CombineTo(N, ExtLoad);
15071 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15072 AddToWorklist(ExtLoad.getNode());
15073 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15074 }
15075
15076 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15077 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15078 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15079 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15080 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15081 auto *LN0 = cast<LoadSDNode>(N0);
15082 SDValue ExtLoad =
15083 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15084 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15085 CombineTo(N, ExtLoad);
15086 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15087 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15088 }
15089
15090 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15091 // ignore it if the masked load is already sign extended
15092 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15093 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15094 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15095 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15096 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15097 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15098 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15099 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15100 CombineTo(N, ExtMaskedLoad);
15101 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15102 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15103 }
15104 }
15105
15106 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15107 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15108 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15109 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
15110 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15111 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15112
15113 SDValue ExtLoad = DAG.getMaskedGather(
15114 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15115 GN0->getIndexType(), ISD::SEXTLOAD);
15116
15117 CombineTo(N, ExtLoad);
15118 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15119 AddToWorklist(ExtLoad.getNode());
15120 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15121 }
15122 }
15123
15124 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15125 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15126 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15127 N0.getOperand(1), false))
15128 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15129 }
15130
15131 // Fold (iM_signext_inreg
15132 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15133 // from iN)
15134 // -> (extract_subvector (signext iN_v to iM))
15135 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15136 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15137 SDValue InnerExt = N0.getOperand(0);
15138 EVT InnerExtVT = InnerExt->getValueType(0);
15139 SDValue Extendee = InnerExt->getOperand(0);
15140
15141 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15142 (!LegalOperations ||
15143 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15144 SDValue SignExtExtendee =
15145 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15146 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15147 N0.getOperand(1));
15148 }
15149 }
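// e.g. (illustrative)
//   (sext_inreg (extract_subvector (zext v8i16 X to v8i32), 0), i16)
// with VT = v4i32 becomes
//   (extract_subvector (sext v8i16 X to v8i32), 0),
// i.e. the in-register extension is folded into a full sign extension of the
// original, narrower source.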
15150
15151 return SDValue();
15152}
15153
15154 static SDValue foldExtendVectorInregToExtendOfSubvector(
15155 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15156 bool LegalOperations) {
15157 unsigned InregOpcode = N->getOpcode();
15158 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15159
15160 SDValue Src = N->getOperand(0);
15161 EVT VT = N->getValueType(0);
15162 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15163 Src.getValueType().getVectorElementType(),
15164 VT.getVectorElementCount());
15165
15166 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15167 "Expected EXTEND_VECTOR_INREG dag node in input!");
15168
15169 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15170 // FIXME: one-use check may be overly restrictive
15171 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15172 return SDValue();
15173
15174 // Profitability check: we must be extending exactly one of its operands.
15175 // FIXME: this is probably overly restrictive.
15176 Src = Src.getOperand(0);
15177 if (Src.getValueType() != SrcVT)
15178 return SDValue();
15179
15180 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15181 return SDValue();
15182
15183 return DAG.getNode(Opcode, DL, VT, Src);
15184}
15185
15186SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15187 SDValue N0 = N->getOperand(0);
15188 EVT VT = N->getValueType(0);
15189 SDLoc DL(N);
15190
15191 if (N0.isUndef()) {
15192 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15193 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15194 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15195 ? DAG.getUNDEF(VT)
15196 : DAG.getConstant(0, DL, VT);
15197 }
15198
15199 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15200 return Res;
15201
15202 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15203 return SDValue(N, 0);
15204
15205 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15206 LegalOperations))
15207 return R;
15208
15209 return SDValue();
15210}
15211
15212SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15213 EVT VT = N->getValueType(0);
15214 SDValue N0 = N->getOperand(0);
15215
15216 SDValue FPVal;
15217 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15219 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15220 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15221 DAG.getValueType(VT.getScalarType()));
15222
15223 return SDValue();
15224}
15225
15226/// Detect patterns of truncation with unsigned saturation:
15227///
15228/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15229/// Return the source value x to be truncated or SDValue() if the pattern was
15230/// not matched.
15231///
15232 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15233 unsigned NumDstBits = VT.getScalarSizeInBits();
15234 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15235 // Saturation with truncation. We truncate from InVT to VT.
15236 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15237
15238 SDValue Min;
15239 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15240 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15241 return Min;
15242
15243 return SDValue();
15244}
15245
15246/// Detect patterns of truncation with signed saturation:
15247/// (truncate (smin (smax (x, signed_min_of_dest_type),
15248/// signed_max_of_dest_type)) to dest_type)
15249/// or:
15250/// (truncate (smax (smin (x, signed_max_of_dest_type),
15251/// signed_min_of_dest_type)) to dest_type).
15252///
15253/// Return the source value to be truncated or SDValue() if the pattern was not
15254/// matched.
15255 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15256 unsigned NumDstBits = VT.getScalarSizeInBits();
15257 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15258 // Saturation with truncation. We truncate from InVT to VT.
15259 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15260
15261 SDValue Val;
15262 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15263 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15264
15265 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15266 m_SpecificInt(SignedMax))))
15267 return Val;
15268
15269 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15270 m_SpecificInt(SignedMin))))
15271 return Val;
15272
15273 return SDValue();
15274}
15275
15276/// Detect patterns of truncation with unsigned saturation:
15277 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15278 const SDLoc &DL) {
15279 unsigned NumDstBits = VT.getScalarSizeInBits();
15280 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15281 // Saturation with truncation. We truncate from InVT to VT.
15282 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15283
15284 SDValue Val;
15285 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15286 // Min == 0, Max is unsigned max of destination type.
15287 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15288 m_Zero())))
15289 return Val;
15290
15291 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15292 m_SpecificInt(UnsignedMax))))
15293 return Val;
15294
15295 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15296 m_SpecificInt(UnsignedMax))))
15297 return Val;
15298
15299 return SDValue();
15300}
15301
15302static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
15303 SDLoc &DL, const TargetLowering &TLI,
15304 SelectionDAG &DAG) {
15305 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15306 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
15307 TLI.isTypeDesirableForOp(Opc, VT));
15308 };
15309
15310 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
15311 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
15312 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
15313 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
15314 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15315 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15316 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15317 } else if (Src.getOpcode() == ISD::UMIN) {
15318 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15319 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15320 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15321 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
15322 if (SDValue USatVal = detectUSatUPattern(Src, VT))
15323 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
15324 }
15325
15326 return SDValue();
15327}
15328
15329SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
15330 SDValue N0 = N->getOperand(0);
15331 EVT VT = N->getValueType(0);
15332 EVT SrcVT = N0.getValueType();
15333 bool isLE = DAG.getDataLayout().isLittleEndian();
15334 SDLoc DL(N);
15335
15336 // trunc(undef) = undef
15337 if (N0.isUndef())
15338 return DAG.getUNDEF(VT);
15339
15340 // fold (truncate (truncate x)) -> (truncate x)
15341 if (N0.getOpcode() == ISD::TRUNCATE)
15342 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15343
15344 // fold saturated truncate
15345 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
15346 return SaturatedTR;
15347
15348 // fold (truncate c1) -> c1
15349 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
15350 return C;
15351
15352 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
15353 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
15354 N0.getOpcode() == ISD::SIGN_EXTEND ||
15355 N0.getOpcode() == ISD::ANY_EXTEND) {
15356 // if the source is smaller than the dest, we still need an extend.
15357 if (N0.getOperand(0).getValueType().bitsLT(VT))
15358 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15359 // if the source is larger than the dest, then we just need the truncate.
15360 if (N0.getOperand(0).getValueType().bitsGT(VT))
15361 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15362 // if the source and dest are the same type, we can drop both the extend
15363 // and the truncate.
15364 return N0.getOperand(0);
15365 }
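// For example:
//   (i32 truncate (i64 sext X:i16)) -> (i32 sext X:i16)
//   (i16 truncate (i64 zext X:i32)) -> (i16 truncate X:i32)
//   (i32 truncate (i64 anyext X:i32)) -> X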
15366
15367 // Try to narrow a truncate-of-sext_in_reg to the destination type:
15368 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
15369 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15370 N0.hasOneUse()) {
15371 SDValue X = N0.getOperand(0);
15372 SDValue ExtVal = N0.getOperand(1);
15373 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15374 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
15375 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
15376 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
15377 }
15378 }
15379
15380 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
15381 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
15382 return SDValue();
15383
15384 // Fold extract-and-trunc into a narrow extract. For example:
15385 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
15386 // i32 y = TRUNCATE(i64 x)
15387 // -- becomes --
15388 // v16i8 b = BITCAST (v2i64 val)
15389 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
15390 //
15391 // Note: We only run this optimization after type legalization (which often
15392 // creates this pattern) and before operation legalization after which
15393 // we need to be more careful about the vector instructions that we generate.
15394 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
15395 N0->hasOneUse()) {
15396 EVT TrTy = N->getValueType(0);
15397 SDValue Src = N0;
15398
15399 // Check for cases where we shift down an upper element before truncation.
15400 int EltOffset = 0;
15401 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15402 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
15403 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
15404 Src = Src.getOperand(0);
15405 EltOffset = *ShAmt / TrTy.getSizeInBits();
15406 }
15407 }
15408 }
15409
15410 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15411 EVT VecTy = Src.getOperand(0).getValueType();
15412 EVT ExTy = Src.getValueType();
15413
15414 auto EltCnt = VecTy.getVectorElementCount();
15415 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
15416 auto NewEltCnt = EltCnt * SizeRatio;
15417
15418 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
15419 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
15420
15421 SDValue EltNo = Src->getOperand(1);
15422 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
15423 int Elt = EltNo->getAsZExtVal();
15424 int Index = isLE ? (Elt * SizeRatio + EltOffset)
15425 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15426 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
15427 DAG.getBitcast(NVT, Src.getOperand(0)),
15428 DAG.getVectorIdxConstant(Index, DL));
15429 }
15430 }
15431 }
15432
15433 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15434 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
15435 TLI.isTruncateFree(SrcVT, VT)) {
15436 if (!LegalOperations ||
15437 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
15438 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
15439 SDLoc SL(N0);
15440 SDValue Cond = N0.getOperand(0);
15441 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15442 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15443 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15444 }
15445 }
15446
15447 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15448 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15449 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15450 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15451 SDValue Amt = N0.getOperand(1);
15452 KnownBits Known = DAG.computeKnownBits(Amt);
15453 unsigned Size = VT.getScalarSizeInBits();
15454 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15455 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15456 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15457 if (AmtVT != Amt.getValueType()) {
15458 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15459 AddToWorklist(Amt.getNode());
15460 }
15461 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15462 }
15463 }
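// e.g. (i32 truncate (i64 shl X, 7)) -> (i32 shl (i32 truncate X), 7); the
// known-bits check guarantees the shift amount is at most 31 here, so the
// shift is still well defined in the narrower type.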
15464
15465 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15466 return V;
15467
15468 if (SDValue ABD = foldABSToABD(N, DL))
15469 return ABD;
15470
15471 // Attempt to pre-truncate BUILD_VECTOR sources.
15472 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15473 N0.hasOneUse() &&
15474 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15475 // Avoid creating illegal types if running after type legalizer.
15476 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15477 EVT SVT = VT.getScalarType();
15478 SmallVector<SDValue, 8> TruncOps;
15479 for (const SDValue &Op : N0->op_values()) {
15480 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15481 TruncOps.push_back(TruncOp);
15482 }
15483 return DAG.getBuildVector(VT, DL, TruncOps);
15484 }
15485
15486 // trunc (splat_vector x) -> splat_vector (trunc x)
15487 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15488 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15489 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15490 EVT SVT = VT.getScalarType();
15491 return DAG.getSplatVector(
15492 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15493 }
15494
15495 // Fold a series of buildvector, bitcast, and truncate if possible.
15496 // For example fold
15497 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15498 // (2xi32 (buildvector x, y)).
15499 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15500 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15501 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15502 N0.getOperand(0).hasOneUse()) {
15503 SDValue BuildVect = N0.getOperand(0);
15504 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15505 EVT TruncVecEltTy = VT.getVectorElementType();
15506
15507 // Check that the element types match.
15508 if (BuildVectEltTy == TruncVecEltTy) {
15509 // Now we only need to compute the offset of the truncated elements.
15510 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15511 unsigned TruncVecNumElts = VT.getVectorNumElements();
15512 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15513 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15514
15515 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15516 "Invalid number of elements");
15517
15518 SmallVector<SDValue, 8> Opnds;
15519 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
15520 i += TruncEltOffset)
15521 Opnds.push_back(BuildVect.getOperand(i));
15522
15523 return DAG.getBuildVector(VT, DL, Opnds);
15524 }
15525 }
15526
15527 // fold (truncate (load x)) -> (smaller load x)
15528 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15529 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15530 if (SDValue Reduced = reduceLoadWidth(N))
15531 return Reduced;
15532
15533 // Handle the case where the truncated result is at least as wide as the
15534 // loaded type.
15535 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15536 auto *LN0 = cast<LoadSDNode>(N0);
15537 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15538 SDValue NewLoad = DAG.getExtLoad(
15539 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15540 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15541 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15542 return NewLoad;
15543 }
15544 }
15545 }
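// e.g. (i16 truncate (i32 load p)) can usually be narrowed by reduceLoadWidth
// to an (i16 load p), while (i16 truncate (i64 zextload i8 p)) is re-emitted
// as an (i16 zextload i8 p) because the loaded type already fits the
// truncated result.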
15546
15547 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15548 // where ... are all 'undef'.
15549 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15550 SmallVector<EVT, 8> VTs;
15551 SDValue V;
15552 unsigned Idx = 0;
15553 unsigned NumDefs = 0;
15554
15555 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15556 SDValue X = N0.getOperand(i);
15557 if (!X.isUndef()) {
15558 V = X;
15559 Idx = i;
15560 NumDefs++;
15561 }
15562 // Stop if more than one member is non-undef.
15563 if (NumDefs > 1)
15564 break;
15565
15566 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15567 VT.getVectorElementType(),
15568 X.getValueType().getVectorElementCount()));
15569 }
15570
15571 if (NumDefs == 0)
15572 return DAG.getUNDEF(VT);
15573
15574 if (NumDefs == 1) {
15575 assert(V.getNode() && "The single defined operand is empty!");
15576 SmallVector<SDValue, 8> Opnds;
15577 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15578 if (i != Idx) {
15579 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15580 continue;
15581 }
15582 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15583 AddToWorklist(NV.getNode());
15584 Opnds.push_back(NV);
15585 }
15586 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15587 }
15588 }
15589
15590 // Fold truncate of a bitcast of a vector to an extract of the low vector
15591 // element.
15592 //
15593 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15594 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15595 SDValue VecSrc = N0.getOperand(0);
15596 EVT VecSrcVT = VecSrc.getValueType();
15597 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15598 (!LegalOperations ||
15599 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15600 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15601 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15602 DAG.getVectorIdxConstant(Idx, DL));
15603 }
15604 }
15605
15606 // Simplify the operands using demanded-bits information.
15607 if (SimplifyDemandedBits(SDValue(N, 0)))
15608 return SDValue(N, 0);
15609
15610 // fold (truncate (extract_subvector(ext x))) ->
15611 // (extract_subvector x)
15612 // TODO: This can be generalized to cover cases where the truncate and extract
15613 // do not fully cancel each other out.
15614 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15615 SDValue N00 = N0.getOperand(0);
15616 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15617 N00.getOpcode() == ISD::ZERO_EXTEND ||
15618 N00.getOpcode() == ISD::ANY_EXTEND) {
15619 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15620 VT.getVectorElementType())
15621 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15622 N00.getOperand(0), N0.getOperand(1));
15623 }
15624 }
15625
15626 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15627 return NewVSel;
15628
15629 // Narrow a suitable binary operation with a non-opaque constant operand by
15630 // moving it ahead of the truncate. This is limited to pre-legalization
15631 // because targets may prefer a wider type during later combines and invert
15632 // this transform.
15633 switch (N0.getOpcode()) {
15634 case ISD::ADD:
15635 case ISD::SUB:
15636 case ISD::MUL:
15637 case ISD::AND:
15638 case ISD::OR:
15639 case ISD::XOR:
15640 if (!LegalOperations && N0.hasOneUse() &&
15641 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15642 isConstantOrConstantVector(N0.getOperand(1), true))) {
15643 // TODO: We already restricted this to pre-legalization, but for vectors
15644 // we are extra cautious to not create an unsupported operation.
15645 // Target-specific changes are likely needed to avoid regressions here.
15646 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15647 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15648 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15649 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15650 }
15651 }
15652 break;
15653 case ISD::ADDE:
15654 case ISD::UADDO_CARRY:
15655 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15656 // (trunc uaddo_carry(X, Y, Carry)) ->
15657 // (uaddo_carry trunc(X), trunc(Y), Carry)
15658 // When the adde's carry is not used.
15659 // We only do this for uaddo_carry before operation legalization.
15660 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15661 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15662 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15663 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15664 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15665 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15666 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15667 }
15668 break;
15669 case ISD::USUBSAT:
15670 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15671 // enough to know that the upper bits are zero, we must also ensure that we
15672 // don't introduce an extra truncate.
15673 if (!LegalOperations && N0.hasOneUse() &&
15674 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15675 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15676 VT.getScalarSizeInBits() &&
15677 hasOperation(N0.getOpcode(), VT)) {
15678 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15679 DAG, DL);
15680 }
15681 break;
15682 }
15683
15684 return SDValue();
15685}
15686
15687static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15688 SDValue Elt = N->getOperand(i);
15689 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15690 return Elt.getNode();
15691 return Elt.getOperand(Elt.getResNo()).getNode();
15692}
15693
15694/// build_pair (load, load) -> load
15695/// if load locations are consecutive.
15696SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15697 assert(N->getOpcode() == ISD::BUILD_PAIR);
15698
15699 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15700 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15701
15702 // A BUILD_PAIR always has the least significant part in elt 0 and the
15703 // most significant part in elt 1. So when combining into one large load, we
15704 // need to consider the endianness.
15705 if (DAG.getDataLayout().isBigEndian())
15706 std::swap(LD1, LD2);
15707
15708 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15709 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15710 LD1->getAddressSpace() != LD2->getAddressSpace())
15711 return SDValue();
15712
15713 unsigned LD1Fast = 0;
15714 EVT LD1VT = LD1->getValueType(0);
15715 unsigned LD1Bytes = LD1VT.getStoreSize();
15716 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15717 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15718 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15719 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15720 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15721 LD1->getPointerInfo(), LD1->getAlign());
15722
15723 return SDValue();
15724}
15725
15726static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15727 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15728 // and Lo parts; on big-endian machines it doesn't.
15729 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15730}
15731
15732SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15733 const TargetLowering &TLI) {
15734 // If this is not a bitcast to an FP type or if the target doesn't have
15735 // IEEE754-compliant FP logic, we're done.
15736 EVT VT = N->getValueType(0);
15737 SDValue N0 = N->getOperand(0);
15738 EVT SourceVT = N0.getValueType();
15739
15740 if (!VT.isFloatingPoint())
15741 return SDValue();
15742
15743 // TODO: Handle cases where the integer constant is a different scalar
15744 // bitwidth to the FP.
15745 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15746 return SDValue();
15747
15748 unsigned FPOpcode;
15749 APInt SignMask;
15750 switch (N0.getOpcode()) {
15751 case ISD::AND:
15752 FPOpcode = ISD::FABS;
15753 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15754 break;
15755 case ISD::XOR:
15756 FPOpcode = ISD::FNEG;
15757 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15758 break;
15759 case ISD::OR:
15760 FPOpcode = ISD::FABS;
15761 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15762 break;
15763 default:
15764 return SDValue();
15765 }
15766
15767 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15768 return SDValue();
15769
15770 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15771 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15772 // removing this would require more changes.
15773 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15774 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
15775 return true;
15776
15777 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15778 };
15779
15780 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15781 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15782 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15783 // fneg (fabs X)
15784 SDValue LogicOp0 = N0.getOperand(0);
15785 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15786 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15787 IsBitCastOrFree(LogicOp0, VT)) {
15788 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15789 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15790 NumFPLogicOpsConv++;
15791 if (N0.getOpcode() == ISD::OR)
15792 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15793 return FPOp;
15794 }
15795
15796 return SDValue();
15797}
15798
15799SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15800 SDValue N0 = N->getOperand(0);
15801 EVT VT = N->getValueType(0);
15802
15803 if (N0.isUndef())
15804 return DAG.getUNDEF(VT);
15805
15806 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15807 // Only do this before legalize types, unless both types are integer and the
15808 // scalar type is legal. Only do this before legalize ops, since the target
15809 // maybe depending on the bitcast.
15810 // First check to see if this is all constant.
15811 // TODO: Support FP bitcasts after legalize types.
15812 if (VT.isVector() &&
15813 (!LegalTypes ||
15814 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15815 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15816 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15817 cast<BuildVectorSDNode>(N0)->isConstant())
15818 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15819 VT.getVectorElementType());
15820
15821 // If the input is a constant, let getNode fold it.
15822 if (isIntOrFPConstant(N0)) {
15823 // If we can't allow illegal operations, we need to check that this is just
15824 // an fp -> int or int -> fp conversion and that the resulting operation will
15825 // be legal.
15826 if (!LegalOperations ||
15827 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15828 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15829 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15830 TLI.isOperationLegal(ISD::Constant, VT))) {
15831 SDValue C = DAG.getBitcast(VT, N0);
15832 if (C.getNode() != N)
15833 return C;
15834 }
15835 }
15836
15837 // (conv (conv x, t1), t2) -> (conv x, t2)
15838 if (N0.getOpcode() == ISD::BITCAST)
15839 return DAG.getBitcast(VT, N0.getOperand(0));
15840
15841 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15842 // iff the current bitwise logicop type isn't legal
15843 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15844 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15845 auto IsFreeBitcast = [VT](SDValue V) {
15846 return (V.getOpcode() == ISD::BITCAST &&
15847 V.getOperand(0).getValueType() == VT) ||
15848 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15849 V->hasOneUse());
15850 };
15851 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15852 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15853 DAG.getBitcast(VT, N0.getOperand(0)),
15854 DAG.getBitcast(VT, N0.getOperand(1)));
15855 }
15856
15857 // fold (conv (load x)) -> (load (conv*)x)
15858 // If the resultant load doesn't need a higher alignment than the original!
15859 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15860 // Do not remove the cast if the types differ in endian layout.
15861 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15862 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15863 // If the load is volatile, we only want to change the load type if the
15864 // resulting load is legal. Otherwise we might increase the number of
15865 // memory accesses. We don't care if the original type was legal or not
15866 // as we assume software couldn't rely on the number of accesses of an
15867 // illegal type.
15868 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15869 TLI.isOperationLegal(ISD::LOAD, VT))) {
15870 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15871
15872 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15873 *LN0->getMemOperand())) {
15874 SDValue Load =
15875 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15876 LN0->getMemOperand());
15877 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15878 return Load;
15879 }
15880 }
15881
15882 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15883 return V;
15884
15885 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15886 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15887 //
15888 // For ppc_fp128:
15889 // fold (bitcast (fneg x)) ->
15890 // flipbit = signbit
15891 // (xor (bitcast x) (build_pair flipbit, flipbit))
15892 //
15893 // fold (bitcast (fabs x)) ->
15894 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15895 // (xor (bitcast x) (build_pair flipbit, flipbit))
15896 // This often reduces constant pool loads.
15897 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15898 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15899 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15900 !N0.getValueType().isVector()) {
15901 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15902 AddToWorklist(NewConv.getNode());
15903
15904 SDLoc DL(N);
15905 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15906 assert(VT.getSizeInBits() == 128);
15907 SDValue SignBit = DAG.getConstant(
15908 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15909 SDValue FlipBit;
15910 if (N0.getOpcode() == ISD::FNEG) {
15911 FlipBit = SignBit;
15912 AddToWorklist(FlipBit.getNode());
15913 } else {
15914 assert(N0.getOpcode() == ISD::FABS);
15915 SDValue Hi =
15916 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15917 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15918 SDLoc(NewConv)));
15919 AddToWorklist(Hi.getNode());
15920 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15921 AddToWorklist(FlipBit.getNode());
15922 }
15923 SDValue FlipBits =
15924 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15925 AddToWorklist(FlipBits.getNode());
15926 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15927 }
15928 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15929 if (N0.getOpcode() == ISD::FNEG)
15930 return DAG.getNode(ISD::XOR, DL, VT,
15931 NewConv, DAG.getConstant(SignBit, DL, VT));
15932 assert(N0.getOpcode() == ISD::FABS);
15933 return DAG.getNode(ISD::AND, DL, VT,
15934 NewConv, DAG.getConstant(~SignBit, DL, VT));
15935 }
15936
15937 // fold (bitconvert (fcopysign cst, x)) ->
15938 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15939 // Note that we don't handle (copysign x, cst) because this can always be
15940 // folded to an fneg or fabs.
15941 //
15942 // For ppc_fp128:
15943 // fold (bitcast (fcopysign cst, x)) ->
15944 // flipbit = (and (extract_element
15945 // (xor (bitcast cst), (bitcast x)), 0),
15946 // signbit)
15947 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15948 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15949 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15950 !VT.isVector()) {
15951 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15952 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15953 if (isTypeLegal(IntXVT)) {
15954 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15955 AddToWorklist(X.getNode());
15956
15957 // If X has a different width than the result/lhs, sext it or truncate it.
15958 unsigned VTWidth = VT.getSizeInBits();
15959 if (OrigXWidth < VTWidth) {
15960 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15961 AddToWorklist(X.getNode());
15962 } else if (OrigXWidth > VTWidth) {
15963 // To get the sign bit in the right place, we have to shift it right
15964 // before truncating.
15965 SDLoc DL(X);
15966 X = DAG.getNode(ISD::SRL, DL,
15967 X.getValueType(), X,
15968 DAG.getConstant(OrigXWidth-VTWidth, DL,
15969 X.getValueType()));
15970 AddToWorklist(X.getNode());
15971 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15972 AddToWorklist(X.getNode());
15973 }
15974
15975 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15976 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15977 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15978 AddToWorklist(Cst.getNode());
15979 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15980 AddToWorklist(X.getNode());
15981 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15982 AddToWorklist(XorResult.getNode());
15983 SDValue XorResult64 = DAG.getNode(
15984 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15985 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15986 SDLoc(XorResult)));
15987 AddToWorklist(XorResult64.getNode());
15988 SDValue FlipBit =
15989 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15990 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15991 AddToWorklist(FlipBit.getNode());
15992 SDValue FlipBits =
15993 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15994 AddToWorklist(FlipBits.getNode());
15995 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15996 }
15997 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15998 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15999 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16000 AddToWorklist(X.getNode());
16001
16002 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16003 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16004 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16005 AddToWorklist(Cst.getNode());
16006
16007 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16008 }
16009 }
16010
16011 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16012 if (N0.getOpcode() == ISD::BUILD_PAIR)
16013 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16014 return CombineLD;
16015
16016 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16017 // => int_vt (any_extend elt_vt:x)
16018 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16019 SDValue SrcScalar = N0.getOperand(0);
16020 if (SrcScalar.getValueType().isScalarInteger())
16021 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16022 }
16023
16024 // Remove double bitcasts from shuffles - this is often a legacy of
16025 // XformToShuffleWithZero being used to combine bitmaskings (of
16026 // float vectors bitcast to integer vectors) into shuffles.
16027 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16028 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16029 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16030 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16031 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16032 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16033
16034 // If operands are a bitcast, peek through if it casts the original VT.
16035 // If operands are a constant, just bitcast back to original VT.
16036 auto PeekThroughBitcast = [&](SDValue Op) {
16037 if (Op.getOpcode() == ISD::BITCAST &&
16038 Op.getOperand(0).getValueType() == VT)
16039 return SDValue(Op.getOperand(0));
16040 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16041 return DAG.getBitcast(VT, Op);
16042 return SDValue();
16043 };
16044
16045 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16046 // the result type of this bitcast. This would eliminate at least one
16047 // bitcast. See the transform in InstCombine.
16048 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16049 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16050 if (!(SV0 && SV1))
16051 return SDValue();
16052
16053 int MaskScale =
16054 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16055 SmallVector<int, 8> NewMask;
16056 for (int M : SVN->getMask())
16057 for (int i = 0; i != MaskScale; ++i)
16058 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16059
16060 SDValue LegalShuffle =
16061 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16062 if (LegalShuffle)
16063 return LegalShuffle;
16064 }
16065
16066 return SDValue();
16067}
16068
16069SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16070 EVT VT = N->getValueType(0);
16071 return CombineConsecutiveLoads(N, VT);
16072}
16073
16074SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16075 SDValue N0 = N->getOperand(0);
16076
16077 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16078 return N0;
16079
16080 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16081 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16082 // example https://reviews.llvm.org/D136529#4120959.
16083 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16084 return SDValue();
16085
16086 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16087 // Try to push freeze through instructions that propagate but don't produce
16088 // poison as far as possible. If an operand of freeze follows three
16089 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
16090 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16091 // the freeze through to the operands that are not guaranteed non-poison.
16092 // NOTE: we will strip poison-generating flags, so ignore them here.
16093 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16094 /*ConsiderFlags*/ false) ||
16095 N0->getNumValues() != 1 || !N0->hasOneUse())
16096 return SDValue();
16097
16098 bool AllowMultipleMaybePoisonOperands =
16099 N0.getOpcode() == ISD::SELECT_CC ||
16100 N0.getOpcode() == ISD::SETCC ||
16101 N0.getOpcode() == ISD::BUILD_VECTOR ||
16102 N0.getOpcode() == ISD::BUILD_PAIR ||
16103 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16104 N0.getOpcode() == ISD::CONCAT_VECTORS;
16105
16106 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16107 // ones" or "constant" into something that depends on FrozenUndef. We can
16108 // instead pick undef values to keep those properties, while at the same time
16109 // folding away the freeze.
16110 // If we implement a more general solution for folding away freeze(undef) in
16111 // the future, then this special handling can be removed.
16112 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16113 SDLoc DL(N0);
16114 EVT VT = N0.getValueType();
16115 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16116 return DAG.getAllOnesConstant(DL, VT);
16117 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16118 SmallVector<SDValue, 8> NewVecC;
16119 for (const SDValue &Op : N0->op_values())
16120 NewVecC.push_back(
16121 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16122 return DAG.getBuildVector(VT, DL, NewVecC);
16123 }
16124 }
16125
16126 SmallSet<SDValue, 8> MaybePoisonOperands;
16127 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16128 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16129 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
16130 /*Depth*/ 1))
16131 continue;
16132 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16133 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16134 if (IsNewMaybePoisonOperand)
16135 MaybePoisonOperandNumbers.push_back(OpNo);
16136 if (!HadMaybePoisonOperands)
16137 continue;
16138 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16139 // Multiple maybe-poison ops when not allowed - bail out.
16140 return SDValue();
16141 }
16142 }
16143 // NOTE: the whole op may be not guaranteed to not be undef or poison because
16144 // it could create undef or poison due to its poison-generating flags.
16145 // So not finding any maybe-poison operands is fine.
16146
16147 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16148 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16149 // operands via the operand numbers. The typical scenario is that we have
16150 // something like this
16151 // t262: i32 = freeze t181
16152 // t150: i32 = ctlz_zero_undef t262
16153 // t184: i32 = ctlz_zero_undef t181
16154 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16155 // When freezing the t181 operand we get t262 back, and then the
16156 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16157 // also recursively replace t184 by t150.
16158 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16159 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16160 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
16161 continue;
16162 // First, freeze each offending operand.
16163 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16164 // Then, change all other uses of unfrozen operand to use frozen operand.
16165 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16166 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16167 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16168 // But, that also updated the use in the freeze we just created, thus
16169 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16170 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16171 MaybePoisonOperand);
16172 }
16173 }
16174
16175 // This node has been merged with another.
16176 if (N->getOpcode() == ISD::DELETED_NODE)
16177 return SDValue(N, 0);
16178
16179 // The whole node may have been updated, so the value we were holding
16180 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16181 N0 = N->getOperand(0);
16182
16183 // Finally, recreate the node; its operands were updated to use
16184 // frozen operands, so we just need to use its "original" operands.
16185 SmallVector<SDValue> Ops(N0->ops());
16186 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
16187 for (SDValue &Op : Ops) {
16188 if (Op.getOpcode() == ISD::UNDEF)
16189 Op = DAG.getFreeze(Op);
16190 }
16191
16192 SDValue R;
16193 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
16194 // Special case handling for ShuffleVectorSDNode nodes.
16195 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
16196 SVN->getMask());
16197 } else {
16198 // NOTE: this strips poison generating flags.
16199 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16200 }
16201 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
16202 "Can't create node that may be undef/poison!");
16203 return R;
16204}
16205
16206/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
16207/// operands. DstEltVT indicates the destination element value type.
16208SDValue DAGCombiner::
16209ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
16210 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
16211
16212 // If this is already the right type, we're done.
16213 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
16214
16215 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
16216 unsigned DstBitSize = DstEltVT.getSizeInBits();
16217
16218 // If this is a conversion of N elements of one type to N elements of another
16219 // type, convert each element. This handles FP<->INT cases.
16220 if (SrcBitSize == DstBitSize) {
16221 SmallVector<SDValue, 8> Ops;
16222 for (SDValue Op : BV->op_values()) {
16223 // If the vector element type is not legal, the BUILD_VECTOR operands
16224 // are promoted and implicitly truncated. Make that explicit here.
16225 if (Op.getValueType() != SrcEltVT)
16226 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
16227 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
16228 AddToWorklist(Ops.back().getNode());
16229 }
16230 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
16231 BV->getValueType(0).getVectorNumElements());
16232 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
16233 }
16234
16235 // Otherwise, we're growing or shrinking the elements. To avoid having to
16236 // handle annoying details of growing/shrinking FP values, we convert them to
16237 // int first.
16238 if (SrcEltVT.isFloatingPoint()) {
16239 // Convert the input float vector to an int vector where the elements are
16240 // the same size.
16241 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
16242 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
16243 SrcEltVT = IntVT;
16244 }
16245
16246 // Now we know the input is an integer vector. If the output is a FP type,
16247 // convert to integer first, then to FP of the right size.
16248 if (DstEltVT.isFloatingPoint()) {
16249 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
16250 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
16251
16252 // Next, convert to FP elements of the same size.
16253 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
16254 }
16255
16256 // Okay, we know the src/dst types are both integers of differing types.
16257 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
16258
16259 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
16260 // BuildVectorSDNode?
16261 auto *BVN = cast<BuildVectorSDNode>(BV);
16262
16263 // Extract the constant raw bit data.
16264 BitVector UndefElements;
16265 SmallVector<APInt> RawBits;
16266 bool IsLE = DAG.getDataLayout().isLittleEndian();
16267 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16268 return SDValue();
16269
16270 SDLoc DL(BV);
16271 SmallVector<SDValue, 8> Ops;
16272 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
16273 if (UndefElements[I])
16274 Ops.push_back(DAG.getUNDEF(DstEltVT));
16275 else
16276 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
16277 }
16278
16279 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
16280 return DAG.getBuildVector(VT, DL, Ops);
16281}
16282
16283// Returns true if floating point contraction is allowed on the FMUL-SDValue
16284// `N`
16285 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
16286 assert(N.getOpcode() == ISD::FMUL);
16287
16288 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
16289 N->getFlags().hasAllowContract();
16290}
16291
16292// Returns true if `N` can assume no infinities involved in its computation.
16293 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
16294 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16295}
16296
16297/// Try to perform FMA combining on a given FADD node.
16298template <class MatchContextClass>
16299SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
16300 SDValue N0 = N->getOperand(0);
16301 SDValue N1 = N->getOperand(1);
16302 EVT VT = N->getValueType(0);
16303 SDLoc SL(N);
16304 MatchContextClass matcher(DAG, TLI, N);
16305 const TargetOptions &Options = DAG.getTarget().Options;
16306
16307 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16308
16309 // Floating-point multiply-add with intermediate rounding.
16310 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16311 // FIXME: Add VP_FMAD opcode.
16312 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16313
16314 // Floating-point multiply-add without intermediate rounding.
16315 bool HasFMA =
16316 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16317 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16318
16319 // No valid opcode, do not combine.
16320 if (!HasFMAD && !HasFMA)
16321 return SDValue();
16322
16323 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16324 Options.UnsafeFPMath || HasFMAD);
16325 // If the addition is not contractable, do not combine.
16326 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16327 return SDValue();
16328
16329 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16330 // beneficial. It does not reduce latency. It increases register pressure. It
16331 // replaces an fadd with an fma which is a more complex instruction, so is
16332 // likely to have a larger encoding, use more functional units, etc.
16333 if (N0 == N1)
16334 return SDValue();
16335
16336 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16337 return SDValue();
16338
16339 // Always prefer FMAD to FMA for precision.
16340 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16341 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16342
16343 auto isFusedOp = [&](SDValue N) {
16344 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16345 };
16346
16347 // Is the node an FMUL and contractable either due to global flags or
16348 // SDNodeFlags.
16349 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16350 if (!matcher.match(N, ISD::FMUL))
16351 return false;
16352 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16353 };
16354 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
16355 // prefer to fold the multiply with fewer uses.
16356 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
16357 if (N0->use_size() > N1->use_size())
16358 std::swap(N0, N1);
16359 }
16360
16361 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
16362 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
16363 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
16364 N0.getOperand(1), N1);
16365 }
16366
16367 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
16368 // Note: Commutes FADD operands.
16369 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
16370 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
16371 N1.getOperand(1), N0);
16372 }
16373
16374 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
16375 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
16376 // This also works with nested fma instructions:
16377 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
16378 // fma A, B, (fma C, D, fma (E, F, G))
16379 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
16380 // fma A, B, (fma C, D, fma (E, F, G)).
16381 // This requires reassociation because it changes the order of operations.
16382 bool CanReassociate =
16383 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16384 if (CanReassociate) {
16385 SDValue FMA, E;
16386 if (isFusedOp(N0) && N0.hasOneUse()) {
16387 FMA = N0;
16388 E = N1;
16389 } else if (isFusedOp(N1) && N1.hasOneUse()) {
16390 FMA = N1;
16391 E = N0;
16392 }
16393
16394 SDValue TmpFMA = FMA;
16395 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
16396 SDValue FMul = TmpFMA->getOperand(2);
16397 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
16398 SDValue C = FMul.getOperand(0);
16399 SDValue D = FMul.getOperand(1);
16400 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
16401 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
16402 // Replacing the inner FMul could cause the outer FMA to be simplified
16403 // away.
16404 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
16405 }
16406
16407 TmpFMA = TmpFMA->getOperand(2);
16408 }
16409 }
16410
16411 // Look through FP_EXTEND nodes to do more combining.
16412
16413 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
16414 if (matcher.match(N0, ISD::FP_EXTEND)) {
16415 SDValue N00 = N0.getOperand(0);
16416 if (isContractableFMUL(N00) &&
16417 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16418 N00.getValueType())) {
16419 return matcher.getNode(
16420 PreferredFusedOpcode, SL, VT,
16421 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16422 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
16423 }
16424 }
16425
16426 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
16427 // Note: Commutes FADD operands.
16428 if (matcher.match(N1, ISD::FP_EXTEND)) {
16429 SDValue N10 = N1.getOperand(0);
16430 if (isContractableFMUL(N10) &&
16431 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16432 N10.getValueType())) {
16433 return matcher.getNode(
16434 PreferredFusedOpcode, SL, VT,
16435 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
16436 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16437 }
16438 }
16439
16440 // More folding opportunities when target permits.
16441 if (Aggressive) {
16442 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
16443 // -> (fma x, y, (fma (fpext u), (fpext v), z))
16444 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16445 SDValue Z) {
16446 return matcher.getNode(
16447 PreferredFusedOpcode, SL, VT, X, Y,
16448 matcher.getNode(PreferredFusedOpcode, SL, VT,
16449 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16450 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16451 };
16452 if (isFusedOp(N0)) {
16453 SDValue N02 = N0.getOperand(2);
16454 if (matcher.match(N02, ISD::FP_EXTEND)) {
16455 SDValue N020 = N02.getOperand(0);
16456 if (isContractableFMUL(N020) &&
16457 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16458 N020.getValueType())) {
16459 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
16460 N020.getOperand(0), N020.getOperand(1),
16461 N1);
16462 }
16463 }
16464 }
16465
16466 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16467 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16468 // FIXME: This turns two single-precision and one double-precision
16469 // operation into two double-precision operations, which might not be
16470 // interesting for all targets, especially GPUs.
16471 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16472 SDValue Z) {
16473 return matcher.getNode(
16474 PreferredFusedOpcode, SL, VT,
16475 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16476 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16477 matcher.getNode(PreferredFusedOpcode, SL, VT,
16478 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16479 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16480 };
16481 if (N0.getOpcode() == ISD::FP_EXTEND) {
16482 SDValue N00 = N0.getOperand(0);
16483 if (isFusedOp(N00)) {
16484 SDValue N002 = N00.getOperand(2);
16485 if (isContractableFMUL(N002) &&
16486 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16487 N00.getValueType())) {
16488 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16489 N002.getOperand(0), N002.getOperand(1),
16490 N1);
16491 }
16492 }
16493 }
16494
16495 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16496 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16497 if (isFusedOp(N1)) {
16498 SDValue N12 = N1.getOperand(2);
16499 if (N12.getOpcode() == ISD::FP_EXTEND) {
16500 SDValue N120 = N12.getOperand(0);
16501 if (isContractableFMUL(N120) &&
16502 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16503 N120.getValueType())) {
16504 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16505 N120.getOperand(0), N120.getOperand(1),
16506 N0);
16507 }
16508 }
16509 }
16510
16511 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16512 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16513 // FIXME: This turns two single-precision and one double-precision
16514 // operation into two double-precision operations, which might not be
16515 // interesting for all targets, especially GPUs.
16516 if (N1.getOpcode() == ISD::FP_EXTEND) {
16517 SDValue N10 = N1.getOperand(0);
16518 if (isFusedOp(N10)) {
16519 SDValue N102 = N10.getOperand(2);
16520 if (isContractableFMUL(N102) &&
16521 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16522 N10.getValueType())) {
16523 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16524 N102.getOperand(0), N102.getOperand(1),
16525 N0);
16526 }
16527 }
16528 }
16529 }
16530
16531 return SDValue();
16532}
16533
16534/// Try to perform FMA combining on a given FSUB node.
16535template <class MatchContextClass>
16536SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16537 SDValue N0 = N->getOperand(0);
16538 SDValue N1 = N->getOperand(1);
16539 EVT VT = N->getValueType(0);
16540 SDLoc SL(N);
16541 MatchContextClass matcher(DAG, TLI, N);
16542 const TargetOptions &Options = DAG.getTarget().Options;
16543
16544 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16545
16546 // Floating-point multiply-add with intermediate rounding.
16547 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16548 // FIXME: Add VP_FMAD opcode.
16549 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16550
16551 // Floating-point multiply-add without intermediate rounding.
16552 bool HasFMA =
16553 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16554       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16555
16556 // No valid opcode, do not combine.
16557 if (!HasFMAD && !HasFMA)
16558 return SDValue();
16559
16560 const SDNodeFlags Flags = N->getFlags();
16561 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16562 Options.UnsafeFPMath || HasFMAD);
16563
16564 // If the subtraction is not contractable, do not combine.
16565 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16566 return SDValue();
16567
16568 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16569 return SDValue();
16570
16571 // Always prefer FMAD to FMA for precision.
16572 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16573   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16574   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16575
16576 // Is the node an FMUL and contractable either due to global flags or
16577 // SDNodeFlags.
16578 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16579 if (!matcher.match(N, ISD::FMUL))
16580 return false;
16581 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16582 };
16583
16584 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16585 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16586 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16587 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16588 XY.getOperand(1),
16589 matcher.getNode(ISD::FNEG, SL, VT, Z));
16590 }
16591 return SDValue();
16592 };
16593
16594 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16595 // Note: Commutes FSUB operands.
16596 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16597 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16598 return matcher.getNode(
16599 PreferredFusedOpcode, SL, VT,
16600 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16601 YZ.getOperand(1), X);
16602 }
16603 return SDValue();
16604 };
16605
16606 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16607 // prefer to fold the multiply with fewer uses.
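  // (Illustrative note, not from the original source: fusing the FMUL with
  // fewer uses makes it more likely that the FMUL node becomes dead after the
  // combine, whereas an FMUL with other users must stay live anyway, so fusing
  // it would just duplicate the multiply.)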
16608 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16609 (N0->use_size() > N1->use_size())) {
16610 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16611 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16612 return V;
16613 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16614 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16615 return V;
16616 } else {
16617 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16618 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16619 return V;
16620 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16621 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16622 return V;
16623 }
16624
16625   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
16626 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16627 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16628 SDValue N00 = N0.getOperand(0).getOperand(0);
16629 SDValue N01 = N0.getOperand(0).getOperand(1);
16630 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16631 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16632 matcher.getNode(ISD::FNEG, SL, VT, N1));
16633 }
16634
16635 // Look through FP_EXTEND nodes to do more combining.
16636
16637 // fold (fsub (fpext (fmul x, y)), z)
16638 // -> (fma (fpext x), (fpext y), (fneg z))
16639 if (matcher.match(N0, ISD::FP_EXTEND)) {
16640 SDValue N00 = N0.getOperand(0);
16641 if (isContractableFMUL(N00) &&
16642 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16643 N00.getValueType())) {
16644 return matcher.getNode(
16645 PreferredFusedOpcode, SL, VT,
16646 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16647 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16648 matcher.getNode(ISD::FNEG, SL, VT, N1));
16649 }
16650 }
16651
16652 // fold (fsub x, (fpext (fmul y, z)))
16653 // -> (fma (fneg (fpext y)), (fpext z), x)
16654 // Note: Commutes FSUB operands.
16655 if (matcher.match(N1, ISD::FP_EXTEND)) {
16656 SDValue N10 = N1.getOperand(0);
16657 if (isContractableFMUL(N10) &&
16658 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16659 N10.getValueType())) {
16660 return matcher.getNode(
16661 PreferredFusedOpcode, SL, VT,
16662 matcher.getNode(
16663 ISD::FNEG, SL, VT,
16664 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16665 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16666 }
16667 }
16668
16669   // fold (fsub (fpext (fneg (fmul x, y))), z)
16670 // -> (fneg (fma (fpext x), (fpext y), z))
16671 // Note: This could be removed with appropriate canonicalization of the
16672   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16673   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16674   // from implementing the canonicalization in visitFSUB.
16675 if (matcher.match(N0, ISD::FP_EXTEND)) {
16676 SDValue N00 = N0.getOperand(0);
16677 if (matcher.match(N00, ISD::FNEG)) {
16678 SDValue N000 = N00.getOperand(0);
16679 if (isContractableFMUL(N000) &&
16680 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16681 N00.getValueType())) {
16682 return matcher.getNode(
16683 ISD::FNEG, SL, VT,
16684 matcher.getNode(
16685 PreferredFusedOpcode, SL, VT,
16686 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16687 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16688 N1));
16689 }
16690 }
16691 }
16692
16693   // fold (fsub (fneg (fpext (fmul x, y))), z)
16694   // -> (fneg (fma (fpext x), (fpext y), z))
16695   // Note: This could be removed with appropriate canonicalization of the
16696   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16697   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16698   // from implementing the canonicalization in visitFSUB.
16699 if (matcher.match(N0, ISD::FNEG)) {
16700 SDValue N00 = N0.getOperand(0);
16701 if (matcher.match(N00, ISD::FP_EXTEND)) {
16702 SDValue N000 = N00.getOperand(0);
16703 if (isContractableFMUL(N000) &&
16704 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16705 N000.getValueType())) {
16706 return matcher.getNode(
16707 ISD::FNEG, SL, VT,
16708 matcher.getNode(
16709 PreferredFusedOpcode, SL, VT,
16710 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16711 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16712 N1));
16713 }
16714 }
16715 }
16716
16717 auto isReassociable = [&Options](SDNode *N) {
16718 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16719 };
16720
16721 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16722 &isReassociable](SDValue N) {
16723 return isContractableFMUL(N) && isReassociable(N.getNode());
16724 };
16725
16726 auto isFusedOp = [&](SDValue N) {
16727 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16728 };
16729
16730 // More folding opportunities when target permits.
16731 if (Aggressive && isReassociable(N)) {
16732 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16733 // fold (fsub (fma x, y, (fmul u, v)), z)
16734     // -> (fma x, y, (fma u, v, (fneg z)))
16735 if (CanFuse && isFusedOp(N0) &&
16736 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16737 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16738 return matcher.getNode(
16739 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16740 matcher.getNode(PreferredFusedOpcode, SL, VT,
16741 N0.getOperand(2).getOperand(0),
16742 N0.getOperand(2).getOperand(1),
16743 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16744 }
16745
16746 // fold (fsub x, (fma y, z, (fmul u, v)))
16747 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16748 if (CanFuse && isFusedOp(N1) &&
16749 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16750 N1->hasOneUse() && NoSignedZero) {
16751 SDValue N20 = N1.getOperand(2).getOperand(0);
16752 SDValue N21 = N1.getOperand(2).getOperand(1);
16753 return matcher.getNode(
16754 PreferredFusedOpcode, SL, VT,
16755 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16756 N1.getOperand(1),
16757 matcher.getNode(PreferredFusedOpcode, SL, VT,
16758 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16759 }
16760
16761 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16762     // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16763 if (isFusedOp(N0) && N0->hasOneUse()) {
16764 SDValue N02 = N0.getOperand(2);
16765 if (matcher.match(N02, ISD::FP_EXTEND)) {
16766 SDValue N020 = N02.getOperand(0);
16767 if (isContractableAndReassociableFMUL(N020) &&
16768 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16769 N020.getValueType())) {
16770 return matcher.getNode(
16771 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16772 matcher.getNode(
16773 PreferredFusedOpcode, SL, VT,
16774 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16775 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16776 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16777 }
16778 }
16779 }
16780
16781 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16782 // -> (fma (fpext x), (fpext y),
16783 // (fma (fpext u), (fpext v), (fneg z)))
16784 // FIXME: This turns two single-precision and one double-precision
16785 // operation into two double-precision operations, which might not be
16786 // interesting for all targets, especially GPUs.
16787 if (matcher.match(N0, ISD::FP_EXTEND)) {
16788 SDValue N00 = N0.getOperand(0);
16789 if (isFusedOp(N00)) {
16790 SDValue N002 = N00.getOperand(2);
16791 if (isContractableAndReassociableFMUL(N002) &&
16792 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16793 N00.getValueType())) {
16794 return matcher.getNode(
16795 PreferredFusedOpcode, SL, VT,
16796 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16797 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16798 matcher.getNode(
16799 PreferredFusedOpcode, SL, VT,
16800 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16801 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16802 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16803 }
16804 }
16805 }
16806
16807 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16808 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16809 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16810 N1->hasOneUse()) {
16811 SDValue N120 = N1.getOperand(2).getOperand(0);
16812 if (isContractableAndReassociableFMUL(N120) &&
16813 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16814 N120.getValueType())) {
16815 SDValue N1200 = N120.getOperand(0);
16816 SDValue N1201 = N120.getOperand(1);
16817 return matcher.getNode(
16818 PreferredFusedOpcode, SL, VT,
16819 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16820 N1.getOperand(1),
16821 matcher.getNode(
16822 PreferredFusedOpcode, SL, VT,
16823 matcher.getNode(ISD::FNEG, SL, VT,
16824 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16825 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16826 }
16827 }
16828
16829 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16830 // -> (fma (fneg (fpext y)), (fpext z),
16831 // (fma (fneg (fpext u)), (fpext v), x))
16832 // FIXME: This turns two single-precision and one double-precision
16833 // operation into two double-precision operations, which might not be
16834 // interesting for all targets, especially GPUs.
16835 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16836 SDValue CvtSrc = N1.getOperand(0);
16837 SDValue N100 = CvtSrc.getOperand(0);
16838 SDValue N101 = CvtSrc.getOperand(1);
16839 SDValue N102 = CvtSrc.getOperand(2);
16840 if (isContractableAndReassociableFMUL(N102) &&
16841 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16842 CvtSrc.getValueType())) {
16843 SDValue N1020 = N102.getOperand(0);
16844 SDValue N1021 = N102.getOperand(1);
16845 return matcher.getNode(
16846 PreferredFusedOpcode, SL, VT,
16847 matcher.getNode(ISD::FNEG, SL, VT,
16848 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16849 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16850 matcher.getNode(
16851 PreferredFusedOpcode, SL, VT,
16852 matcher.getNode(ISD::FNEG, SL, VT,
16853 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16854 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16855 }
16856 }
16857 }
16858
16859 return SDValue();
16860}
16861
16862/// Try to perform FMA combining on a given FMUL node based on the distributive
16863/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16864/// subtraction instead of addition).
16865SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16866 SDValue N0 = N->getOperand(0);
16867 SDValue N1 = N->getOperand(1);
16868 EVT VT = N->getValueType(0);
16869 SDLoc SL(N);
16870
16871 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16872
16873 const TargetOptions &Options = DAG.getTarget().Options;
16874
16875 // The transforms below are incorrect when x == 0 and y == inf, because the
16876 // intermediate multiplication produces a nan.
16877 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16878 if (!hasNoInfs(Options, FAdd))
16879 return SDValue();
16880
16881 // Floating-point multiply-add without intermediate rounding.
16882 bool HasFMA =
16884 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16886
16887 // Floating-point multiply-add with intermediate rounding. This can result
16888 // in a less precise result due to the changed rounding order.
16889 bool HasFMAD = Options.UnsafeFPMath &&
16890 (LegalOperations && TLI.isFMADLegal(DAG, N));
16891
16892 // No valid opcode, do not combine.
16893 if (!HasFMAD && !HasFMA)
16894 return SDValue();
16895
16896 // Always prefer FMAD to FMA for precision.
16897 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16898   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16899
16900 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16901 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16902 auto FuseFADD = [&](SDValue X, SDValue Y) {
16903 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16904 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16905 if (C->isExactlyValue(+1.0))
16906 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16907 Y);
16908 if (C->isExactlyValue(-1.0))
16909 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16910 DAG.getNode(ISD::FNEG, SL, VT, Y));
16911 }
16912 }
16913 return SDValue();
16914 };
16915
16916 if (SDValue FMA = FuseFADD(N0, N1))
16917 return FMA;
16918 if (SDValue FMA = FuseFADD(N1, N0))
16919 return FMA;
16920
16921 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16922 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16923 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16924 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16925 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16926 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16927 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16928 if (C0->isExactlyValue(+1.0))
16929 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16930 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16931 Y);
16932 if (C0->isExactlyValue(-1.0))
16933 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16934 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16935 DAG.getNode(ISD::FNEG, SL, VT, Y));
16936 }
16937 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16938 if (C1->isExactlyValue(+1.0))
16939 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16940 DAG.getNode(ISD::FNEG, SL, VT, Y));
16941 if (C1->isExactlyValue(-1.0))
16942 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16943 Y);
16944 }
16945 }
16946 return SDValue();
16947 };
16948
16949 if (SDValue FMA = FuseFSUB(N0, N1))
16950 return FMA;
16951 if (SDValue FMA = FuseFSUB(N1, N0))
16952 return FMA;
16953
16954 return SDValue();
16955}
16956
16957SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16958 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16959
16960 // FADD -> FMA combines:
16961 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16962 if (Fused.getOpcode() != ISD::DELETED_NODE)
16963 AddToWorklist(Fused.getNode());
16964 return Fused;
16965 }
16966 return SDValue();
16967}
16968
16969SDValue DAGCombiner::visitFADD(SDNode *N) {
16970 SDValue N0 = N->getOperand(0);
16971 SDValue N1 = N->getOperand(1);
16972 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16973 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16974 EVT VT = N->getValueType(0);
16975 SDLoc DL(N);
16976 const TargetOptions &Options = DAG.getTarget().Options;
16977 SDNodeFlags Flags = N->getFlags();
16978 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16979
16980 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16981 return R;
16982
16983 // fold (fadd c1, c2) -> c1 + c2
16984 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16985 return C;
16986
16987 // canonicalize constant to RHS
16988 if (N0CFP && !N1CFP)
16989 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16990
16991 // fold vector ops
16992 if (VT.isVector())
16993 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16994 return FoldedVOp;
16995
16996 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16997 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16998 if (N1C && N1C->isZero())
16999 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17000 return N0;
17001
17002 if (SDValue NewSel = foldBinOpIntoSelect(N))
17003 return NewSel;
17004
17005 // fold (fadd A, (fneg B)) -> (fsub A, B)
17006 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17007 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17008 N1, DAG, LegalOperations, ForCodeSize))
17009 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17010
17011 // fold (fadd (fneg A), B) -> (fsub B, A)
17012 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17013 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17014 N0, DAG, LegalOperations, ForCodeSize))
17015 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17016
17017 auto isFMulNegTwo = [](SDValue FMul) {
17018 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17019 return false;
17020 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17021 return C && C->isExactlyValue(-2.0);
17022 };
17023
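  // (Illustrative note, not from the original source: B * -2.0 == -(B + B), so
  // the two folds below replace the multiply by -2.0 with an add plus a
  // subtract and avoid materializing the -2.0 constant.)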
17024 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17025 if (isFMulNegTwo(N0)) {
17026 SDValue B = N0.getOperand(0);
17027 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17028 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17029 }
17030 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17031 if (isFMulNegTwo(N1)) {
17032 SDValue B = N1.getOperand(0);
17033 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17034 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17035 }
17036
17037 // No FP constant should be created after legalization as Instruction
17038 // Selection pass has a hard time dealing with FP constants.
17039 bool AllowNewConst = (Level < AfterLegalizeDAG);
17040
17041 // If nnan is enabled, fold lots of things.
17042 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17043 // If allowed, fold (fadd (fneg x), x) -> 0.0
17044 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17045 return DAG.getConstantFP(0.0, DL, VT);
17046
17047 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17048 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17049 return DAG.getConstantFP(0.0, DL, VT);
17050 }
17051
17052 // If 'unsafe math' or reassoc and nsz, fold lots of things.
17053 // TODO: break out portions of the transformations below for which Unsafe is
17054 // considered and which do not require both nsz and reassoc
17055 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17056 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17057 AllowNewConst) {
17058 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17059 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17060         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17061       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17062 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17063 }
17064
17065 // We can fold chains of FADD's of the same value into multiplications.
17066 // This transform is not safe in general because we are reducing the number
17067 // of rounding steps.
17068 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17069 if (N0.getOpcode() == ISD::FMUL) {
17070 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17071 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17072
17073 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17074 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17075 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17076 DAG.getConstantFP(1.0, DL, VT));
17077 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17078 }
17079
17080 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17081 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17082 N1.getOperand(0) == N1.getOperand(1) &&
17083 N0.getOperand(0) == N1.getOperand(0)) {
17084 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17085 DAG.getConstantFP(2.0, DL, VT));
17086 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17087 }
17088 }
17089
17090 if (N1.getOpcode() == ISD::FMUL) {
17091 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17092 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17093
17094 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17095 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17096 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17097 DAG.getConstantFP(1.0, DL, VT));
17098 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17099 }
17100
17101 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17102 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17103 N0.getOperand(0) == N0.getOperand(1) &&
17104 N1.getOperand(0) == N0.getOperand(0)) {
17105 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17106 DAG.getConstantFP(2.0, DL, VT));
17107 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17108 }
17109 }
17110
17111 if (N0.getOpcode() == ISD::FADD) {
17112 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17113 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17114 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17115 (N0.getOperand(0) == N1)) {
17116 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17117 DAG.getConstantFP(3.0, DL, VT));
17118 }
17119 }
17120
17121 if (N1.getOpcode() == ISD::FADD) {
17122 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17123 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17124 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17125 N1.getOperand(0) == N0) {
17126 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17127 DAG.getConstantFP(3.0, DL, VT));
17128 }
17129 }
17130
17131 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17132 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17133 N0.getOperand(0) == N0.getOperand(1) &&
17134 N1.getOperand(0) == N1.getOperand(1) &&
17135 N0.getOperand(0) == N1.getOperand(0)) {
17136 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17137 DAG.getConstantFP(4.0, DL, VT));
17138 }
17139 }
17140
17141 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17142 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17143 VT, N0, N1, Flags))
17144 return SD;
17145 } // enable-unsafe-fp-math
17146
17147 // FADD -> FMA combines:
17148 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17149 if (Fused.getOpcode() != ISD::DELETED_NODE)
17150 AddToWorklist(Fused.getNode());
17151 return Fused;
17152 }
17153 return SDValue();
17154}
17155
17156SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17157 SDValue Chain = N->getOperand(0);
17158 SDValue N0 = N->getOperand(1);
17159 SDValue N1 = N->getOperand(2);
17160 EVT VT = N->getValueType(0);
17161 EVT ChainVT = N->getValueType(1);
17162 SDLoc DL(N);
17163 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17164
17165 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17166 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17167 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17168 N1, DAG, LegalOperations, ForCodeSize)) {
17169 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17170 {Chain, N0, NegN1});
17171 }
17172
17173 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17174 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17175 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17176 N0, DAG, LegalOperations, ForCodeSize)) {
17177 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17178 {Chain, N1, NegN0});
17179 }
17180 return SDValue();
17181}
17182
17183SDValue DAGCombiner::visitFSUB(SDNode *N) {
17184 SDValue N0 = N->getOperand(0);
17185 SDValue N1 = N->getOperand(1);
17186 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17187 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17188 EVT VT = N->getValueType(0);
17189 SDLoc DL(N);
17190 const TargetOptions &Options = DAG.getTarget().Options;
17191 const SDNodeFlags Flags = N->getFlags();
17192 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17193
17194 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17195 return R;
17196
17197 // fold (fsub c1, c2) -> c1-c2
17198 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17199 return C;
17200
17201 // fold vector ops
17202 if (VT.isVector())
17203 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17204 return FoldedVOp;
17205
17206 if (SDValue NewSel = foldBinOpIntoSelect(N))
17207 return NewSel;
17208
17209 // (fsub A, 0) -> A
17210 if (N1CFP && N1CFP->isZero()) {
17211 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17212 Flags.hasNoSignedZeros()) {
17213 return N0;
17214 }
17215 }
17216
17217 if (N0 == N1) {
17218 // (fsub x, x) -> 0.0
17219 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17220 return DAG.getConstantFP(0.0f, DL, VT);
17221 }
17222
17223 // (fsub -0.0, N1) -> -N1
17224 if (N0CFP && N0CFP->isZero()) {
17225 if (N0CFP->isNegative() ||
17226 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17227 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17228 // flushed to zero, unless all users treat denorms as zero (DAZ).
17229 // FIXME: This transform will change the sign of a NaN and the behavior
17230 // of a signaling NaN. It is only valid when a NoNaN flag is present.
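      // (Illustrative note, not from the original source: under FTZ the exact
      // result of (fsub 0.0, d) for a denormal d is flushed to zero, while
      // fneg(d) merely flips the sign bit and keeps the denormal value, so the
      // two forms can disagree.)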
17231 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17232 if (DenormMode == DenormalMode::getIEEE()) {
17233 if (SDValue NegN1 =
17234 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17235 return NegN1;
17236 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17237 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17238 }
17239 }
17240 }
17241
17242 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17243 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17244 N1.getOpcode() == ISD::FADD) {
17245 // X - (X + Y) -> -Y
17246 if (N0 == N1->getOperand(0))
17247 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17248 // X - (Y + X) -> -Y
17249 if (N0 == N1->getOperand(1))
17250 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17251 }
17252
17253 // fold (fsub A, (fneg B)) -> (fadd A, B)
17254 if (SDValue NegN1 =
17255 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17256 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17257
17258 // FSUB -> FMA combines:
17259 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17260 AddToWorklist(Fused.getNode());
17261 return Fused;
17262 }
17263
17264 return SDValue();
17265}
17266
17267// Transform IEEE Floats:
17268// (fmul C, (uitofp Pow2))
17269// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17270// (fdiv C, (uitofp Pow2))
17271// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17272//
17273 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
17274// there is no need for more than an add/sub.
17275//
17276// This is valid under the following circumstances:
17277// 1) We are dealing with IEEE floats
17278// 2) C is normal
17279// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
17280 // TODO: Much of this could also be used for generating `ldexp` on targets that
17281// prefer it.
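// (Illustrative sketch, not part of this file: for IEEE binary32, a normal
// constant C times 2^k has the same bit pattern as bitcast<i32>(C) + (k << 23)
// while the exponent stays in range; e.g. C = 3.0f is 0x40400000, and adding
// 2 << 23 gives 0x41400000, which is exactly 12.0f = 3.0f * 2^2. A standalone
// C++ rendering of the same trick, assuming a normal value and an in-range
// result, could look like:
//   float timesPow2(float c, int k) {          // illustrative only
//     uint32_t bits;
//     std::memcpy(&bits, &c, sizeof(bits));    // bitcast_to_INT
//     bits += static_cast<uint32_t>(k) << 23;  // 23 = f32 mantissa width
//     std::memcpy(&c, &bits, sizeof(bits));    // bitcast_to_FP
//     return c;                                // == c * 2^k
//   }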
17282SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
17283 EVT VT = N->getValueType(0);
17284 SDValue ConstOp, Pow2Op;
17285
17286 std::optional<int> Mantissa;
17287 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
17288 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17289 return false;
17290
17291 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17292 Pow2Op = N->getOperand(1 - ConstOpIdx);
17293 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
17294 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
17295 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
17296 return false;
17297
17298 Pow2Op = Pow2Op.getOperand(0);
17299
17300 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
17301 // TODO: We could use knownbits to make this bound more precise.
17302 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
17303
17304 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
17305 if (CFP == nullptr)
17306 return false;
17307
17308 const APFloat &APF = CFP->getValueAPF();
17309
17310     // Make sure we have a normal/IEEE constant.
17311 if (!APF.isNormal() || !APF.isIEEE())
17312 return false;
17313
17314     // Make sure the float's exponent is within the bounds for which this
17315     // transform produces a bitwise-equal value.
17316 int CurExp = ilogb(APF);
17317 // FMul by pow2 will only increase exponent.
17318 int MinExp =
17319 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17320 // FDiv by pow2 will only decrease exponent.
17321 int MaxExp =
17322 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17323 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
17324         MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
17325       return false;
17326
17327 // Finally make sure we actually know the mantissa for the float type.
17328 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17329 if (!Mantissa)
17330 Mantissa = ThisMantissa;
17331
17332 return *Mantissa == ThisMantissa && ThisMantissa > 0;
17333 };
17334
17335 // TODO: We may be able to include undefs.
17336 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
17337 };
17338
17339 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
17340 return SDValue();
17341
17342 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
17343 return SDValue();
17344
17345 // Get log2 after all other checks have taken place. This is because
17346 // BuildLogBase2 may create a new node.
17347 SDLoc DL(N);
17348 // Get Log2 type with same bitwidth as the float type (VT).
17349 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
17350 if (VT.isVector())
17351 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
17352                                 VT.getVectorElementCount());
17353
17354 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
17355 /*InexpensiveOnly*/ true, NewIntVT);
17356 if (!Log2)
17357 return SDValue();
17358
17359 // Perform actual transform.
17360 SDValue MantissaShiftCnt =
17361 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
17362 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
17363 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
17364   // cast. We could implement that by handling the casts here.
17365 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
17366 SDValue ResAsInt =
17367 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
17368 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
17369 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
17370 return ResAsFP;
17371}
17372
17373SDValue DAGCombiner::visitFMUL(SDNode *N) {
17374 SDValue N0 = N->getOperand(0);
17375 SDValue N1 = N->getOperand(1);
17376 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17377 EVT VT = N->getValueType(0);
17378 SDLoc DL(N);
17379 const TargetOptions &Options = DAG.getTarget().Options;
17380 const SDNodeFlags Flags = N->getFlags();
17381 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17382
17383 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17384 return R;
17385
17386 // fold (fmul c1, c2) -> c1*c2
17387 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
17388 return C;
17389
17390 // canonicalize constant to RHS
17391   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17392       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17393     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
17394
17395 // fold vector ops
17396 if (VT.isVector())
17397 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17398 return FoldedVOp;
17399
17400 if (SDValue NewSel = foldBinOpIntoSelect(N))
17401 return NewSel;
17402
17403 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
17404 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
17405     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17406         N0.getOpcode() == ISD::FMUL) {
17407 SDValue N00 = N0.getOperand(0);
17408 SDValue N01 = N0.getOperand(1);
17409 // Avoid an infinite loop by making sure that N00 is not a constant
17410 // (the inner multiply has not been constant folded yet).
17411       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
17412           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
17413         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
17414 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
17415 }
17416 }
17417
17418 // Match a special-case: we convert X * 2.0 into fadd.
17419 // fmul (fadd X, X), C -> fmul X, 2.0 * C
17420 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
17421 N0.getOperand(0) == N0.getOperand(1)) {
17422 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
17423 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
17424 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
17425 }
17426
17427 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
17428 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
17429 VT, N0, N1, Flags))
17430 return SD;
17431 }
17432
17433 // fold (fmul X, 2.0) -> (fadd X, X)
17434 if (N1CFP && N1CFP->isExactlyValue(+2.0))
17435 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
17436
17437 // fold (fmul X, -1.0) -> (fsub -0.0, X)
17438 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
17439 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
17440 return DAG.getNode(ISD::FSUB, DL, VT,
17441 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
17442 }
17443 }
17444
17445 // -N0 * -N1 --> N0 * N1
17446   TargetLowering::NegatibleCost CostN0 =
17447       TargetLowering::NegatibleCost::Expensive;
17448   TargetLowering::NegatibleCost CostN1 =
17449       TargetLowering::NegatibleCost::Expensive;
17450   SDValue NegN0 =
17451 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17452 if (NegN0) {
17453 HandleSDNode NegN0Handle(NegN0);
17454 SDValue NegN1 =
17455 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17456 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17457                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17458     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
17459 }
17460
17461 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17462 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17463 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17464 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17465 TLI.isOperationLegal(ISD::FABS, VT)) {
17466 SDValue Select = N0, X = N1;
17467 if (Select.getOpcode() != ISD::SELECT)
17468 std::swap(Select, X);
17469
17470 SDValue Cond = Select.getOperand(0);
17471 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17472 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17473
17474 if (TrueOpnd && FalseOpnd &&
17475 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17476 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17477 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17478 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17479 switch (CC) {
17480 default: break;
17481 case ISD::SETOLT:
17482 case ISD::SETULT:
17483 case ISD::SETOLE:
17484 case ISD::SETULE:
17485 case ISD::SETLT:
17486 case ISD::SETLE:
17487 std::swap(TrueOpnd, FalseOpnd);
17488 [[fallthrough]];
17489 case ISD::SETOGT:
17490 case ISD::SETUGT:
17491 case ISD::SETOGE:
17492 case ISD::SETUGE:
17493 case ISD::SETGT:
17494 case ISD::SETGE:
17495 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17496 TLI.isOperationLegal(ISD::FNEG, VT))
17497 return DAG.getNode(ISD::FNEG, DL, VT,
17498 DAG.getNode(ISD::FABS, DL, VT, X));
17499 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17500 return DAG.getNode(ISD::FABS, DL, VT, X);
17501
17502 break;
17503 }
17504 }
17505 }
17506
17507 // FMUL -> FMA combines:
17508 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17509 AddToWorklist(Fused.getNode());
17510 return Fused;
17511 }
17512
17513 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17514 // able to run.
17515 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17516 return R;
17517
17518 return SDValue();
17519}
17520
17521template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17522 SDValue N0 = N->getOperand(0);
17523 SDValue N1 = N->getOperand(1);
17524 SDValue N2 = N->getOperand(2);
17525 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17526 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17527 EVT VT = N->getValueType(0);
17528 SDLoc DL(N);
17529 const TargetOptions &Options = DAG.getTarget().Options;
17530 // FMA nodes have flags that propagate to the created nodes.
17531 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17532 MatchContextClass matcher(DAG, TLI, N);
17533
17534 // Constant fold FMA.
17535 if (SDValue C =
17536 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
17537 return C;
17538
17539 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17540   TargetLowering::NegatibleCost CostN0 =
17541       TargetLowering::NegatibleCost::Expensive;
17542   TargetLowering::NegatibleCost CostN1 =
17543       TargetLowering::NegatibleCost::Expensive;
17544   SDValue NegN0 =
17545 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17546 if (NegN0) {
17547 HandleSDNode NegN0Handle(NegN0);
17548 SDValue NegN1 =
17549 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17550 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17551                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17552       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17553 }
17554
17555 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17556 if (Options.UnsafeFPMath) {
17557 if (N0CFP && N0CFP->isZero())
17558 return N2;
17559 if (N1CFP && N1CFP->isZero())
17560 return N2;
17561 }
17562
17563 // FIXME: Support splat of constant.
17564 if (N0CFP && N0CFP->isExactlyValue(1.0))
17565 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
17566 if (N1CFP && N1CFP->isExactlyValue(1.0))
17567 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17568
17569 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17570   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17571       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17572     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
17573
17574 bool CanReassociate =
17575 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17576 if (CanReassociate) {
17577 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17578 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17579         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17580         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17581       return matcher.getNode(
17582 ISD::FMUL, DL, VT, N0,
17583 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17584 }
17585
17586 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17587 if (matcher.match(N0, ISD::FMUL) &&
17588         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17589         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17590       return matcher.getNode(
17591 ISD::FMA, DL, VT, N0.getOperand(0),
17592 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17593 }
17594 }
17595
17596 // (fma x, -1, y) -> (fadd (fneg x), y)
17597 // FIXME: Support splat of constant.
17598 if (N1CFP) {
17599 if (N1CFP->isExactlyValue(1.0))
17600 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17601
17602 if (N1CFP->isExactlyValue(-1.0) &&
17603 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17604 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17605 AddToWorklist(RHSNeg.getNode());
17606 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17607 }
17608
17609     // fma (fneg x), K, y -> fma x, -K, y
17610 if (matcher.match(N0, ISD::FNEG) &&
17611         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17612          (N1.hasOneUse() &&
17613 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17614 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17615 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17616 }
17617 }
17618
17619 // FIXME: Support splat of constant.
17620 if (CanReassociate) {
17621 // (fma x, c, x) -> (fmul x, (c+1))
17622 if (N1CFP && N0 == N2) {
17623 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17624 matcher.getNode(ISD::FADD, DL, VT, N1,
17625 DAG.getConstantFP(1.0, DL, VT)));
17626 }
17627
17628 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17629 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17630 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17631 matcher.getNode(ISD::FADD, DL, VT, N1,
17632 DAG.getConstantFP(-1.0, DL, VT)));
17633 }
17634 }
17635
17636   // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
17637   // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
17638 if (!TLI.isFNegFree(VT))
17639     if (SDValue Neg = TLI.getCheaperNegatedExpression(
17640             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17641 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17642 return SDValue();
17643}
17644
17645SDValue DAGCombiner::visitFMAD(SDNode *N) {
17646 SDValue N0 = N->getOperand(0);
17647 SDValue N1 = N->getOperand(1);
17648 SDValue N2 = N->getOperand(2);
17649 EVT VT = N->getValueType(0);
17650 SDLoc DL(N);
17651
17652 // Constant fold FMAD.
17653 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
17654 return C;
17655
17656 return SDValue();
17657}
17658
17659// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17660// reciprocal.
17661// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17662// Notice that this is not always beneficial. One reason is different targets
17663// may have different costs for FDIV and FMUL, so sometimes the cost of two
17664// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17665 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
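// (Illustrative note, not from the original source: given a/D, b/D and c/D with
// enough uses of D to satisfy the target's combineRepeatedFPDivisors()
// threshold, the code below materializes t = 1.0/D once and rewrites the
// divisions as a*t, b*t and c*t, trading three FDIVs for one FDIV plus three
// FMULs.)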
17666SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17667 // TODO: Limit this transform based on optsize/minsize - it always creates at
17668 // least 1 extra instruction. But the perf win may be substantial enough
17669 // that only minsize should restrict this.
17670 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17671 const SDNodeFlags Flags = N->getFlags();
17672 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17673 return SDValue();
17674
17675 // Skip if current node is a reciprocal/fneg-reciprocal.
17676 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17677 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17678 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17679 return SDValue();
17680
17681 // Exit early if the target does not want this transform or if there can't
17682 // possibly be enough uses of the divisor to make the transform worthwhile.
17683 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17684
17685 // For splat vectors, scale the number of uses by the splat factor. If we can
17686 // convert the division into a scalar op, that will likely be much faster.
17687 unsigned NumElts = 1;
17688 EVT VT = N->getValueType(0);
17689 if (VT.isVector() && DAG.isSplatValue(N1))
17690 NumElts = VT.getVectorMinNumElements();
17691
17692 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17693 return SDValue();
17694
17695 // Find all FDIV users of the same divisor.
17696 // Use a set because duplicates may be present in the user list.
17697   SetVector<SDNode *> Users;
17698   for (auto *U : N1->users()) {
17699 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17700 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17701 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17702 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17703 U->getFlags().hasAllowReassociation() &&
17704 U->getFlags().hasNoSignedZeros())
17705 continue;
17706
17707 // This division is eligible for optimization only if global unsafe math
17708 // is enabled or if this division allows reciprocal formation.
17709 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17710 Users.insert(U);
17711 }
17712 }
17713
17714 // Now that we have the actual number of divisor uses, make sure it meets
17715 // the minimum threshold specified by the target.
17716 if ((Users.size() * NumElts) < MinUses)
17717 return SDValue();
17718
17719 SDLoc DL(N);
17720 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17721 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17722
17723 // Dividend / Divisor -> Dividend * Reciprocal
17724 for (auto *U : Users) {
17725 SDValue Dividend = U->getOperand(0);
17726 if (Dividend != FPOne) {
17727 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17728 Reciprocal, Flags);
17729 CombineTo(U, NewNode);
17730 } else if (U != Reciprocal.getNode()) {
17731 // In the absence of fast-math-flags, this user node is always the
17732 // same node as Reciprocal, but with FMF they may be different nodes.
17733 CombineTo(U, Reciprocal);
17734 }
17735 }
17736 return SDValue(N, 0); // N was replaced.
17737}
17738
17739SDValue DAGCombiner::visitFDIV(SDNode *N) {
17740 SDValue N0 = N->getOperand(0);
17741 SDValue N1 = N->getOperand(1);
17742 EVT VT = N->getValueType(0);
17743 SDLoc DL(N);
17744 const TargetOptions &Options = DAG.getTarget().Options;
17745 SDNodeFlags Flags = N->getFlags();
17746 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17747
17748 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17749 return R;
17750
17751 // fold (fdiv c1, c2) -> c1/c2
17752 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17753 return C;
17754
17755 // fold vector ops
17756 if (VT.isVector())
17757 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17758 return FoldedVOp;
17759
17760 if (SDValue NewSel = foldBinOpIntoSelect(N))
17761 return NewSel;
17762
17763   if (SDValue V = combineRepeatedFPDivisors(N))
17764     return V;
17765
17766 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17767 // the loss is acceptable with AllowReciprocal.
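  // (Illustrative note, not from the original source: X / 4.0 can always become
  // X * 0.25 because 0.25 is exactly representable, while X / 3.0 becomes
  // X * (1.0/3.0) only under AllowReciprocal since 1/3 is inexact in binary
  // floating point.)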
17768 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17769 // Compute the reciprocal 1.0 / c2.
17770 const APFloat &N1APF = N1CFP->getValueAPF();
17771 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17772     APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17773     // Only do the transform if the reciprocal is a legal fp immediate that
17774     // isn't too nasty (e.g. NaN, denormal, ...).
17775 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17776 (st == APFloat::opInexact &&
17777 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17778 (!LegalOperations ||
17779 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17780 // backend)... we should handle this gracefully after Legalize.
17781 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17782          TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17783          TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17784 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17785 DAG.getConstantFP(Recip, DL, VT));
17786 }
17787
17788 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17789 // If this FDIV is part of a reciprocal square root, it may be folded
17790 // into a target-specific square root estimate instruction.
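    // (Illustrative note, not from the original source: i.e. (fdiv x, (fsqrt y))
    // becomes (fmul x, rsqrt-estimate(y)) when the target supplies an estimate;
    // the FP_EXTEND/FP_ROUND cases below look through a cast of the sqrt
    // operand first.)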
17791 if (N1.getOpcode() == ISD::FSQRT) {
17792 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17793 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17794 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17795 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17796 if (SDValue RV =
17797 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17798 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17799 AddToWorklist(RV.getNode());
17800 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17801 }
17802 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17803 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17804 if (SDValue RV =
17805 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17806 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17807 AddToWorklist(RV.getNode());
17808 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17809 }
17810 } else if (N1.getOpcode() == ISD::FMUL) {
17811 // Look through an FMUL. Even though this won't remove the FDIV directly,
17812 // it's still worthwhile to get rid of the FSQRT if possible.
17813 SDValue Sqrt, Y;
17814 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17815 Sqrt = N1.getOperand(0);
17816 Y = N1.getOperand(1);
17817 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17818 Sqrt = N1.getOperand(1);
17819 Y = N1.getOperand(0);
17820 }
17821 if (Sqrt.getNode()) {
17822 // If the other multiply operand is known positive, pull it into the
17823 // sqrt. That will eliminate the division if we convert to an estimate.
17824 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17825 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17826 SDValue A;
17827 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17828 A = Y.getOperand(0);
17829 else if (Y == Sqrt.getOperand(0))
17830 A = Y;
17831 if (A) {
17832 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17833 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17834 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17835 SDValue AAZ =
17836 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17837 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17838 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17839
17840 // Estimate creation failed. Clean up speculatively created nodes.
17841 recursivelyDeleteUnusedNodes(AAZ.getNode());
17842 }
17843 }
17844
17845 // We found a FSQRT, so try to make this fold:
17846 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17847 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17848 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17849 AddToWorklist(Div.getNode());
17850 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17851 }
17852 }
17853 }
17854
17855 // Fold into a reciprocal estimate and multiply instead of a real divide.
17856 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17857 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17858 return RV;
17859 }
17860
17861 // Fold X/Sqrt(X) -> Sqrt(X)
17862 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17863 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17864 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17865 return N1;
17866
17867 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17868   TargetLowering::NegatibleCost CostN0 =
17869       TargetLowering::NegatibleCost::Expensive;
17870   TargetLowering::NegatibleCost CostN1 =
17871       TargetLowering::NegatibleCost::Expensive;
17872   SDValue NegN0 =
17873 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17874 if (NegN0) {
17875 HandleSDNode NegN0Handle(NegN0);
17876 SDValue NegN1 =
17877 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17878 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17879                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17880     return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
17881 }
17882
17883 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17884 return R;
17885
17886 return SDValue();
17887}
17888
17889SDValue DAGCombiner::visitFREM(SDNode *N) {
17890 SDValue N0 = N->getOperand(0);
17891 SDValue N1 = N->getOperand(1);
17892 EVT VT = N->getValueType(0);
17893 SDNodeFlags Flags = N->getFlags();
17894 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17895 SDLoc DL(N);
17896
17897 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17898 return R;
17899
17900 // fold (frem c1, c2) -> fmod(c1,c2)
17901 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17902 return C;
17903
17904 if (SDValue NewSel = foldBinOpIntoSelect(N))
17905 return NewSel;
17906
17907   // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, providing N1 is an integer
17908 // power of 2.
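  // (Illustrative note, not from the original source: e.g. frem 7.5, 2.0 gives
  // trunc(7.5 / 2.0) = 3.0 and 7.5 - 3.0 * 2.0 = 1.5, matching fmod(7.5, 2.0).)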
17909 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17913 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17914 bool NeedsCopySign =
17915 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17916 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17917 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17918 SDValue MLA;
17919     if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17920       MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17921 N1, N0);
17922 } else {
17923 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17924 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17925 }
17926 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17927 }
17928
17929 return SDValue();
17930}
17931
17932SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17933 SDNodeFlags Flags = N->getFlags();
17934 const TargetOptions &Options = DAG.getTarget().Options;
17935
17936 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17937 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17938 if (!Flags.hasApproximateFuncs() ||
17939 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17940 return SDValue();
17941
17942 SDValue N0 = N->getOperand(0);
17943 if (TLI.isFsqrtCheap(N0, DAG))
17944 return SDValue();
17945
17946 // FSQRT nodes have flags that propagate to the created nodes.
17947 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17948 // transform the fdiv, we may produce a sub-optimal estimate sequence
17949 // because the reciprocal calculation may not have to filter out a
17950 // 0.0 input.
17951 return buildSqrtEstimate(N0, Flags);
17952}
17953
17954/// copysign(x, fp_extend(y)) -> copysign(x, y)
17955/// copysign(x, fp_round(y)) -> copysign(x, y)
17956/// Operands to the functions are the type of X and Y respectively.
17957static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17958 // Always fold no-op FP casts.
17959 if (XTy == YTy)
17960 return true;
17961
17962 // Do not optimize out type conversion of f128 type yet.
17963 // For some targets like x86_64, configuration is changed to keep one f128
17964 // value in one SSE register, but instruction selection cannot handle
17965 // FCOPYSIGN on SSE registers yet.
17966 if (YTy == MVT::f128)
17967 return false;
17968
17970}
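// (Illustrative note, not from the original source: the sign of y is preserved
// exactly by fp_extend and fp_round, so FCOPYSIGN can read its sign input from
// the uncast value; the checks above guard cases, such as f128, where the
// combined node would be hard to select.)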
17971
17972 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17973   SDValue N1 = N->getOperand(1);
17974 if (N1.getOpcode() != ISD::FP_EXTEND &&
17975 N1.getOpcode() != ISD::FP_ROUND)
17976 return false;
17977 EVT N1VT = N1->getValueType(0);
17978 EVT N1Op0VT = N1->getOperand(0).getValueType();
17979 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17980}
17981
17982SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17983 SDValue N0 = N->getOperand(0);
17984 SDValue N1 = N->getOperand(1);
17985 EVT VT = N->getValueType(0);
17986 SDLoc DL(N);
17987
17988 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17989 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
17990 return C;
17991
17992 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17993 const APFloat &V = N1C->getValueAPF();
17994 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17995 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17996 if (!V.isNegative()) {
17997 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17998 return DAG.getNode(ISD::FABS, DL, VT, N0);
17999 } else {
18000 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18001 return DAG.getNode(ISD::FNEG, DL, VT,
18002 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
18003 }
18004 }
18005
18006 // copysign(fabs(x), y) -> copysign(x, y)
18007 // copysign(fneg(x), y) -> copysign(x, y)
18008 // copysign(copysign(x,z), y) -> copysign(x, y)
18009 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
18010 N0.getOpcode() == ISD::FCOPYSIGN)
18011 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
18012
18013 // copysign(x, abs(y)) -> abs(x)
18014 if (N1.getOpcode() == ISD::FABS)
18015 return DAG.getNode(ISD::FABS, DL, VT, N0);
18016
18017 // copysign(x, copysign(y,z)) -> copysign(x, z)
18018 if (N1.getOpcode() == ISD::FCOPYSIGN)
18019 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
18020
18021 // copysign(x, fp_extend(y)) -> copysign(x, y)
18022 // copysign(x, fp_round(y)) -> copysign(x, y)
18023   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18024     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18025
18026 // We only take the sign bit from the sign operand.
18027 EVT SignVT = N1.getValueType();
18028 if (SimplifyDemandedBits(N1,
18029                            APInt::getSignMask(SignVT.getScalarSizeInBits())))
18030     return SDValue(N, 0);
18031
18032 // We only take the non-sign bits from the value operand
18033 if (SimplifyDemandedBits(N0,
18034      APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
18035    return SDValue(N, 0);
18036
18037 return SDValue();
18038}
18039
18040SDValue DAGCombiner::visitFPOW(SDNode *N) {
18041 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18042 if (!ExponentC)
18043 return SDValue();
18044 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18045
18046 // Try to convert x ** (1/3) into cube root.
18047 // TODO: Handle the various flavors of long double.
18048 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18049 // Some range near 1/3 should be fine.
18050 EVT VT = N->getValueType(0);
18051 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18052 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18053 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18054 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18055 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
18056 // For regular numbers, rounding may cause the results to differ.
18057 // Therefore, we require { nsz ninf nnan afn } for this transform.
18058 // TODO: We could select out the special cases if we don't have nsz/ninf.
18059 SDNodeFlags Flags = N->getFlags();
18060 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18061 !Flags.hasApproximateFuncs())
18062 return SDValue();
18063
18064 // Do not create a cbrt() libcall if the target does not have it, and do not
18065 // turn a pow that has lowering support into a cbrt() libcall.
18066 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18067      (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18068       DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18069      return SDValue();
18070
18071 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18072 }
18073
18074 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18075 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18076 // TODO: This could be extended (using a target hook) to handle smaller
18077 // power-of-2 fractional exponents.
18078 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18079 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18080 if (ExponentIs025 || ExponentIs075) {
18081 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18082 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18083 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18084 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18085 // For regular numbers, rounding may cause the results to differ.
18086 // Therefore, we require { nsz ninf afn } for this transform.
18087 // TODO: We could select out the special cases if we don't have nsz/ninf.
18088 SDNodeFlags Flags = N->getFlags();
18089
18090 // We only need no signed zeros for the 0.25 case.
18091 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18092 !Flags.hasApproximateFuncs())
18093 return SDValue();
18094
18095 // Don't double the number of libcalls. We are trying to inline fast code.
18096    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18097      return SDValue();
18098
18099 // Assume that libcalls are the smallest code.
18100 // TODO: This restriction should probably be lifted for vectors.
18101 if (ForCodeSize)
18102 return SDValue();
18103
18104 // pow(X, 0.25) --> sqrt(sqrt(X))
18105 SDLoc DL(N);
18106 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18107 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18108 if (ExponentIs025)
18109 return SqrtSqrt;
18110 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18111 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18112 }
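  // A quick numeric sanity check of the rewrite above (illustrative values):
  // pow(16.0, 0.25) = 2.0 = sqrt(sqrt(16.0)), and
  // pow(16.0, 0.75) = 8.0 = sqrt(16.0) * sqrt(sqrt(16.0)) = 4.0 * 2.0.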
18113
18114 return SDValue();
18115}
18116
18117static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18118                               const TargetLowering &TLI) {
18119 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18120 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18121 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18122 // conversions would return +0.0.
18123 // FIXME: We should be able to use node-level FMF here.
18124 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
18125 EVT VT = N->getValueType(0);
18126 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18127      !DAG.getTarget().Options.NoSignedZerosFPMath)
18128    return SDValue();
18129
18130 // fptosi/fptoui round towards zero, so converting from FP to integer and
18131 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
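  // For instance, sitofp (fptosi 2.7) = sitofp 2 = 2.0 = ftrunc 2.7, and
  // sitofp (fptosi -2.7) = sitofp -2 = -2.0 = ftrunc -2.7 (illustrative
  // values); only inputs in (-1.0, -0.0) differ, giving -0.0 from ftrunc but
  // +0.0 from the round trip, hence the NoSignedZerosFPMath requirement above.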
18132 SDValue N0 = N->getOperand(0);
18133 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18134 N0.getOperand(0).getValueType() == VT)
18135 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18136
18137 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18138 N0.getOperand(0).getValueType() == VT)
18139 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18140
18141 return SDValue();
18142}
18143
18144SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18145 SDValue N0 = N->getOperand(0);
18146 EVT VT = N->getValueType(0);
18147 EVT OpVT = N0.getValueType();
18148 SDLoc DL(N);
18149
18150 // [us]itofp(undef) = 0, because the result value is bounded.
18151 if (N0.isUndef())
18152 return DAG.getConstantFP(0.0, DL, VT);
18153
18154 // fold (sint_to_fp c1) -> c1fp
18155 // ...but only if the target supports immediate floating-point values
18156 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18157 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18158 return C;
18159
18160 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18161 // but UINT_TO_FP is legal on this target, try to convert.
18162 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18163 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18164 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18165 if (DAG.SignBitIsZero(N0))
18166 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18167 }
18168
18169 // The next optimizations are desirable only if SELECT_CC can be lowered.
18170 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18171 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18172 !VT.isVector() &&
18173 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18174 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18175 DAG.getConstantFP(0.0, DL, VT));
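  // This fold works because an i1 setcc result is all-ones when true, i.e.
  // -1 as a signed integer, so sint_to_fp maps true to -1.0 and false to 0.0
  // (an illustrative reading of the fold above).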
18176
18177 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18178 // (select (setcc x, y, cc), 1.0, 0.0)
18179 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18180 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18181 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18182 return DAG.getSelect(DL, VT, N0.getOperand(0),
18183 DAG.getConstantFP(1.0, DL, VT),
18184 DAG.getConstantFP(0.0, DL, VT));
18185
18186 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18187 return FTrunc;
18188
18189 return SDValue();
18190}
18191
18192SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18193 SDValue N0 = N->getOperand(0);
18194 EVT VT = N->getValueType(0);
18195 EVT OpVT = N0.getValueType();
18196 SDLoc DL(N);
18197
18198 // [us]itofp(undef) = 0, because the result value is bounded.
18199 if (N0.isUndef())
18200 return DAG.getConstantFP(0.0, DL, VT);
18201
18202 // fold (uint_to_fp c1) -> c1fp
18203 // ...but only if the target supports immediate floating-point values
18204 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18205 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18206 return C;
18207
18208 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18209 // but SINT_TO_FP is legal on this target, try to convert.
18210 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18211 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18212 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18213 if (DAG.SignBitIsZero(N0))
18214 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18215 }
18216
18217 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18218 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18219 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18220 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18221 DAG.getConstantFP(0.0, DL, VT));
18222
18223 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18224 return FTrunc;
18225
18226 return SDValue();
18227}
18228
18229// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18230static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
18231  SDValue N0 = N->getOperand(0);
18232 EVT VT = N->getValueType(0);
18233
18234 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18235 return SDValue();
18236
18237 SDValue Src = N0.getOperand(0);
18238 EVT SrcVT = Src.getValueType();
18239 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18240 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18241
18242 // We can safely assume the conversion won't overflow the output range,
18243 // because (for example) (uint8_t)18293.f is undefined behavior.
18244
18245 // Since we can assume the conversion won't overflow, our decision as to
18246 // whether the input will fit in the float should depend on the minimum
18247 // of the input range and output range.
18248
18249 // This means this is also safe for a signed input and unsigned output, since
18250 // a negative input would lead to undefined behavior.
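  // Worked example (hypothetical types): for fp_to_uint (uint_to_fp X) with
  // X : i16, an f32 intermediate and an i32 result, InputSize = 16,
  // OutputSize = 32 and ActualSize = 16, which fits in f32's 24-bit
  // significand, so the code below emits a zext of X. With an i32 input,
  // ActualSize = 32 exceeds 24 and no fold is performed, since values above
  // 2^24 may already have been rounded by the f32 conversion.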
18251 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18252 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18253 unsigned ActualSize = std::min(InputSize, OutputSize);
18254 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18255
18256 // We can only fold away the float conversion if the input range can be
18257 // represented exactly in the float range.
18258 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18259 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18260 unsigned ExtOp =
18261 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18262 return DAG.getNode(ExtOp, DL, VT, Src);
18263 }
18264 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18265 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18266 return DAG.getBitcast(VT, Src);
18267 }
18268 return SDValue();
18269}
18270
18271SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18272 SDValue N0 = N->getOperand(0);
18273 EVT VT = N->getValueType(0);
18274 SDLoc DL(N);
18275
18276 // fold (fp_to_sint undef) -> undef
18277 if (N0.isUndef())
18278 return DAG.getUNDEF(VT);
18279
18280 // fold (fp_to_sint c1fp) -> c1
18281 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18282 return C;
18283
18284 return FoldIntToFPToInt(N, DL, DAG);
18285}
18286
18287SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18288 SDValue N0 = N->getOperand(0);
18289 EVT VT = N->getValueType(0);
18290 SDLoc DL(N);
18291
18292 // fold (fp_to_uint undef) -> undef
18293 if (N0.isUndef())
18294 return DAG.getUNDEF(VT);
18295
18296 // fold (fp_to_uint c1fp) -> c1
18297 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
18298 return C;
18299
18300 return FoldIntToFPToInt(N, DL, DAG);
18301}
18302
18303SDValue DAGCombiner::visitXROUND(SDNode *N) {
18304 SDValue N0 = N->getOperand(0);
18305 EVT VT = N->getValueType(0);
18306
18307 // fold (lrint|llrint undef) -> undef
18308 // fold (lround|llround undef) -> undef
18309 if (N0.isUndef())
18310 return DAG.getUNDEF(VT);
18311
18312 // fold (lrint|llrint c1fp) -> c1
18313 // fold (lround|llround c1fp) -> c1
18314 if (SDValue C =
18315 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18316 return C;
18317
18318 return SDValue();
18319}
18320
18321SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
18322 SDValue N0 = N->getOperand(0);
18323 SDValue N1 = N->getOperand(1);
18324 EVT VT = N->getValueType(0);
18325 SDLoc DL(N);
18326
18327 // fold (fp_round c1fp) -> c1fp
18328 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
18329 return C;
18330
18331 // fold (fp_round (fp_extend x)) -> x
18332 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
18333 return N0.getOperand(0);
18334
18335 // fold (fp_round (fp_round x)) -> (fp_round x)
18336 if (N0.getOpcode() == ISD::FP_ROUND) {
18337 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18338 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
18339
18340 // Avoid folding legal fp_rounds into non-legal ones.
18341 if (!hasOperation(ISD::FP_ROUND, VT))
18342 return SDValue();
18343
18344 // Skip this folding if it results in an fp_round from f80 to f16.
18345 //
18346 // f80 to f16 always generates an expensive (and as yet, unimplemented)
18347 // libcall to __truncxfhf2 instead of selecting native f16 conversion
18348 // instructions from f32 or f64. Moreover, the first (value-preserving)
18349 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
18350 // x86.
18351 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
18352 return SDValue();
18353
18354 // If the first fp_round isn't a value preserving truncation, it might
18355 // introduce a tie in the second fp_round, that wouldn't occur in the
18356 // single-step fp_round we want to fold to.
18357 // In other words, double rounding isn't the same as rounding.
18358 // Also, this is a value preserving truncation iff both fp_round's are.
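    // Decimal analogy of that double-rounding hazard (illustrative only):
    // rounding 2.251 straight to one decimal gives 2.3, but rounding first to
    // two decimals gives 2.25, a tie, which then rounds to 2.2 under
    // round-to-nearest-even.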
18359 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
18360 return DAG.getNode(
18361 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
18362 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
18363 }
18364
18365 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
18366 // Note: From a legality perspective, this is a two step transform. First,
18367 // we duplicate the fp_round to the arguments of the copysign, then we
18368 // eliminate the fp_round on Y. The second step requires an additional
18369 // predicate to match the implementation above.
18370 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
18371      CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
18372                                       N0.getValueType())) {
18373 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
18374 N0.getOperand(0), N1);
18375 AddToWorklist(Tmp.getNode());
18376 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
18377 }
18378
18379 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18380 return NewVSel;
18381
18382 return SDValue();
18383}
18384
18385SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
18386 SDValue N0 = N->getOperand(0);
18387 EVT VT = N->getValueType(0);
18388 SDLoc DL(N);
18389
18390 if (VT.isVector())
18391 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
18392 return FoldedVOp;
18393
18394 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
18395 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
18396 return SDValue();
18397
18398 // fold (fp_extend c1fp) -> c1fp
18399 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
18400 return C;
18401
18402 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
18403 if (N0.getOpcode() == ISD::FP16_TO_FP &&
18404      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
18405    return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
18406
18407 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
18408 // value of X.
18409 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
18410 SDValue In = N0.getOperand(0);
18411 if (In.getValueType() == VT) return In;
18412 if (VT.bitsLT(In.getValueType()))
18413 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
18414 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
18415 }
18416
18417 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
18418 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
18419      TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
18420    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
18421 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
18422 LN0->getChain(),
18423 LN0->getBasePtr(), N0.getValueType(),
18424 LN0->getMemOperand());
18425 CombineTo(N, ExtLoad);
18426 CombineTo(
18427 N0.getNode(),
18428 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
18429 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
18430 ExtLoad.getValue(1));
18431 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18432 }
18433
18434 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18435 return NewVSel;
18436
18437 return SDValue();
18438}
18439
18440SDValue DAGCombiner::visitFCEIL(SDNode *N) {
18441 SDValue N0 = N->getOperand(0);
18442 EVT VT = N->getValueType(0);
18443
18444 // fold (fceil c1) -> fceil(c1)
18445 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
18446 return C;
18447
18448 return SDValue();
18449}
18450
18451SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18452 SDValue N0 = N->getOperand(0);
18453 EVT VT = N->getValueType(0);
18454
18455 // fold (ftrunc c1) -> ftrunc(c1)
18456 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
18457 return C;
18458
18459 // fold ftrunc (known rounded int x) -> x
18460 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
18461 // likely to be generated to extract integer from a rounded floating value.
18462 switch (N0.getOpcode()) {
18463 default: break;
18464 case ISD::FRINT:
18465 case ISD::FTRUNC:
18466 case ISD::FNEARBYINT:
18467 case ISD::FROUNDEVEN:
18468 case ISD::FFLOOR:
18469 case ISD::FCEIL:
18470 return N0;
18471 }
18472
18473 return SDValue();
18474}
18475
18476SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18477 SDValue N0 = N->getOperand(0);
18478
18479 // fold (ffrexp c1) -> ffrexp(c1)
18480  if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18481    return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18482 return SDValue();
18483}
18484
18485SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18486 SDValue N0 = N->getOperand(0);
18487 EVT VT = N->getValueType(0);
18488
18489 // fold (ffloor c1) -> ffloor(c1)
18490 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
18491 return C;
18492
18493 return SDValue();
18494}
18495
18496SDValue DAGCombiner::visitFNEG(SDNode *N) {
18497 SDValue N0 = N->getOperand(0);
18498 EVT VT = N->getValueType(0);
18499 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18500
18501 // Constant fold FNEG.
18502 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
18503 return C;
18504
18505 if (SDValue NegN0 =
18506 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18507 return NegN0;
18508
18509 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18510 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18511 // know it was called from a context with a nsz flag if the input fsub does
18512 // not.
18513 if (N0.getOpcode() == ISD::FSUB &&
18514      (DAG.getTarget().Options.NoSignedZerosFPMath ||
18515       N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18516 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18517 N0.getOperand(0));
18518 }
18519
18520 if (SDValue Cast = foldSignChangeInBitcast(N))
18521 return Cast;
18522
18523 return SDValue();
18524}
18525
18526SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18527 SDValue N0 = N->getOperand(0);
18528 SDValue N1 = N->getOperand(1);
18529 EVT VT = N->getValueType(0);
18530 const SDNodeFlags Flags = N->getFlags();
18531 unsigned Opc = N->getOpcode();
18532 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18533 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18534 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18535
18536 // Constant fold.
18537 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18538 return C;
18539
18540 // Canonicalize to constant on RHS.
18541  if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18542      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18543    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18544
18545 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18546 const APFloat &AF = N1CFP->getValueAPF();
18547
18548 // minnum(X, nan) -> X
18549 // maxnum(X, nan) -> X
18550 // minimum(X, nan) -> nan
18551 // maximum(X, nan) -> nan
18552 if (AF.isNaN())
18553 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18554
18555 // In the following folds, inf can be replaced with the largest finite
18556 // float, if the ninf flag is set.
18557 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18558 // minnum(X, -inf) -> -inf
18559 // maxnum(X, +inf) -> +inf
18560 // minimum(X, -inf) -> -inf if nnan
18561 // maximum(X, +inf) -> +inf if nnan
18562 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18563 return N->getOperand(1);
18564
18565 // minnum(X, +inf) -> X if nnan
18566 // maxnum(X, -inf) -> X if nnan
18567 // minimum(X, +inf) -> X
18568 // maximum(X, -inf) -> X
18569 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18570 return N->getOperand(0);
18571 }
18572 }
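  // For example, with the ninf flag set, maxnum(X, FLT_MAX) folds to FLT_MAX
  // just as maxnum(X, +inf) folds to +inf: no operand may exceed the largest
  // finite value. Without ninf, X could still be +inf and the fold would be
  // wrong (an illustrative reading of the rules above).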
18573
18574 if (SDValue SD = reassociateReduction(
18575 PropagatesNaN
18576              ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18577              : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18578          Opc, SDLoc(N), VT, N0, N1, Flags))
18579 return SD;
18580
18581 return SDValue();
18582}
18583
18584SDValue DAGCombiner::visitFABS(SDNode *N) {
18585 SDValue N0 = N->getOperand(0);
18586 EVT VT = N->getValueType(0);
18587 SDLoc DL(N);
18588
18589 // fold (fabs c1) -> fabs(c1)
18590 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
18591 return C;
18592
18593 // fold (fabs (fabs x)) -> (fabs x)
18594 if (N0.getOpcode() == ISD::FABS)
18595 return N->getOperand(0);
18596
18597 // fold (fabs (fneg x)) -> (fabs x)
18598 // fold (fabs (fcopysign x, y)) -> (fabs x)
18599 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18600 return DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
18601
18602 if (SDValue Cast = foldSignChangeInBitcast(N))
18603 return Cast;
18604
18605 return SDValue();
18606}
18607
18608SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18609 SDValue Chain = N->getOperand(0);
18610 SDValue N1 = N->getOperand(1);
18611 SDValue N2 = N->getOperand(2);
18612
18613 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18614 // nondeterministic jumps).
18615 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18616 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18617 N1->getOperand(0), N2, N->getFlags());
18618 }
18619
18620 // Variant of the previous fold where there is a SETCC in between:
18621 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18622 // =>
18623 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18624 // =>
18625 // BRCOND(SETCC(X, CONST, Cond))
18626 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18627 // isn't equivalent to true or false.
18628 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18629 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18630 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18631 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18632 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18633 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18634 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18635 bool Updated = false;
18636
18637 // Is 'X Cond C' always true or false?
18638 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18639 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18640 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18641 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18642 (Cond == ISD::SETGT && C->isMaxSignedValue());
18643 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18644 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18645 (Cond == ISD::SETUGE && C->isZero()) ||
18646 (Cond == ISD::SETGE && C->isMinSignedValue());
18647 return True || False;
18648 };
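    // For instance, with Cond = SETUGE and C = 0, "X >=u 0" holds for every
    // non-poison X, so SETCC(FREEZE(X), 0, SETUGE) is the constant true even
    // when X is poison; stripping the FREEZE would leave SETCC(X, 0, SETUGE),
    // which is poison for a poison X, so that case is rejected below
    // (illustrative example).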
18649
18650 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18651 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18652 S0 = S0->getOperand(0);
18653 Updated = true;
18654 }
18655 }
18656 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18657 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18658 S1 = S1->getOperand(0);
18659 Updated = true;
18660 }
18661 }
18662
18663 if (Updated)
18664 return DAG.getNode(
18665 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18666 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
18667 N->getFlags());
18668 }
18669
18670 // If N is a constant we could fold this into a fallthrough or unconditional
18671 // branch. However that doesn't happen very often in normal code, because
18672 // Instcombine/SimplifyCFG should have handled the available opportunities.
18673 // If we did this folding here, it would be necessary to update the
18674 // MachineBasicBlock CFG, which is awkward.
18675
18676 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18677 // on the target.
18678 if (N1.getOpcode() == ISD::SETCC &&
18679      TLI.isOperationLegalOrCustom(ISD::BR_CC,
18680                                   N1.getOperand(0).getValueType())) {
18681 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18682 Chain, N1.getOperand(2),
18683 N1.getOperand(0), N1.getOperand(1), N2);
18684 }
18685
18686 if (N1.hasOneUse()) {
18687 // rebuildSetCC calls visitXor which may change the Chain when there is a
18688 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18689 HandleSDNode ChainHandle(Chain);
18690 if (SDValue NewN1 = rebuildSetCC(N1))
18691 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18692 ChainHandle.getValue(), NewN1, N2, N->getFlags());
18693 }
18694
18695 return SDValue();
18696}
18697
18698SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18699 if (N.getOpcode() == ISD::SRL ||
18700 (N.getOpcode() == ISD::TRUNCATE &&
18701 (N.getOperand(0).hasOneUse() &&
18702 N.getOperand(0).getOpcode() == ISD::SRL))) {
18703    // Look past the truncate.
18704 if (N.getOpcode() == ISD::TRUNCATE)
18705 N = N.getOperand(0);
18706
18707 // Match this pattern so that we can generate simpler code:
18708 //
18709 // %a = ...
18710 // %b = and i32 %a, 2
18711 // %c = srl i32 %b, 1
18712 // brcond i32 %c ...
18713 //
18714 // into
18715 //
18716 // %a = ...
18717 // %b = and i32 %a, 2
18718 // %c = setcc eq %b, 0
18719 // brcond %c ...
18720 //
18721 // This applies only when the AND constant value has one bit set and the
18722 // SRL constant is equal to the log2 of the AND constant. The back-end is
18723 // smart enough to convert the result into a TEST/JMP sequence.
18724 SDValue Op0 = N.getOperand(0);
18725 SDValue Op1 = N.getOperand(1);
18726
18727 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18728 SDValue AndOp1 = Op0.getOperand(1);
18729
18730 if (AndOp1.getOpcode() == ISD::Constant) {
18731 const APInt &AndConst = AndOp1->getAsAPIntVal();
18732
18733 if (AndConst.isPowerOf2() &&
18734 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18735 SDLoc DL(N);
18736 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18737 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18738 ISD::SETNE);
18739 }
18740 }
18741 }
18742 }
18743
18744 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18745 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18746 if (N.getOpcode() == ISD::XOR) {
18747 // Because we may call this on a speculatively constructed
18748 // SimplifiedSetCC Node, we need to simplify this node first.
18749 // Ideally this should be folded into SimplifySetCC and not
18750 // here. For now, grab a handle to N so we don't lose it from
18751    // replacements internal to the visit.
18752 HandleSDNode XORHandle(N);
18753 while (N.getOpcode() == ISD::XOR) {
18754 SDValue Tmp = visitXOR(N.getNode());
18755 // No simplification done.
18756 if (!Tmp.getNode())
18757 break;
18758      // Returning N is a form of in-visit replacement that may invalidate
18759      // N. Grab the value from the handle.
18760 if (Tmp.getNode() == N.getNode())
18761 N = XORHandle.getValue();
18762 else // Node simplified. Try simplifying again.
18763 N = Tmp;
18764 }
18765
18766 if (N.getOpcode() != ISD::XOR)
18767 return N;
18768
18769 SDValue Op0 = N->getOperand(0);
18770 SDValue Op1 = N->getOperand(1);
18771
18772 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18773 bool Equal = false;
18774 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18775 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18776 Op0.getValueType() == MVT::i1) {
18777 N = Op0;
18778 Op0 = N->getOperand(0);
18779 Op1 = N->getOperand(1);
18780 Equal = true;
18781 }
18782
18783 EVT SetCCVT = N.getValueType();
18784 if (LegalTypes)
18785 SetCCVT = getSetCCResultType(SetCCVT);
18786 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
18787 // it would introduce illegal operations post-legalization as this can
18788 // result in infinite looping between converting xor->setcc here, and
18789 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
18790      ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
18791      if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
18792 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
18793 }
18794 }
18795
18796 return SDValue();
18797}
18798
18799// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18800//
18801SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18802 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18803 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18804
18805 // If N is a constant we could fold this into a fallthrough or unconditional
18806 // branch. However that doesn't happen very often in normal code, because
18807 // Instcombine/SimplifyCFG should have handled the available opportunities.
18808 // If we did this folding here, it would be necessary to update the
18809 // MachineBasicBlock CFG, which is awkward.
18810
18811 // Use SimplifySetCC to simplify SETCC's.
18812  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18813                               CondLHS, CondRHS, CC->get(), SDLoc(N),
18814 false);
18815 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18816
18817 // fold to a simpler setcc
18818 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18819 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18820 N->getOperand(0), Simp.getOperand(2),
18821 Simp.getOperand(0), Simp.getOperand(1),
18822 N->getOperand(4));
18823
18824 return SDValue();
18825}
18826
18827static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18828 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18829 const TargetLowering &TLI) {
18830 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18831 if (LD->isIndexed())
18832 return false;
18833 EVT VT = LD->getMemoryVT();
18834 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18835 return false;
18836 Ptr = LD->getBasePtr();
18837 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18838 if (ST->isIndexed())
18839 return false;
18840 EVT VT = ST->getMemoryVT();
18841 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18842 return false;
18843 Ptr = ST->getBasePtr();
18844 IsLoad = false;
18845 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18846 if (LD->isIndexed())
18847 return false;
18848 EVT VT = LD->getMemoryVT();
18849 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18850 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18851 return false;
18852 Ptr = LD->getBasePtr();
18853 IsMasked = true;
18854 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18855 if (ST->isIndexed())
18856 return false;
18857 EVT VT = ST->getMemoryVT();
18858 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18859 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18860 return false;
18861 Ptr = ST->getBasePtr();
18862 IsLoad = false;
18863 IsMasked = true;
18864 } else {
18865 return false;
18866 }
18867 return true;
18868}
18869
18870/// Try turning a load/store into a pre-indexed load/store when the base
18871/// pointer is an add or subtract and it has other uses besides the load/store.
18872/// After the transformation, the new indexed load/store has effectively folded
18873/// the add/subtract in and all of its other uses are redirected to the
18874/// new load/store.
18875bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18876 if (Level < AfterLegalizeDAG)
18877 return false;
18878
18879 bool IsLoad = true;
18880 bool IsMasked = false;
18881 SDValue Ptr;
18882 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18883 Ptr, TLI))
18884 return false;
18885
18886 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18887 // out. There is no reason to make this a preinc/predec.
18888 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18889 Ptr->hasOneUse())
18890 return false;
18891
18892 // Ask the target to do addressing mode selection.
18893  SDValue BasePtr;
18894  SDValue Offset;
18895  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18896  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18897 return false;
18898
18899 // Backends without true r+i pre-indexed forms may need to pass a
18900 // constant base with a variable offset so that constant coercion
18901 // will work with the patterns in canonical form.
18902 bool Swapped = false;
18903 if (isa<ConstantSDNode>(BasePtr)) {
18904 std::swap(BasePtr, Offset);
18905 Swapped = true;
18906 }
18907
18908  // Don't create an indexed load / store with zero offset.
18909  if (isNullConstant(Offset) || isNullFPConstant(Offset))
18910    return false;
18911
18912 // Try turning it into a pre-indexed load / store except when:
18913 // 1) The new base ptr is a frame index.
18914 // 2) If N is a store and the new base ptr is either the same as or is a
18915 // predecessor of the value being stored.
18916 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18917 // that would create a cycle.
18918 // 4) All uses are load / store ops that use it as old base ptr.
18919
18920 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18921 // (plus the implicit offset) to a register to preinc anyway.
18922 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18923 return false;
18924
18925 // Check #2.
18926 if (!IsLoad) {
18927 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18928 : cast<StoreSDNode>(N)->getValue();
18929
18930 // Would require a copy.
18931 if (Val == BasePtr)
18932 return false;
18933
18934 // Would create a cycle.
18935 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18936 return false;
18937 }
18938
18939 // Caches for hasPredecessorHelper.
18940  SmallPtrSet<const SDNode *, 32> Visited;
18941  SmallVector<const SDNode *, 16> Worklist;
18942  Worklist.push_back(N);
18943
18944 // If the offset is a constant, there may be other adds of constants that
18945 // can be folded with this one. We should do this to avoid having to keep
18946 // a copy of the original base pointer.
18947 SmallVector<SDNode *, 16> OtherUses;
18949 if (isa<ConstantSDNode>(Offset))
18950 for (SDUse &Use : BasePtr->uses()) {
18951 // Skip the use that is Ptr and uses of other results from BasePtr's
18952 // node (important for nodes that return multiple results).
18953 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18954 continue;
18955
18956 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18957 MaxSteps))
18958 continue;
18959
18960 if (Use.getUser()->getOpcode() != ISD::ADD &&
18961 Use.getUser()->getOpcode() != ISD::SUB) {
18962 OtherUses.clear();
18963 break;
18964 }
18965
18966 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
18967 if (!isa<ConstantSDNode>(Op1)) {
18968 OtherUses.clear();
18969 break;
18970 }
18971
18972 // FIXME: In some cases, we can be smarter about this.
18973 if (Op1.getValueType() != Offset.getValueType()) {
18974 OtherUses.clear();
18975 break;
18976 }
18977
18978 OtherUses.push_back(Use.getUser());
18979 }
18980
18981 if (Swapped)
18982 std::swap(BasePtr, Offset);
18983
18984 // Now check for #3 and #4.
18985 bool RealUse = false;
18986
18987 for (SDNode *User : Ptr->users()) {
18988 if (User == N)
18989 continue;
18990 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
18991 return false;
18992
18993 // If Ptr may be folded in addressing mode of other use, then it's
18994 // not profitable to do this transformation.
18995 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
18996 RealUse = true;
18997 }
18998
18999 if (!RealUse)
19000 return false;
19001
19002  SDValue Result;
19003  if (!IsMasked) {
19004 if (IsLoad)
19005 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19006 else
19007 Result =
19008 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19009 } else {
19010 if (IsLoad)
19011 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19012 Offset, AM);
19013 else
19014 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19015 Offset, AM);
19016 }
19017 ++PreIndexedNodes;
19018 ++NodesCombined;
19019 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19020 Result.dump(&DAG); dbgs() << '\n');
19021 WorklistRemover DeadNodes(*this);
19022 if (IsLoad) {
19023 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19024 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19025 } else {
19026 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19027 }
19028
19029 // Finally, since the node is now dead, remove it from the graph.
19030 deleteAndRecombine(N);
19031
19032 if (Swapped)
19033 std::swap(BasePtr, Offset);
19034
19035 // Replace other uses of BasePtr that can be updated to use Ptr
19036 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
19037 unsigned OffsetIdx = 1;
19038 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19039 OffsetIdx = 0;
19040 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
19041 BasePtr.getNode() && "Expected BasePtr operand");
19042
19043 // We need to replace ptr0 in the following expression:
19044 // x0 * offset0 + y0 * ptr0 = t0
19045 // knowing that
19046 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19047 //
19048 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19049 // indexed load/store and the expression that needs to be re-written.
19050 //
19051 // Therefore, we have:
19052 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
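    // A worked instance with made-up offsets: let the new node be a
    // pre-decrement with offset1 = 4 (x1 = -1, y1 = 1, so t1 = ptr0 - 4) and
    // let the other use be t0 = ptr0 + 16 (x0 = 1, y0 = 1). Then
    // CNV = 16 - (-1)*1*1*4 = 20 and y0*y1 > 0 selects ADD, giving
    // t0 = t1 + 20 = (ptr0 - 4) + 20 = ptr0 + 16, as required.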
19053
19054 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19055 const APInt &Offset0 = CN->getAPIntValue();
19056 const APInt &Offset1 = Offset->getAsAPIntVal();
19057 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19058 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19059 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19060 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19061
19062 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19063
19064 APInt CNV = Offset0;
19065 if (X0 < 0) CNV = -CNV;
19066 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19067 else CNV = CNV - Offset1;
19068
19069 SDLoc DL(OtherUses[i]);
19070
19071 // We can now generate the new expression.
19072 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19073 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19074
19075 SDValue NewUse = DAG.getNode(Opcode,
19076 DL,
19077 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
19078 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
19079 deleteAndRecombine(OtherUses[i]);
19080 }
19081
19082 // Replace the uses of Ptr with uses of the updated base value.
19083 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19084 deleteAndRecombine(Ptr.getNode());
19085 AddToWorklist(Result.getNode());
19086
19087 return true;
19088}
19089
19090static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19091                                   SDValue &BasePtr, SDValue &Offset,
19092                                   ISD::MemIndexedMode &AM,
19093                                   SelectionDAG &DAG,
19094 const TargetLowering &TLI) {
19095 if (PtrUse == N ||
19096 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19097 return false;
19098
19099 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19100 return false;
19101
19102  // Don't create an indexed load / store with zero offset.
19103  if (isNullConstant(Offset) || isNullFPConstant(Offset))
19104    return false;
19105
19106 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19107 return false;
19108
19111 for (SDNode *User : BasePtr->users()) {
19112 if (User == Ptr.getNode())
19113 continue;
19114
19115 // No if there's a later user which could perform the index instead.
19116 if (isa<MemSDNode>(User)) {
19117 bool IsLoad = true;
19118 bool IsMasked = false;
19119 SDValue OtherPtr;
19121 IsMasked, OtherPtr, TLI)) {
19123 Worklist.push_back(User);
19124 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19125 return false;
19126 }
19127 }
19128
19129 // If all the uses are load / store addresses, then don't do the
19130 // transformation.
19131 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19132 for (SDNode *UserUser : User->users())
19133 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19134 return false;
19135 }
19136 }
19137 return true;
19138}
19139
19140static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19141                                         bool &IsMasked, SDValue &Ptr,
19142                                         SDValue &BasePtr, SDValue &Offset,
19143                                         ISD::MemIndexedMode &AM,
19144                                         SelectionDAG &DAG,
19145                                         const TargetLowering &TLI) {
19146  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19147                                IsMasked, Ptr, TLI) ||
19148 Ptr->hasOneUse())
19149 return nullptr;
19150
19151 // Try turning it into a post-indexed load / store except when
19152 // 1) All uses are load / store ops that use it as base ptr (and
19153  //    it may be folded as addressing mode).
19154 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19155 // nor a successor of N. Otherwise, if Op is folded that would
19156 // create a cycle.
19158 for (SDNode *Op : Ptr->users()) {
19159 // Check for #1.
19160 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19161 continue;
19162
19163 // Check for #2.
19166 // Ptr is predecessor to both N and Op.
19167 Visited.insert(Ptr.getNode());
19168 Worklist.push_back(N);
19169 Worklist.push_back(Op);
19170 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19171 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19172 return Op;
19173 }
19174 return nullptr;
19175}
19176
19177/// Try to combine a load/store with an add/sub of the base pointer node into a
19178/// post-indexed load/store. The transformation effectively folds the add/subtract
19179/// into the new indexed load/store, and all of its uses are redirected to the
19180/// new load/store.
19181bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19182 if (Level < AfterLegalizeDAG)
19183 return false;
19184
19185 bool IsLoad = true;
19186 bool IsMasked = false;
19187 SDValue Ptr;
19188  SDValue BasePtr;
19189  SDValue Offset;
19190  ISD::MemIndexedMode AM = ISD::UNINDEXED;
19191  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19192 Offset, AM, DAG, TLI);
19193 if (!Op)
19194 return false;
19195
19196  SDValue Result;
19197  if (!IsMasked)
19198 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19199 Offset, AM)
19200 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19201 BasePtr, Offset, AM);
19202 else
19203 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19204 BasePtr, Offset, AM)
19205                   : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19206                                               BasePtr, Offset, AM);
19207 ++PostIndexedNodes;
19208 ++NodesCombined;
19209 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19210 Result.dump(&DAG); dbgs() << '\n');
19211 WorklistRemover DeadNodes(*this);
19212 if (IsLoad) {
19213 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19214 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19215 } else {
19216 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19217 }
19218
19219 // Finally, since the node is now dead, remove it from the graph.
19220 deleteAndRecombine(N);
19221
19222  // Replace the uses of Op with uses of the updated base value.
19223  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
19224                                Result.getValue(IsLoad ? 1 : 0));
19225 deleteAndRecombine(Op);
19226 return true;
19227}
19228
19229/// Return the base-pointer arithmetic from an indexed \p LD.
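/// For example, a pre- or post-increment load whose offset operand is the
/// constant 8 yields (add BasePtr, 8), while the decrement forms yield
/// (sub BasePtr, 8) (illustrative offsets).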
19230SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19231 ISD::MemIndexedMode AM = LD->getAddressingMode();
19232 assert(AM != ISD::UNINDEXED);
19233 SDValue BP = LD->getOperand(1);
19234 SDValue Inc = LD->getOperand(2);
19235
19236 // Some backends use TargetConstants for load offsets, but don't expect
19237 // TargetConstants in general ADD nodes. We can convert these constants into
19238 // regular Constants (if the constant is not opaque).
19239  assert((Inc.getOpcode() != ISD::TargetConstant ||
19240          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19241 "Cannot split out indexing using opaque target constants");
19242 if (Inc.getOpcode() == ISD::TargetConstant) {
19243 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19244 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19245 ConstInc->getValueType(0));
19246 }
19247
19248 unsigned Opc =
19249 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19250 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19251}
19252
19253static inline ElementCount numVectorEltsOrZero(EVT T) {
19254  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19255}
19256
19257bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19258 EVT STType = Val.getValueType();
19259 EVT STMemType = ST->getMemoryVT();
19260 if (STType == STMemType)
19261 return true;
19262 if (isTypeLegal(STMemType))
19263 return false; // fail.
19264 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
19265 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
19266 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
19267 return true;
19268 }
19269 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
19270 STType.isInteger() && STMemType.isInteger()) {
19271 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
19272 return true;
19273 }
19274 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
19275 Val = DAG.getBitcast(STMemType, Val);
19276 return true;
19277 }
19278 return false; // fail.
19279}
19280
19281bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
19282 EVT LDMemType = LD->getMemoryVT();
19283 EVT LDType = LD->getValueType(0);
19284 assert(Val.getValueType() == LDMemType &&
19285 "Attempting to extend value of non-matching type");
19286 if (LDType == LDMemType)
19287 return true;
19288 if (LDMemType.isInteger() && LDType.isInteger()) {
19289 switch (LD->getExtensionType()) {
19290 case ISD::NON_EXTLOAD:
19291 Val = DAG.getBitcast(LDType, Val);
19292 return true;
19293 case ISD::EXTLOAD:
19294 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
19295 return true;
19296 case ISD::SEXTLOAD:
19297 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
19298 return true;
19299 case ISD::ZEXTLOAD:
19300 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
19301 return true;
19302 }
19303 }
19304 return false;
19305}
19306
19307StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
19308 int64_t &Offset) {
19309 SDValue Chain = LD->getOperand(0);
19310
19311 // Look through CALLSEQ_START.
19312 if (Chain.getOpcode() == ISD::CALLSEQ_START)
19313 Chain = Chain->getOperand(0);
19314
19315 StoreSDNode *ST = nullptr;
19316  SmallVector<SDValue, 8> Aliases;
19317  if (Chain.getOpcode() == ISD::TokenFactor) {
19318 // Look for unique store within the TokenFactor.
19319 for (SDValue Op : Chain->ops()) {
19320 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
19321 if (!Store)
19322 continue;
19323 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19324 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19325 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19326 continue;
19327 // Make sure the store is not aliased with any nodes in TokenFactor.
19328 GatherAllAliases(Store, Chain, Aliases);
19329 if (Aliases.empty() ||
19330 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
19331 ST = Store;
19332 break;
19333 }
19334 } else {
19335 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
19336 if (Store) {
19337 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19338 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19339 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19340 ST = Store;
19341 }
19342 }
19343
19344 return ST;
19345}
19346
19347SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
19348 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
19349 return SDValue();
19350 SDValue Chain = LD->getOperand(0);
19351 int64_t Offset;
19352
19353 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
19354 // TODO: Relax this restriction for unordered atomics (see D66309)
19355 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
19356 return SDValue();
19357
19358 EVT LDType = LD->getValueType(0);
19359 EVT LDMemType = LD->getMemoryVT();
19360 EVT STMemType = ST->getMemoryVT();
19361 EVT STType = ST->getValue().getValueType();
19362
19363 // There are two cases to consider here:
19364 // 1. The store is fixed width and the load is scalable. In this case we
19365 // don't know at compile time if the store completely envelops the load
19366 // so we abandon the optimisation.
19367 // 2. The store is scalable and the load is fixed width. We could
19368 // potentially support a limited number of cases here, but there has been
19369 // no cost-benefit analysis to prove it's worth it.
19370 bool LdStScalable = LDMemType.isScalableVT();
19371 if (LdStScalable != STMemType.isScalableVT())
19372 return SDValue();
19373
19374 // If we are dealing with scalable vectors on a big endian platform the
19375 // calculation of offsets below becomes trickier, since we do not know at
19376 // compile time the absolute size of the vector. Until we've done more
19377 // analysis on big-endian platforms it seems better to bail out for now.
19378 if (LdStScalable && DAG.getDataLayout().isBigEndian())
19379 return SDValue();
19380
19381 // Normalize for Endianness. After this Offset=0 will denote that the least
19382 // significant bit in the loaded value maps to the least significant bit in
19383  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
19384 // n:th least significant byte of the stored value.
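  // For example (hypothetical types): an i64 store followed by an i16 load at
  // the same address gives Offset = 0 from equalBaseIndex; on a big-endian
  // target those two bytes are the most significant ones, so the formula
  // below normalizes the offset to (64 - 16) / 8 - 0 = 6, i.e. the loaded
  // value starts at the 6th least significant byte of the stored value.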
19385 int64_t OrigOffset = Offset;
19386 if (DAG.getDataLayout().isBigEndian())
19387 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
19388 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
19389 8 -
19390 Offset;
19391
19392  // Check that the stored value covers all bits that are loaded.
19393 bool STCoversLD;
19394
19395 TypeSize LdMemSize = LDMemType.getSizeInBits();
19396 TypeSize StMemSize = STMemType.getSizeInBits();
19397 if (LdStScalable)
19398 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
19399 else
19400 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
19401 StMemSize.getFixedValue());
19402
19403 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
19404 if (LD->isIndexed()) {
19405 // Cannot handle opaque target constants and we must respect the user's
19406 // request not to split indexes from loads.
19407 if (!canSplitIdx(LD))
19408 return SDValue();
19409 SDValue Idx = SplitIndexingFromLoad(LD);
19410 SDValue Ops[] = {Val, Idx, Chain};
19411 return CombineTo(LD, Ops, 3);
19412 }
19413 return CombineTo(LD, Val, Chain);
19414 };
19415
19416 if (!STCoversLD)
19417 return SDValue();
19418
19419 // Memory as copy space (potentially masked).
19420 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
19421 // Simple case: Direct non-truncating forwarding
19422 if (LDType.getSizeInBits() == LdMemSize)
19423 return ReplaceLd(LD, ST->getValue(), Chain);
19424 // Can we model the truncate and extension with an and mask?
19425 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
19426 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
19427 // Mask to size of LDMemType
19428 auto Mask =
19429          DAG.getConstant(APInt::getLowBitsSet(LDType.getFixedSizeInBits(),
19430                                               StMemSize.getFixedValue()),
19431 SDLoc(ST), STType);
19432 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
19433 return ReplaceLd(LD, Val, Chain);
19434 }
19435 }
19436
19437 // Handle some cases for big-endian that would be Offset 0 and handled for
19438 // little-endian.
19439 SDValue Val = ST->getValue();
19440 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
19441 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
19442 !LDType.isVector() && isTypeLegal(STType) &&
19443 TLI.isOperationLegal(ISD::SRL, STType)) {
19444 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
19445 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
19446 Offset = 0;
19447 }
19448 }
19449
19450 // TODO: Deal with nonzero offset.
19451 if (LD->getBasePtr().isUndef() || Offset != 0)
19452 return SDValue();
19453  // Model necessary truncations / extensions.
19454 // Truncate Value To Stored Memory Size.
19455 do {
19456 if (!getTruncatedStoreValue(ST, Val))
19457 break;
19458 if (!isTypeLegal(LDMemType))
19459 break;
19460 if (STMemType != LDMemType) {
19461 // TODO: Support vectors? This requires extract_subvector/bitcast.
19462 if (!STMemType.isVector() && !LDMemType.isVector() &&
19463 STMemType.isInteger() && LDMemType.isInteger())
19464 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19465 else
19466 break;
19467 }
19468 if (!extendLoadedValueToExtension(LD, Val))
19469 break;
19470 return ReplaceLd(LD, Val, Chain);
19471 } while (false);
19472
19473 // On failure, cleanup dead nodes we may have created.
19474 if (Val->use_empty())
19475 deleteAndRecombine(Val.getNode());
19476 return SDValue();
19477}
19478
19479SDValue DAGCombiner::visitLOAD(SDNode *N) {
19480 LoadSDNode *LD = cast<LoadSDNode>(N);
19481 SDValue Chain = LD->getChain();
19482 SDValue Ptr = LD->getBasePtr();
19483
19484 // If load is not volatile and there are no uses of the loaded value (and
19485 // the updated indexed value in case of indexed loads), change uses of the
19486 // chain value into uses of the chain input (i.e. delete the dead load).
19487 // TODO: Allow this for unordered atomics (see D66309)
19488 if (LD->isSimple()) {
19489 if (N->getValueType(1) == MVT::Other) {
19490 // Unindexed loads.
19491 if (!N->hasAnyUseOfValue(0)) {
19492 // It's not safe to use the two value CombineTo variant here. e.g.
19493 // v1, chain2 = load chain1, loc
19494 // v2, chain3 = load chain2, loc
19495 // v3 = add v2, c
19496 // Now we replace use of chain2 with chain1. This makes the second load
19497 // isomorphic to the one we are deleting, and thus makes this load live.
19498 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19499 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19500 dbgs() << "\n");
19501 WorklistRemover DeadNodes(*this);
19502 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19503 AddUsersToWorklist(Chain.getNode());
19504 if (N->use_empty())
19505 deleteAndRecombine(N);
19506
19507 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19508 }
19509 } else {
19510 // Indexed loads.
19511 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19512
19513 // If this load has an opaque TargetConstant offset, then we cannot split
19514 // the indexing into an add/sub directly (that TargetConstant may not be
19515 // valid for a different type of node, and we cannot convert an opaque
19516 // target constant into a regular constant).
19517 bool CanSplitIdx = canSplitIdx(LD);
19518
19519 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19520 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19521 SDValue Index;
19522 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19523 Index = SplitIndexingFromLoad(LD);
19524 // Try to fold the base pointer arithmetic into subsequent loads and
19525 // stores.
19526 AddUsersToWorklist(N);
19527 } else
19528 Index = DAG.getUNDEF(N->getValueType(1));
19529 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19530 dbgs() << "\nWith: "; Undef.dump(&DAG);
19531 dbgs() << " and 2 other values\n");
19532 WorklistRemover DeadNodes(*this);
19533 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19534 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19535 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19536 deleteAndRecombine(N);
19537 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19538 }
19539 }
19540 }
19541
19542 // If this load is directly stored, replace the load value with the stored
19543 // value.
19544 if (auto V = ForwardStoreValueToDirectLoad(LD))
19545 return V;
19546
19547 // Try to infer better alignment information than the load already has.
19548 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19549 !LD->isAtomic()) {
19550 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19551 if (*Alignment > LD->getAlign() &&
19552 isAligned(*Alignment, LD->getSrcValueOffset())) {
19553 SDValue NewLoad = DAG.getExtLoad(
19554 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19555 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19556 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19557 // NewLoad will always be N as we are only refining the alignment
19558 assert(NewLoad.getNode() == N);
19559 (void)NewLoad;
19560 }
19561 }
19562 }
19563
19564 if (LD->isUnindexed()) {
19565 // Walk up chain skipping non-aliasing memory nodes.
19566 SDValue BetterChain = FindBetterChain(LD, Chain);
19567
19568 // If there is a better chain.
19569 if (Chain != BetterChain) {
19570 SDValue ReplLoad;
19571
19572 // Replace the chain to avoid the dependency on the old chain.
19573 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19574 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19575 BetterChain, Ptr, LD->getMemOperand());
19576 } else {
19577 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19578 LD->getValueType(0),
19579 BetterChain, Ptr, LD->getMemoryVT(),
19580 LD->getMemOperand());
19581 }
19582
19583 // Create token factor to keep old chain connected.
19584 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19585 MVT::Other, Chain, ReplLoad.getValue(1));
19586
19587 // Replace uses with load result and token factor
19588 return CombineTo(N, ReplLoad.getValue(0), Token);
19589 }
19590 }
19591
19592 // Try transforming N to an indexed load.
19593 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19594 return SDValue(N, 0);
19595
19596 // Try to slice up N to more direct loads if the slices are mapped to
19597 // different register banks or pairing can take place.
19598 if (SliceUpLoad(N))
19599 return SDValue(N, 0);
19600
19601 return SDValue();
19602}
19603
19604namespace {
19605
19606 /// Helper structure used to slice a load into smaller loads.
19607/// Basically a slice is obtained from the following sequence:
19608/// Origin = load Ty1, Base
19609/// Shift = srl Ty1 Origin, CstTy Amount
19610/// Inst = trunc Shift to Ty2
19611///
19612/// Then, it will be rewritten into:
19613/// Slice = load SliceTy, Base + SliceOffset
19614/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19615///
19616/// SliceTy is deduced from the number of bits that are actually used to
19617/// build Inst.
19618struct LoadedSlice {
19619 /// Helper structure used to compute the cost of a slice.
19620 struct Cost {
19621 /// Are we optimizing for code size.
19622 bool ForCodeSize = false;
19623
19624 /// Various cost.
19625 unsigned Loads = 0;
19626 unsigned Truncates = 0;
19627 unsigned CrossRegisterBanksCopies = 0;
19628 unsigned ZExts = 0;
19629 unsigned Shift = 0;
19630
19631 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19632
19633 /// Get the cost of one isolated slice.
19634 Cost(const LoadedSlice &LS, bool ForCodeSize)
19635 : ForCodeSize(ForCodeSize), Loads(1) {
19636 EVT TruncType = LS.Inst->getValueType(0);
19637 EVT LoadedType = LS.getLoadedType();
19638 if (TruncType != LoadedType &&
19639 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19640 ZExts = 1;
19641 }
19642
19643 /// Account for slicing gain in the current cost.
19644 /// Slicing provide a few gains like removing a shift or a
19645 /// truncate. This method allows to grow the cost of the original
19646 /// load with the gain from this slice.
19647 void addSliceGain(const LoadedSlice &LS) {
19648 // Each slice saves a truncate.
19649 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19650 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19651 ++Truncates;
19652 // If there is a shift amount, this slice gets rid of it.
19653 if (LS.Shift)
19654 ++Shift;
19655 // If this slice can merge a cross register bank copy, account for it.
19656 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19657 ++CrossRegisterBanksCopies;
19658 }
19659
19660 Cost &operator+=(const Cost &RHS) {
19661 Loads += RHS.Loads;
19662 Truncates += RHS.Truncates;
19663 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19664 ZExts += RHS.ZExts;
19665 Shift += RHS.Shift;
19666 return *this;
19667 }
19668
19669 bool operator==(const Cost &RHS) const {
19670 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19671 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19672 ZExts == RHS.ZExts && Shift == RHS.Shift;
19673 }
19674
19675 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19676
19677 bool operator<(const Cost &RHS) const {
19678 // Assume cross register banks copies are as expensive as loads.
19679 // FIXME: Do we want some more target hooks?
19680 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19681 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19682 // Unless we are optimizing for code size, consider the
19683 // expensive operation first.
19684 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19685 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19686 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19687 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19688 }
19689
19690 bool operator>(const Cost &RHS) const { return RHS < *this; }
19691
19692 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19693
19694 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19695 };
19696
19697 // The last instruction that represents the slice. This should be a
19698 // truncate instruction.
19699 SDNode *Inst;
19700
19701 // The original load instruction.
19702 LoadSDNode *Origin;
19703
19704 // The right shift amount in bits from the original load.
19705 unsigned Shift;
19706
19707 // The DAG from which Origin came.
19708 // This is used to get some contextual information about legal types, etc.
19709 SelectionDAG *DAG;
19710
19711 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19712 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19713 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19714
19715 /// Get the bits used in a chunk of bits \p BitWidth large.
19716 /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
19717 /// unused bits set to 0.
19718 APInt getUsedBits() const {
19719 // Reproduce the trunc(lshr) sequence:
19720 // - Start from the truncated value.
19721 // - Zero extend to the desired bit width.
19722 // - Shift left.
19723 assert(Origin && "No original load to compare against.");
19724 unsigned BitWidth = Origin->getValueSizeInBits(0);
19725 assert(Inst && "This slice is not bound to an instruction");
19726 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19727 "Extracted slice is bigger than the whole type!");
19728 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19729 UsedBits.setAllBits();
19730 UsedBits = UsedBits.zext(BitWidth);
19731 UsedBits <<= Shift;
19732 return UsedBits;
19733 }
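// For illustration: if Origin is an i64 load and Inst is an i16 truncate of
// (srl Origin, 32), the result is 0x0000FFFF00000000, i.e. bits [32, 48) of
// the originally loaded value.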
19734
19735 /// Get the size of the slice to be loaded in bytes.
19736 unsigned getLoadedSize() const {
19737 unsigned SliceSize = getUsedBits().popcount();
19738 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19739 return SliceSize / 8;
19740 }
19741
19742 /// Get the type that will be loaded for this slice.
19743 /// Note: This may not be the final type for the slice.
19744 EVT getLoadedType() const {
19745 assert(DAG && "Missing context");
19746 LLVMContext &Ctxt = *DAG->getContext();
19747 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19748 }
19749
19750 /// Get the alignment of the load used for this slice.
19751 Align getAlign() const {
19752 Align Alignment = Origin->getAlign();
19753 uint64_t Offset = getOffsetFromBase();
19754 if (Offset != 0)
19755 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19756 return Alignment;
19757 }
19758
19759 /// Check if this slice can be rewritten with legal operations.
19760 bool isLegal() const {
19761 // An invalid slice is not legal.
19762 if (!Origin || !Inst || !DAG)
19763 return false;
19764
19765 // Offsets are for indexed loads only; we do not handle that.
19766 if (!Origin->getOffset().isUndef())
19767 return false;
19768
19769 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19770
19771 // Check that the type is legal.
19772 EVT SliceType = getLoadedType();
19773 if (!TLI.isTypeLegal(SliceType))
19774 return false;
19775
19776 // Check that the load is legal for this type.
19777 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19778 return false;
19779
19780 // Check that the offset can be computed.
19781 // 1. Check its type.
19782 EVT PtrType = Origin->getBasePtr().getValueType();
19783 if (PtrType == MVT::Untyped || PtrType.isExtended())
19784 return false;
19785
19786 // 2. Check that it fits in the immediate.
19787 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19788 return false;
19789
19790 // 3. Check that the computation is legal.
19791 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19792 return false;
19793
19794 // Check that the zext is legal if it needs one.
19795 EVT TruncateType = Inst->getValueType(0);
19796 if (TruncateType != SliceType &&
19797 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19798 return false;
19799
19800 return true;
19801 }
19802
19803 /// Get the offset in bytes of this slice in the original chunk of
19804 /// bits.
19805 /// \pre DAG != nullptr.
19806 uint64_t getOffsetFromBase() const {
19807 assert(DAG && "Missing context.");
19808 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19809 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19810 uint64_t Offset = Shift / 8;
19811 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19812 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19813 "The size of the original loaded type is not a multiple of a"
19814 " byte.");
19815 // If Offset is bigger than TySizeInBytes, it means we are loading all
19816 // zeros. This should have been optimized earlier in the process.
19817 assert(TySizeInBytes > Offset &&
19818 "Invalid shift amount for given loaded size");
19819 if (IsBigEndian)
19820 Offset = TySizeInBytes - Offset - getLoadedSize();
19821 return Offset;
19822 }
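// For illustration: with an 8-byte origin, Shift = 32 and a 2-byte slice,
// the offset is 4 on a little-endian target and 8 - 4 - 2 = 2 on a
// big-endian target.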
19823
19824 /// Generate the sequence of instructions to load the slice
19825 /// represented by this object and redirect the uses of this slice to
19826 /// this new sequence of instructions.
19827 /// \pre this->Inst && this->Origin are valid Instructions and this
19828 /// object passed the legal check: LoadedSlice::isLegal returned true.
19829 /// \return The last instruction of the sequence used to load the slice.
19830 SDValue loadSlice() const {
19831 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19832 const SDValue &OldBaseAddr = Origin->getBasePtr();
19833 SDValue BaseAddr = OldBaseAddr;
19834 // Get the offset in that chunk of bytes w.r.t. the endianness.
19835 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19836 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19837 if (Offset) {
19838 // BaseAddr = BaseAddr + Offset.
19839 EVT ArithType = BaseAddr.getValueType();
19840 SDLoc DL(Origin);
19841 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19842 DAG->getConstant(Offset, DL, ArithType));
19843 }
19844
19845 // Create the type of the loaded slice according to its size.
19846 EVT SliceType = getLoadedType();
19847
19848 // Create the load for the slice.
19849 SDValue LastInst =
19850 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19851 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19852 Origin->getMemOperand()->getFlags());
19853 // If the final type is not the same as the loaded type, this means that
19854 // we have to pad with zero. Create a zero extend for that.
19855 EVT FinalType = Inst->getValueType(0);
19856 if (SliceType != FinalType)
19857 LastInst =
19858 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19859 return LastInst;
19860 }
19861
19862 /// Check if this slice can be merged with an expensive cross register
19863 /// bank copy. E.g.,
19864 /// i = load i32
19865 /// f = bitcast i32 i to float
19866 bool canMergeExpensiveCrossRegisterBankCopy() const {
19867 if (!Inst || !Inst->hasOneUse())
19868 return false;
19869 SDNode *User = *Inst->user_begin();
19870 if (User->getOpcode() != ISD::BITCAST)
19871 return false;
19872 assert(DAG && "Missing context");
19873 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19874 EVT ResVT = User->getValueType(0);
19875 const TargetRegisterClass *ResRC =
19876 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
19877 const TargetRegisterClass *ArgRC =
19878 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
19879 User->getOperand(0)->isDivergent());
19880 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19881 return false;
19882
19883 // At this point, we know that we perform a cross-register-bank copy.
19884 // Check if it is expensive.
19885 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19886 // Assume bitcasts are cheap, unless both register classes do not
19887 // explicitly share a common sub class.
19888 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19889 return false;
19890
19891 // Check if it will be merged with the load.
19892 // 1. Check the alignment / fast memory access constraint.
19893 unsigned IsFast = 0;
19894 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19895 Origin->getAddressSpace(), getAlign(),
19896 Origin->getMemOperand()->getFlags(), &IsFast) ||
19897 !IsFast)
19898 return false;
19899
19900 // 2. Check that the load is a legal operation for that type.
19901 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19902 return false;
19903
19904 // 3. Check that we do not have a zext in the way.
19905 if (Inst->getValueType(0) != getLoadedType())
19906 return false;
19907
19908 return true;
19909 }
19910};
19911
19912} // end anonymous namespace
19913
19914/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19915/// \p UsedBits looks like 0..0 1..1 0..0.
19916static bool areUsedBitsDense(const APInt &UsedBits) {
19917 // If all the bits are one, this is dense!
19918 if (UsedBits.isAllOnes())
19919 return true;
19920
19921 // Get rid of the unused bits on the right.
19922 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19923 // Get rid of the unused bits on the left.
19924 if (NarrowedUsedBits.countl_zero())
19925 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19926 // Check that the chunk of bits is completely used.
19927 return NarrowedUsedBits.isAllOnes();
19928}
19929
19930/// Check whether or not \p First and \p Second are next to each other
19931/// in memory. This means that there is no hole between the bits loaded
19932/// by \p First and the bits loaded by \p Second.
19933static bool areSlicesNextToEachOther(const LoadedSlice &First,
19934 const LoadedSlice &Second) {
19935 assert(First.Origin == Second.Origin && First.Origin &&
19936 "Unable to match different memory origins.");
19937 APInt UsedBits = First.getUsedBits();
19938 assert((UsedBits & Second.getUsedBits()) == 0 &&
19939 "Slices are not supposed to overlap.");
19940 UsedBits |= Second.getUsedBits();
19941 return areUsedBitsDense(UsedBits);
19942}
19943
19944/// Adjust the \p GlobalLSCost according to the target
19945 /// pairing capabilities and the layout of the slices.
19946 /// \pre \p GlobalLSCost should account for at least as many loads as
19947 /// there are slices in \p LoadedSlices.
19948 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19949 LoadedSlice::Cost &GlobalLSCost) {
19950 unsigned NumberOfSlices = LoadedSlices.size();
19952 // If there are fewer than 2 elements, no pairing is possible.
19952 if (NumberOfSlices < 2)
19953 return;
19954
19955 // Sort the slices so that elements that are likely to be next to each
19956 // other in memory are next to each other in the list.
19957 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19958 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19959 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19960 });
19961 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19962 // First (resp. Second) is the first (resp. second) potential candidate
19963 // to be placed in a paired load.
19964 const LoadedSlice *First = nullptr;
19965 const LoadedSlice *Second = nullptr;
19966 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19967 // Set the beginning of the pair.
19968 First = Second) {
19969 Second = &LoadedSlices[CurrSlice];
19970
19971 // If First is NULL, it means we start a new pair.
19972 // Get to the next slice.
19973 if (!First)
19974 continue;
19975
19976 EVT LoadedType = First->getLoadedType();
19977
19978 // If the types of the slices are different, we cannot pair them.
19979 if (LoadedType != Second->getLoadedType())
19980 continue;
19981
19982 // Check if the target supplies paired loads for this type.
19983 Align RequiredAlignment;
19984 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19985 // Move to the next pair; this type is hopeless.
19986 Second = nullptr;
19987 continue;
19988 }
19989 // Check if we meet the alignment requirement.
19990 if (First->getAlign() < RequiredAlignment)
19991 continue;
19992
19993 // Check that both loads are next to each other in memory.
19994 if (!areSlicesNextToEachOther(*First, *Second))
19995 continue;
19996
19997 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19998 --GlobalLSCost.Loads;
19999 // Move to the next pair.
20000 Second = nullptr;
20001 }
20002}
20003
20004/// Check the profitability of all involved LoadedSlice.
20005 /// Currently, it is considered profitable if there are exactly two
20006/// involved slices (1) which are (2) next to each other in memory, and
20007/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20008///
20009/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20010/// the elements themselves.
20011///
20012 /// FIXME: When the cost model is mature enough, we can relax
20013 /// constraints (1) and (2).
20014 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20015 const APInt &UsedBits, bool ForCodeSize) {
20016 unsigned NumberOfSlices = LoadedSlices.size();
20017 if (StressLoadSlicing)
20018 return NumberOfSlices > 1;
20019
20020 // Check (1).
20021 if (NumberOfSlices != 2)
20022 return false;
20023
20024 // Check (2).
20025 if (!areUsedBitsDense(UsedBits))
20026 return false;
20027
20028 // Check (3).
20029 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20030 // The original code has one big load.
20031 OrigCost.Loads = 1;
20032 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20033 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20034 // Accumulate the cost of all the slices.
20035 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20036 GlobalSlicingCost += SliceCost;
20037
20038 // Account as cost in the original configuration the gain obtained
20039 // with the current slices.
20040 OrigCost.addSliceGain(LS);
20041 }
20042
20043 // If the target supports paired load, adjust the cost accordingly.
20044 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20045 return OrigCost > GlobalSlicingCost;
20046}
20047
20048/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20049/// operations, split it in the various pieces being extracted.
20050///
20051/// This sort of thing is introduced by SROA.
20052/// This slicing takes care not to insert overlapping loads.
20053/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20054bool DAGCombiner::SliceUpLoad(SDNode *N) {
20055 if (Level < AfterLegalizeDAG)
20056 return false;
20057
20058 LoadSDNode *LD = cast<LoadSDNode>(N);
20059 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20060 !LD->getValueType(0).isInteger())
20061 return false;
20062
20063 // The algorithm to split up a load of a scalable vector into individual
20064 // elements currently requires knowing the length of the loaded type,
20065 // so will need adjusting to work on scalable vectors.
20066 if (LD->getValueType(0).isScalableVector())
20067 return false;
20068
20069 // Keep track of already used bits to detect overlapping values.
20070 // In that case, we will just abort the transformation.
20071 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20072
20073 SmallVector<LoadedSlice, 4> LoadedSlices;
20074
20075 // Check if this load is used as several smaller chunks of bits.
20076 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20077 // of computation for each trunc.
20078 for (SDUse &U : LD->uses()) {
20079 // Skip the uses of the chain.
20080 if (U.getResNo() != 0)
20081 continue;
20082
20083 SDNode *User = U.getUser();
20084 unsigned Shift = 0;
20085
20086 // Check if this is a trunc(lshr).
20087 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20088 isa<ConstantSDNode>(User->getOperand(1))) {
20089 Shift = User->getConstantOperandVal(1);
20090 User = *User->user_begin();
20091 }
20092
20093 // At this point, User is a truncate iff we encountered a trunc or a
20094 // trunc(lshr).
20095 if (User->getOpcode() != ISD::TRUNCATE)
20096 return false;
20097
20098 // The width of the type must be a power of 2 and greater than 8-bits.
20099 // Otherwise the load cannot be represented in LLVM IR.
20100 // Moreover, if we shifted with a non-8-bits multiple, the slice
20101 // will be across several bytes. We do not support that.
20102 unsigned Width = User->getValueSizeInBits(0);
20103 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20104 return false;
20105
20106 // Build the slice for this chain of computations.
20107 LoadedSlice LS(User, LD, Shift, &DAG);
20108 APInt CurrentUsedBits = LS.getUsedBits();
20109
20110 // Check if this slice overlaps with another.
20111 if ((CurrentUsedBits & UsedBits) != 0)
20112 return false;
20113 // Update the bits used globally.
20114 UsedBits |= CurrentUsedBits;
20115
20116 // Check if the new slice would be legal.
20117 if (!LS.isLegal())
20118 return false;
20119
20120 // Record the slice.
20121 LoadedSlices.push_back(LS);
20122 }
20123
20124 // Abort slicing if it does not seem to be profitable.
20125 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20126 return false;
20127
20128 ++SlicedLoads;
20129
20130 // Rewrite each chain to use an independent load.
20131 // By construction, each chain can be represented by a unique load.
20132
20133 // Prepare the argument for the new token factor for all the slices.
20134 SmallVector<SDValue, 8> ArgChains;
20135 for (const LoadedSlice &LS : LoadedSlices) {
20136 SDValue SliceInst = LS.loadSlice();
20137 CombineTo(LS.Inst, SliceInst, true);
20138 if (SliceInst.getOpcode() != ISD::LOAD)
20139 SliceInst = SliceInst.getOperand(0);
20140 assert(SliceInst->getOpcode() == ISD::LOAD &&
20141 "It takes more than a zext to get to the loaded slice!!");
20142 ArgChains.push_back(SliceInst.getValue(1));
20143 }
20144
20145 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20146 ArgChains);
20147 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20148 AddToWorklist(Chain.getNode());
20149 return true;
20150}
20151
20152/// Check to see if V is (and load (ptr), imm), where the load is having
20153/// specific bytes cleared out. If so, return the byte size being masked out
20154/// and the shift amount.
20155static std::pair<unsigned, unsigned>
20157 std::pair<unsigned, unsigned> Result(0, 0);
20158
20159 // Check for the structure we're looking for.
20160 if (V->getOpcode() != ISD::AND ||
20161 !isa<ConstantSDNode>(V->getOperand(1)) ||
20162 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20163 return Result;
20164
20165 // Check the chain and pointer.
20166 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20167 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20168
20169 // This only handles simple types.
20170 if (V.getValueType() != MVT::i16 &&
20171 V.getValueType() != MVT::i32 &&
20172 V.getValueType() != MVT::i64)
20173 return Result;
20174
20175 // Check the constant mask. Invert it so that the bits being masked out are
20176 // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
20177 // follow the sign bit for uniformity.
20178 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20179 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20180 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20181 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20182 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20183 if (NotMaskLZ == 64) return Result; // All zero mask.
20184
20185 // See if we have a continuous run of bits. If so, we have 0*1+0*
20186 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20187 return Result;
20188
20189 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20190 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20191 NotMaskLZ -= 64-V.getValueSizeInBits();
20192
20193 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20194 switch (MaskedBytes) {
20195 case 1:
20196 case 2:
20197 case 4: break;
20198 default: return Result; // All one mask, or 5-byte mask.
20199 }
20200
20201 // Verify that the first bit starts at a multiple of mask so that the access
20202 // is aligned the same as the access width.
20203 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20204
20205 // For narrowing to be valid, it must be the case that the load is the
20206 // memory operation immediately preceding the store.
20207 if (LD == Chain.getNode())
20208 ; // ok.
20209 else if (Chain->getOpcode() == ISD::TokenFactor &&
20210 SDValue(LD, 1).hasOneUse()) {
20211 // LD has only 1 chain use, so there are no indirect dependencies.
20212 if (!LD->isOperandOf(Chain.getNode()))
20213 return Result;
20214 } else
20215 return Result; // Fail.
20216
20217 Result.first = MaskedBytes;
20218 Result.second = NotMaskTZ/8;
20219 return Result;
20220}
20221
20222/// Check to see if IVal is something that provides a value as specified by
20223/// MaskInfo. If so, replace the specified store with a narrower store of
20224/// truncated IVal.
20225static SDValue
20226ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20227 SDValue IVal, StoreSDNode *St,
20228 DAGCombiner *DC) {
20229 unsigned NumBytes = MaskInfo.first;
20230 unsigned ByteShift = MaskInfo.second;
20231 SelectionDAG &DAG = DC->getDAG();
20232
20233 // Check to see if IVal is all zeros in the part being masked in by the 'or'
20234 // that uses this. If not, this is not a replacement.
20235 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20236 ByteShift*8, (ByteShift+NumBytes)*8);
20237 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20238
20239 // Check that it is legal on the target to do this. It is legal if the new
20240 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20241 // legalization. If the source type is legal, but the store type isn't, see
20242 // if we can use a truncating store.
20243 MVT VT = MVT::getIntegerVT(NumBytes * 8);
20244 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20245 bool UseTruncStore;
20246 if (DC->isTypeLegal(VT))
20247 UseTruncStore = false;
20248 else if (TLI.isTypeLegal(IVal.getValueType()) &&
20249 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20250 UseTruncStore = true;
20251 else
20252 return SDValue();
20253
20254 // Can't do this for indexed stores.
20255 if (St->isIndexed())
20256 return SDValue();
20257
20258 // Check that the target doesn't think this is a bad idea.
20259 if (St->getMemOperand() &&
20260 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
20261 *St->getMemOperand()))
20262 return SDValue();
20263
20264 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
20265 // shifted by ByteShift and truncated down to NumBytes.
20266 if (ByteShift) {
20267 SDLoc DL(IVal);
20268 IVal = DAG.getNode(
20269 ISD::SRL, DL, IVal.getValueType(), IVal,
20270 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
20271 }
20272
20273 // Figure out the offset for the store and the alignment of the access.
20274 unsigned StOffset;
20275 if (DAG.getDataLayout().isLittleEndian())
20276 StOffset = ByteShift;
20277 else
20278 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20279
20280 SDValue Ptr = St->getBasePtr();
20281 if (StOffset) {
20282 SDLoc DL(IVal);
20283 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
20284 }
20285
20286 ++OpsNarrowed;
20287 if (UseTruncStore)
20288 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20289 St->getPointerInfo().getWithOffset(StOffset),
20290 VT, St->getOriginalAlign());
20291
20292 // Truncate down to the new size.
20293 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
20294
20295 return DAG
20296 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20297 St->getPointerInfo().getWithOffset(StOffset),
20298 St->getOriginalAlign());
20299}
20300
20301/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
20302/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
20303/// narrowing the load and store if it would end up being a win for performance
20304/// or code size.
20305SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
20306 StoreSDNode *ST = cast<StoreSDNode>(N);
20307 if (!ST->isSimple())
20308 return SDValue();
20309
20310 SDValue Chain = ST->getChain();
20311 SDValue Value = ST->getValue();
20312 SDValue Ptr = ST->getBasePtr();
20313 EVT VT = Value.getValueType();
20314
20315 if (ST->isTruncatingStore() || VT.isVector())
20316 return SDValue();
20317
20318 unsigned Opc = Value.getOpcode();
20319
20320 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
20321 !Value.hasOneUse())
20322 return SDValue();
20323
20324 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
20325 // is a byte mask indicating a consecutive number of bytes, check to see if
20326 // Y is known to provide just those bytes. If so, we try to replace the
20327 // load + replace + store sequence with a single (narrower) store, which makes
20328 // the load dead.
20329 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
20330 std::pair<unsigned, unsigned> MaskedLoad;
20331 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
20332 if (MaskedLoad.first)
20333 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20334 Value.getOperand(1), ST,this))
20335 return NewST;
20336
20337 // Or is commutative, so try swapping X and Y.
20338 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
20339 if (MaskedLoad.first)
20340 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20341 Value.getOperand(0), ST,this))
20342 return NewST;
20343 }
20344
20344
20345 if (!EnableReduceLoadOpStoreWidth)
20346 return SDValue();
20347
20348 if (Value.getOperand(1).getOpcode() != ISD::Constant)
20349 return SDValue();
20350
20351 SDValue N0 = Value.getOperand(0);
20352 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
20353 Chain == SDValue(N0.getNode(), 1)) {
20354 LoadSDNode *LD = cast<LoadSDNode>(N0);
20355 if (LD->getBasePtr() != Ptr ||
20356 LD->getPointerInfo().getAddrSpace() !=
20357 ST->getPointerInfo().getAddrSpace())
20358 return SDValue();
20359
20360 // Find the type NewVT to narrow the load / op / store to.
20361 SDValue N1 = Value.getOperand(1);
20362 unsigned BitWidth = N1.getValueSizeInBits();
20363 APInt Imm = N1->getAsAPIntVal();
20364 if (Opc == ISD::AND)
20365 Imm.flipAllBits();
20366 if (Imm == 0 || Imm.isAllOnes())
20367 return SDValue();
20368 // Find the least/most significant bits that need to be part of the narrowed
20369 // operation. We assume target will need to address/access full bytes, so
20370 // we make sure to align LSB and MSB at byte boundaries.
20371 unsigned BitsPerByteMask = 7u;
20372 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
20373 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
20374 unsigned NewBW = NextPowerOf2(MSB - LSB);
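// For illustration: if Imm is 0x00FF0000 for an i32 value, then LSB = 16,
// MSB = 23 and NewBW starts out as NextPowerOf2(7) = 8.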
20375 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20376 // The narrowing should be profitable, the load/store operation should be
20377 // legal (or custom) and the store size should be equal to the NewVT width.
20378 while (NewBW < BitWidth &&
20379 (NewVT.getStoreSizeInBits() != NewBW ||
20380 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
20382 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
20383 NewBW = NextPowerOf2(NewBW);
20384 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20385 }
20386 if (NewBW >= BitWidth)
20387 return SDValue();
20388
20389 // If we come this far, NewVT/NewBW reflect a power-of-2 sized type that is
20390 // large enough to cover all bits that should be modified. This type might,
20391 // however, be larger than really needed (such as i32 while we actually only
20392 // need to modify one byte). Now we need to find out how to align the memory
20393 // accesses to satisfy preferred alignments as well as to avoid accessing
20394 // memory outside the store size of the original access.
20395
20396 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
20397
20398 // Let ShAmt denote the number of bits to skip, counted from the least
20399 // significant bits of Imm. And let PtrOff denote how much the pointer needs
20400 // to be offset (in bytes) for the new access.
20401 unsigned ShAmt = 0;
20402 uint64_t PtrOff = 0;
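// For illustration: with VTStoreSize = 32, NewBW = 8 and ShAmt = 16, PtrOff
// becomes 16 / 8 = 2 on a little-endian target and (32 - 8 - 16) / 8 = 1 on
// a big-endian target.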
20403 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
20404 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
20405 if (ShAmt > LSB)
20406 return SDValue();
20407 if (ShAmt + NewBW < MSB)
20408 continue;
20409
20410 // Calculate PtrOff.
20411 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
20412 ? VTStoreSize - NewBW - ShAmt
20413 : ShAmt;
20414 PtrOff = PtrAdjustmentInBits / 8;
20415
20416 // Now check if narrow access is allowed and fast, considering alignments.
20417 unsigned IsFast = 0;
20418 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20419 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
20420 LD->getAddressSpace(), NewAlign,
20421 LD->getMemOperand()->getFlags(), &IsFast) &&
20422 IsFast)
20423 break;
20424 }
20425 // If the loop above did not find any accepted ShAmt, we need to exit here.
20426 if (ShAmt + NewBW > VTStoreSize)
20427 return SDValue();
20428
20429 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
20430 if (Opc == ISD::AND)
20431 NewImm.flipAllBits();
20432 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20433 SDValue NewPtr =
20434 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
20435 SDValue NewLD =
20436 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
20437 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20438 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20439 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
20440 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
20441 SDValue NewST =
20442 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
20443 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20444
20445 AddToWorklist(NewPtr.getNode());
20446 AddToWorklist(NewLD.getNode());
20447 AddToWorklist(NewVal.getNode());
20448 WorklistRemover DeadNodes(*this);
20449 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
20450 ++OpsNarrowed;
20451 return NewST;
20452 }
20453
20454 return SDValue();
20455}
20456
20457/// For a given floating point load / store pair, if the load value isn't used
20458/// by any other operations, then consider transforming the pair to integer
20459/// load / store operations if the target deems the transformation profitable.
20460SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
20461 StoreSDNode *ST = cast<StoreSDNode>(N);
20462 SDValue Value = ST->getValue();
20463 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
20464 Value.hasOneUse()) {
20465 LoadSDNode *LD = cast<LoadSDNode>(Value);
20466 EVT VT = LD->getMemoryVT();
20467 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
20468 LD->isNonTemporal() || ST->isNonTemporal() ||
20469 LD->getPointerInfo().getAddrSpace() != 0 ||
20470 ST->getPointerInfo().getAddrSpace() != 0)
20471 return SDValue();
20472
20473 TypeSize VTSize = VT.getSizeInBits();
20474
20475 // We don't know the size of scalable types at compile time so we cannot
20476 // create an integer of the equivalent size.
20477 if (VTSize.isScalable())
20478 return SDValue();
20479
20480 unsigned FastLD = 0, FastST = 0;
20481 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20482 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20483 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20484 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20485 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20486 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20487 *LD->getMemOperand(), &FastLD) ||
20488 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20489 *ST->getMemOperand(), &FastST) ||
20490 !FastLD || !FastST)
20491 return SDValue();
20492
20493 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
20494 LD->getBasePtr(), LD->getMemOperand());
20495
20496 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
20497 ST->getBasePtr(), ST->getMemOperand());
20498
20499 AddToWorklist(NewLD.getNode());
20500 AddToWorklist(NewST.getNode());
20501 WorklistRemover DeadNodes(*this);
20502 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20503 ++LdStFP2Int;
20504 return NewST;
20505 }
20506
20507 return SDValue();
20508}
20509
20510// This is a helper function for visitMUL to check the profitability
20511// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20512// MulNode is the original multiply, AddNode is (add x, c1),
20513// and ConstNode is c2.
20514//
20515// If the (add x, c1) has multiple uses, we could increase
20516// the number of adds if we make this transformation.
20517// It would only be worth doing this if we can remove a
20518// multiply in the process. Check for that here.
20519// To illustrate:
20520// (A + c1) * c3
20521// (A + c2) * c3
20522// We're checking for cases where we have common "c3 * A" expressions.
20523bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20524 SDValue ConstNode) {
20525 APInt Val;
20526
20527 // If the add only has one use, and the target thinks the folding is
20528 // profitable or does not lead to worse code, this would be OK to do.
20529 if (AddNode->hasOneUse() &&
20530 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20531 return true;
20532
20533 // Walk all the users of the constant with which we're multiplying.
20534 for (SDNode *User : ConstNode->users()) {
20535 if (User == MulNode) // This use is the one we're on right now. Skip it.
20536 continue;
20537
20538 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
20539 SDNode *OtherOp;
20540 SDNode *MulVar = AddNode.getOperand(0).getNode();
20541
20542 // OtherOp is what we're multiplying against the constant.
20543 if (User->getOperand(0) == ConstNode)
20544 OtherOp = User->getOperand(1).getNode();
20545 else
20546 OtherOp = User->getOperand(0).getNode();
20547
20548 // Check to see if multiply is with the same operand of our "add".
20549 //
20550 // ConstNode = CONST
20551 // User = ConstNode * A <-- visiting User. OtherOp is A.
20552 // ...
20553 // AddNode = (A + c1) <-- MulVar is A.
20554 // = AddNode * ConstNode <-- current visiting instruction.
20555 //
20556 // If we make this transformation, we will have a common
20557 // multiply (ConstNode * A) that we can save.
20558 if (OtherOp == MulVar)
20559 return true;
20560
20561 // Now check to see if a future expansion will give us a common
20562 // multiply.
20563 //
20564 // ConstNode = CONST
20565 // AddNode = (A + c1)
20566 // ... = AddNode * ConstNode <-- current visiting instruction.
20567 // ...
20568 // OtherOp = (A + c2)
20569 // User = OtherOp * ConstNode <-- visiting User.
20570 //
20571 // If we make this transformation, we will have a common
20572 // multiply (CONST * A) after we also do the same transformation
20573 // to the "t2" instruction.
20574 if (OtherOp->getOpcode() == ISD::ADD &&
20575 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20576 OtherOp->getOperand(0).getNode() == MulVar)
20577 return true;
20578 }
20579 }
20580
20581 // Didn't find a case where this would be profitable.
20582 return false;
20583}
20584
20585SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20586 unsigned NumStores) {
20587 SmallVector<SDValue, 8> Chains;
20588 SmallPtrSet<const SDNode *, 8> Visited;
20589 SDLoc StoreDL(StoreNodes[0].MemNode);
20590
20591 for (unsigned i = 0; i < NumStores; ++i) {
20592 Visited.insert(StoreNodes[i].MemNode);
20593 }
20594
20595 // don't include nodes that are children or repeated nodes.
20596 for (unsigned i = 0; i < NumStores; ++i) {
20597 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20598 Chains.push_back(StoreNodes[i].MemNode->getChain());
20599 }
20600
20601 assert(!Chains.empty() && "Chain should have generated a chain");
20602 return DAG.getTokenFactor(StoreDL, Chains);
20603}
20604
20605bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20606 const Value *UnderlyingObj = nullptr;
20607 for (const auto &MemOp : StoreNodes) {
20608 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20609 // A pseudo value like a stack frame has its own frame index and size; we
20610 // should not use the first store's frame index for other frames.
20611 if (MMO->getPseudoValue())
20612 return false;
20613
20614 if (!MMO->getValue())
20615 return false;
20616
20617 const Value *Obj = getUnderlyingObject(MMO->getValue());
20618
20619 if (UnderlyingObj && UnderlyingObj != Obj)
20620 return false;
20621
20622 if (!UnderlyingObj)
20623 UnderlyingObj = Obj;
20624 }
20625
20626 return true;
20627}
20628
20629bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20630 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20631 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20632 // Make sure we have something to merge.
20633 if (NumStores < 2)
20634 return false;
20635
20636 assert((!UseTrunc || !UseVector) &&
20637 "This optimization cannot emit a vector truncating store");
20638
20639 // The latest Node in the DAG.
20640 SDLoc DL(StoreNodes[0].MemNode);
20641
20642 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20643 unsigned SizeInBits = NumStores * ElementSizeBits;
20644 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20645
20646 std::optional<MachineMemOperand::Flags> Flags;
20647 AAMDNodes AAInfo;
20648 for (unsigned I = 0; I != NumStores; ++I) {
20649 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20650 if (!Flags) {
20651 Flags = St->getMemOperand()->getFlags();
20652 AAInfo = St->getAAInfo();
20653 continue;
20654 }
20655 // Skip merging if there's an inconsistent flag.
20656 if (Flags != St->getMemOperand()->getFlags())
20657 return false;
20658 // Concatenate AA metadata.
20659 AAInfo = AAInfo.concat(St->getAAInfo());
20660 }
20661
20662 EVT StoreTy;
20663 if (UseVector) {
20664 unsigned Elts = NumStores * NumMemElts;
20665 // Get the type for the merged vector store.
20666 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20667 } else
20668 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20669
20670 SDValue StoredVal;
20671 if (UseVector) {
20672 if (IsConstantSrc) {
20673 SmallVector<SDValue, 8> BuildVector;
20674 for (unsigned I = 0; I != NumStores; ++I) {
20675 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20676 SDValue Val = St->getValue();
20677 // If the constant is of the wrong type, convert it now. This comes up
20678 // when one of our stores was truncating.
20679 if (MemVT != Val.getValueType()) {
20680 Val = peekThroughBitcasts(Val);
20681 // Deal with constants of wrong size.
20682 if (ElementSizeBits != Val.getValueSizeInBits()) {
20683 auto *C = dyn_cast<ConstantSDNode>(Val);
20684 if (!C)
20685 // Not clear how to truncate FP values.
20686 // TODO: Handle truncation of build_vector constants
20687 return false;
20688
20689 EVT IntMemVT =
20690 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20691 Val = DAG.getConstant(C->getAPIntValue()
20692 .zextOrTrunc(Val.getValueSizeInBits())
20693 .zextOrTrunc(ElementSizeBits),
20694 SDLoc(C), IntMemVT);
20695 }
20696 // Make sure the correctly sized value ends up with the correct type.
20697 Val = DAG.getBitcast(MemVT, Val);
20698 }
20699 BuildVector.push_back(Val);
20700 }
20701 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20702 : ISD::BUILD_VECTOR,
20703 DL, StoreTy, BuildVector);
20704 } else {
20705 SmallVector<SDValue, 8> Ops;
20706 for (unsigned i = 0; i < NumStores; ++i) {
20707 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20708 SDValue Val = peekThroughBitcasts(St->getValue());
20709 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20710 // type MemVT. If the underlying value is not the correct
20711 // type, but it is an extraction of an appropriate vector we
20712 // can recast Val to be of the correct type. This may require
20713 // converting between EXTRACT_VECTOR_ELT and
20714 // EXTRACT_SUBVECTOR.
20715 if ((MemVT != Val.getValueType()) &&
20716 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20717 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20718 EVT MemVTScalarTy = MemVT.getScalarType();
20719 // We may need to add a bitcast here to get types to line up.
20720 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20721 Val = DAG.getBitcast(MemVT, Val);
20722 } else if (MemVT.isVector() &&
20723 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20724 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20725 } else {
20726 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20727 : ISD::EXTRACT_VECTOR_ELT;
20728 SDValue Vec = Val.getOperand(0);
20729 SDValue Idx = Val.getOperand(1);
20730 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20731 }
20732 }
20733 Ops.push_back(Val);
20734 }
20735
20736 // Build the extracted vector elements back into a vector.
20737 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20738 : ISD::BUILD_VECTOR,
20739 DL, StoreTy, Ops);
20740 }
20741 } else {
20742 // We should always use a vector store when merging extracted vector
20743 // elements, so this path implies a store of constants.
20744 assert(IsConstantSrc && "Merged vector elements should use vector store");
20745
20746 APInt StoreInt(SizeInBits, 0);
20747
20748 // Construct a single integer constant which is made of the smaller
20749 // constant inputs.
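// For illustration: merging two i16 stores of 0x1234 (lowest address) and
// 0x5678 on a little-endian target yields StoreInt = 0x56781234, so the
// merged i32 store writes the same bytes as the two original stores.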
20750 bool IsLE = DAG.getDataLayout().isLittleEndian();
20751 for (unsigned i = 0; i < NumStores; ++i) {
20752 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20753 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20754
20755 SDValue Val = St->getValue();
20756 Val = peekThroughBitcasts(Val);
20757 StoreInt <<= ElementSizeBits;
20758 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20759 StoreInt |= C->getAPIntValue()
20760 .zextOrTrunc(ElementSizeBits)
20761 .zextOrTrunc(SizeInBits);
20762 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20763 StoreInt |= C->getValueAPF()
20764 .bitcastToAPInt()
20765 .zextOrTrunc(ElementSizeBits)
20766 .zextOrTrunc(SizeInBits);
20767 // If fp truncation is necessary give up for now.
20768 if (MemVT.getSizeInBits() != ElementSizeBits)
20769 return false;
20770 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20771 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20772 // Not yet handled
20773 return false;
20774 } else {
20775 llvm_unreachable("Invalid constant element type");
20776 }
20777 }
20778
20779 // Create the new Load and Store operations.
20780 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20781 }
20782
20783 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20784 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20785 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20786
20787 // Make sure we use a trunc store if it's necessary to be legal.
20788 // When generating the new widened store, if the first store's pointer info
20789 // cannot be reused, discard the pointer info except for the address space,
20790 // because the widened store can no longer be represented by the original
20791 // pointer info, which describes the narrower memory object.
20792 SDValue NewStore;
20793 if (!UseTrunc) {
20794 NewStore = DAG.getStore(
20795 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20796 CanReusePtrInfo
20797 ? FirstInChain->getPointerInfo()
20798 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20799 FirstInChain->getAlign(), *Flags, AAInfo);
20800 } else { // Must be realized as a trunc store
20801 EVT LegalizedStoredValTy =
20802 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20803 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20804 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20805 SDValue ExtendedStoreVal =
20806 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20807 LegalizedStoredValTy);
20808 NewStore = DAG.getTruncStore(
20809 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20810 CanReusePtrInfo
20811 ? FirstInChain->getPointerInfo()
20812 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20813 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20814 AAInfo);
20815 }
20816
20817 // Replace all merged stores with the new store.
20818 for (unsigned i = 0; i < NumStores; ++i)
20819 CombineTo(StoreNodes[i].MemNode, NewStore);
20820
20821 AddToWorklist(NewChain.getNode());
20822 return true;
20823}
20824
20825SDNode *
20826DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
20827 SmallVectorImpl<MemOpLink> &StoreNodes) {
20828 // This holds the base pointer, index, and the offset in bytes from the base
20829 // pointer. We must have a base and an offset. Do not handle stores to undef
20830 // base pointers.
20831 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20832 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20833 return nullptr;
20834
20835 SDValue Val = peekThroughBitcasts(St->getValue());
20836 StoreSource StoreSrc = getStoreSource(Val);
20837 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20838
20839 // Match on loadbaseptr if relevant.
20840 EVT MemVT = St->getMemoryVT();
20841 BaseIndexOffset LBasePtr;
20842 EVT LoadVT;
20843 if (StoreSrc == StoreSource::Load) {
20844 auto *Ld = cast<LoadSDNode>(Val);
20845 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20846 LoadVT = Ld->getMemoryVT();
20847 // Load and store should be the same type.
20848 if (MemVT != LoadVT)
20849 return nullptr;
20850 // Loads must only have one use.
20851 if (!Ld->hasNUsesOfValue(1, 0))
20852 return nullptr;
20853 // The memory operands must not be volatile/indexed/atomic.
20854 // TODO: May be able to relax for unordered atomics (see D66309)
20855 if (!Ld->isSimple() || Ld->isIndexed())
20856 return nullptr;
20857 }
20858 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20859 int64_t &Offset) -> bool {
20860 // The memory operands must not be volatile/indexed/atomic.
20861 // TODO: May be able to relax for unordered atomics (see D66309)
20862 if (!Other->isSimple() || Other->isIndexed())
20863 return false;
20864 // Don't mix temporal stores with non-temporal stores.
20865 if (St->isNonTemporal() != Other->isNonTemporal())
20866 return false;
20867 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20868 return false;
20869 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20870 // Allow merging constants of different types as integers.
20871 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20872 : Other->getMemoryVT() != MemVT;
20873 switch (StoreSrc) {
20874 case StoreSource::Load: {
20875 if (NoTypeMatch)
20876 return false;
20877 // The Load's Base Ptr must also match.
20878 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20879 if (!OtherLd)
20880 return false;
20881 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20882 if (LoadVT != OtherLd->getMemoryVT())
20883 return false;
20884 // Loads must only have one use.
20885 if (!OtherLd->hasNUsesOfValue(1, 0))
20886 return false;
20887 // The memory operands must not be volatile/indexed/atomic.
20888 // TODO: May be able to relax for unordered atomics (see D66309)
20889 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20890 return false;
20891 // Don't mix temporal loads with non-temporal loads.
20892 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20893 return false;
20894 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20895 *OtherLd))
20896 return false;
20897 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20898 return false;
20899 break;
20900 }
20901 case StoreSource::Constant:
20902 if (NoTypeMatch)
20903 return false;
20904 if (getStoreSource(OtherBC) != StoreSource::Constant)
20905 return false;
20906 break;
20907 case StoreSource::Extract:
20908 // Do not merge truncated stores here.
20909 if (Other->isTruncatingStore())
20910 return false;
20911 if (!MemVT.bitsEq(OtherBC.getValueType()))
20912 return false;
20913 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20914 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20915 return false;
20916 break;
20917 default:
20918 llvm_unreachable("Unhandled store source for merging");
20919 }
20920 Ptr = BaseIndexOffset::match(Other, DAG);
20921 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20922 };
20923
20924 // We are looking for a root node which is an ancestor to all mergeable
20925 // stores. We search up through a load, to our root and then down
20926 // through all children. For instance we will find Store{1,2,3} if
20927 // St is Store1, Store2, or Store3 where the root is not a load,
20928 // which is always true for non-volatile ops. TODO: Expand
20929 // the search to find all valid candidates through multiple layers of loads.
20930 //
20931 // Root
20932 // |-------|-------|
20933 // Load Load Store3
20934 // | |
20935 // Store1 Store2
20936 //
20937 // FIXME: We should be able to climb and
20938 // descend TokenFactors to find candidates as well.
20939
20940 SDNode *RootNode = St->getChain().getNode();
20941 // Bail out if we already analyzed this root node and found nothing.
20942 if (ChainsWithoutMergeableStores.contains(RootNode))
20943 return nullptr;
20944
20945 // Check if the pair of StoreNode and RootNode has already bailed out many
20946 // times, i.e. more than the limit, in the dependence check.
20947 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20948 SDNode *RootNode) -> bool {
20949 auto RootCount = StoreRootCountMap.find(StoreNode);
20950 return RootCount != StoreRootCountMap.end() &&
20951 RootCount->second.first == RootNode &&
20952 RootCount->second.second > StoreMergeDependenceLimit;
20953 };
20954
20955 auto TryToAddCandidate = [&](SDUse &Use) {
20956 // This must be a chain use.
20957 if (Use.getOperandNo() != 0)
20958 return;
20959 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
20960 BaseIndexOffset Ptr;
20961 int64_t PtrDiff;
20962 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20963 !OverLimitInDependenceCheck(OtherStore, RootNode))
20964 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20965 }
20966 };
20967
20968 unsigned NumNodesExplored = 0;
20969 const unsigned MaxSearchNodes = 1024;
20970 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20971 RootNode = Ldn->getChain().getNode();
20972 // Bail out if we already analyzed this root node and found nothing.
20973 if (ChainsWithoutMergeableStores.contains(RootNode))
20974 return nullptr;
20975 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20976 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20977 SDNode *User = I->getUser();
20978 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
20979 for (SDUse &U2 : User->uses())
20980 TryToAddCandidate(U2);
20981 }
20982 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20983 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
20984 TryToAddCandidate(*I);
20985 }
20986 }
20987 } else {
20988 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20989 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20990 TryToAddCandidate(*I);
20991 }
20992
20993 return RootNode;
20994}
20995
20996// We need to check that merging these stores does not cause a loop in the
20997// DAG. Any store candidate may depend on another candidate indirectly through
20998// its operands. Check in parallel by searching up from operands of candidates.
20999bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21000 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21001 SDNode *RootNode) {
21002 // FIXME: We should be able to truncate a full search of
21003 // predecessors by doing a BFS and keeping tabs on the originating
21004 // stores from which worklist nodes come, in a similar way to
21005 // TokenFactor simplification.
21006
21007 SmallPtrSet<const SDNode *, 32> Visited;
21008 SmallVector<const SDNode *, 8> Worklist;
21009
21010 // RootNode is a predecessor to all candidates so we need not search
21011 // past it. Add RootNode (peeking through TokenFactors). Do not count
21012 // these towards size check.
21013
21014 Worklist.push_back(RootNode);
21015 while (!Worklist.empty()) {
21016 auto N = Worklist.pop_back_val();
21017 if (!Visited.insert(N).second)
21018 continue; // Already present in Visited.
21019 if (N->getOpcode() == ISD::TokenFactor) {
21020 for (SDValue Op : N->ops())
21021 Worklist.push_back(Op.getNode());
21022 }
21023 }
21024
21025 // Don't count pruning nodes towards max.
21026 unsigned int Max = 1024 + Visited.size();
21027 // Search Ops of store candidates.
21028 for (unsigned i = 0; i < NumStores; ++i) {
21029 SDNode *N = StoreNodes[i].MemNode;
21030 // Of the 4 Store Operands:
21031 // * Chain (Op 0) -> We have already considered these
21032 // in candidate selection, but only by following the
21033 // chain dependencies. We could still have a chain
21034 // dependency to a load, that has a non-chain dep to
21035 // another load, that depends on a store, etc. So it is
21036 // possible to have dependencies that consist of a mix
21037 // of chain and non-chain deps, and we need to include
21038 // chain operands in the analysis here.
21039 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21040 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21041 // but aren't necessarily from the same base node, so
21042 // cycles possible (e.g. via indexed store).
21043 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21044 // non-indexed stores). Not constant on all targets (e.g. ARM)
21045 // and so can participate in a cycle.
21046 for (const SDValue &Op : N->op_values())
21047 Worklist.push_back(Op.getNode());
21048 }
21049 // Search through DAG. We can stop early if we find a store node.
21050 for (unsigned i = 0; i < NumStores; ++i)
21051 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21052 Max)) {
21053 // If the search bailed out, record the StoreNode and RootNode in the
21054 // StoreRootCountMap. If we have seen the pair more times than the limit
21055 // allows, we won't add the StoreNode into the StoreNodes set again.
21056 if (Visited.size() >= Max) {
21057 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21058 if (RootCount.first == RootNode)
21059 RootCount.second++;
21060 else
21061 RootCount = {RootNode, 1};
21062 }
21063 return false;
21064 }
21065 return true;
21066}
21067
21068unsigned
21069DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21070 int64_t ElementSizeBytes) const {
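// For example, with 4-byte elements and sorted offsets {0, 100, 104, 108},
// no run starts at offset 0, so that entry is trimmed from StoreNodes and 3
// is returned for the consecutive stores at {100, 104, 108}.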
21071 while (true) {
21072 // Find a store past the width of the first store.
21073 size_t StartIdx = 0;
21074 while ((StartIdx + 1 < StoreNodes.size()) &&
21075 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21076 StoreNodes[StartIdx + 1].OffsetFromBase)
21077 ++StartIdx;
21078
21079 // Bail if we don't have enough candidates to merge.
21080 if (StartIdx + 1 >= StoreNodes.size())
21081 return 0;
21082
21083 // Trim stores that overlapped with the first store.
21084 if (StartIdx)
21085 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21086
21087 // Scan the memory operations on the chain and find the first
21088 // non-consecutive store memory address.
21089 unsigned NumConsecutiveStores = 1;
21090 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21091 // Check that the addresses are consecutive starting from the second
21092 // element in the list of stores.
21093 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21094 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21095 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21096 break;
21097 NumConsecutiveStores = i + 1;
21098 }
21099 if (NumConsecutiveStores > 1)
21100 return NumConsecutiveStores;
21101
21102 // There are no consecutive stores at the start of the list.
21103 // Remove the first store and try again.
21104 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21105 }
21106}
21107
21108bool DAGCombiner::tryStoreMergeOfConstants(
21109 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21110 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21111 LLVMContext &Context = *DAG.getContext();
21112 const DataLayout &DL = DAG.getDataLayout();
21113 int64_t ElementSizeBytes = MemVT.getStoreSize();
21114 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21115 bool MadeChange = false;
21116
21117 // Store the constants into memory as one consecutive store.
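// Each iteration below finds the longest prefix of StoreNodes that can be
// merged into a single legal integer, truncating-integer, or vector store,
// emits that merge (or prunes candidates it cannot use), and then retries
// with the remaining candidates.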
21118 while (NumConsecutiveStores >= 2) {
21119 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21120 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21121 Align FirstStoreAlign = FirstInChain->getAlign();
21122 unsigned LastLegalType = 1;
21123 unsigned LastLegalVectorType = 1;
21124 bool LastIntegerTrunc = false;
21125 bool NonZero = false;
21126 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21127 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21128 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21129 SDValue StoredVal = ST->getValue();
21130 bool IsElementZero = false;
21131 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21132 IsElementZero = C->isZero();
21133 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21134 IsElementZero = C->getConstantFPValue()->isNullValue();
21135 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21136 IsElementZero = true;
21137 if (IsElementZero) {
21138 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21139 FirstZeroAfterNonZero = i;
21140 }
21141 NonZero |= !IsElementZero;
21142
21143 // Find a legal type for the constant store.
21144 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21145 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21146 unsigned IsFast = 0;
21147
21148 // Break early when size is too large to be legal.
21149 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21150 break;
21151
21152 if (TLI.isTypeLegal(StoreTy) &&
21153 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21154 DAG.getMachineFunction()) &&
21155 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21156 *FirstInChain->getMemOperand(), &IsFast) &&
21157 IsFast) {
21158 LastIntegerTrunc = false;
21159 LastLegalType = i + 1;
21160 // Or check whether a truncstore is legal.
21161 } else if (TLI.getTypeAction(Context, StoreTy) ==
21162 TargetLowering::TypePromoteInteger) {
21163 EVT LegalizedStoredValTy =
21164 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21165 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21166 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21167 DAG.getMachineFunction()) &&
21168 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21169 *FirstInChain->getMemOperand(), &IsFast) &&
21170 IsFast) {
21171 LastIntegerTrunc = true;
21172 LastLegalType = i + 1;
21173 }
21174 }
21175
21176 // We only use vectors if the target allows it and the function is not
21177 // marked with the noimplicitfloat attribute.
21178 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21179 AllowVectors) {
21180 // Find a legal type for the vector store.
21181 unsigned Elts = (i + 1) * NumMemElts;
21182 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21183 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21184 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21185 TLI.allowsMemoryAccess(Context, DL, Ty,
21186 *FirstInChain->getMemOperand(), &IsFast) &&
21187 IsFast)
21188 LastLegalVectorType = i + 1;
21189 }
21190 }
21191
21192 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21193 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21194 bool UseTrunc = LastIntegerTrunc && !UseVector;
21195
21196 // Check if we found a legal integer type that creates a meaningful
21197 // merge.
21198 if (NumElem < 2) {
21199 // We know that candidate stores are in order and of correct
21200 // shape. While there is no mergeable sequence from the
21201 // beginning, one may start later in the sequence. The only
21202 // reason a merge of size N could have failed where another of
21203 // the same size would not have is if the alignment has
21204 // improved or we've dropped a non-zero value. Drop as many
21205 // candidates as we can here.
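// Note that NumSkip is also bounded by FirstZeroAfterNonZero: we never skip
// past the first zero element that follows a non-zero one, since dropping
// the leading non-zero values may make the remaining run cheaper to merge
// (e.g. as an all-zero vector store).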
21206 unsigned NumSkip = 1;
21207 while ((NumSkip < NumConsecutiveStores) &&
21208 (NumSkip < FirstZeroAfterNonZero) &&
21209 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21210 NumSkip++;
21211
21212 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21213 NumConsecutiveStores -= NumSkip;
21214 continue;
21215 }
21216
21217 // Check that we can merge these candidates without causing a cycle.
21218 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21219 RootNode)) {
21220 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21221 NumConsecutiveStores -= NumElem;
21222 continue;
21223 }
21224
21225 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21226 /*IsConstantSrc*/ true,
21227 UseVector, UseTrunc);
21228
21229 // Remove merged stores for next iteration.
21230 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21231 NumConsecutiveStores -= NumElem;
21232 }
21233 return MadeChange;
21234}
21235
21236bool DAGCombiner::tryStoreMergeOfExtracts(
21237 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21238 EVT MemVT, SDNode *RootNode) {
21239 LLVMContext &Context = *DAG.getContext();
21240 const DataLayout &DL = DAG.getDataLayout();
21241 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21242 bool MadeChange = false;
21243
21244 // Loop on Consecutive Stores on success.
21245 while (NumConsecutiveStores >= 2) {
21246 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21247 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21248 Align FirstStoreAlign = FirstInChain->getAlign();
21249 unsigned NumStoresToMerge = 1;
21250 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21251 // Find a legal type for the vector store.
21252 unsigned Elts = (i + 1) * NumMemElts;
21253 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21254 unsigned IsFast = 0;
21255
21256 // Break early when size is too large to be legal.
21257 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
21258 break;
21259
21260 if (TLI.isTypeLegal(Ty) &&
21261 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21262 TLI.allowsMemoryAccess(Context, DL, Ty,
21263 *FirstInChain->getMemOperand(), &IsFast) &&
21264 IsFast)
21265 NumStoresToMerge = i + 1;
21266 }
21267
21268 // Check if we found a legal integer type creating a meaningful
21269 // merge.
21270 if (NumStoresToMerge < 2) {
21271 // We know that candidate stores are in order and of correct
21272 // shape. While there is no mergeable sequence from the
21273 // beginning, one may start later in the sequence. The only
21274 // reason a merge of size N could have failed where another of
21275 // the same size would not have is if the alignment has
21276 // improved. Drop as many candidates as we can here.
21277 unsigned NumSkip = 1;
21278 while ((NumSkip < NumConsecutiveStores) &&
21279 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21280 NumSkip++;
21281
21282 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21283 NumConsecutiveStores -= NumSkip;
21284 continue;
21285 }
21286
21287 // Check that we can merge these candidates without causing a cycle.
21288 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
21289 RootNode)) {
21290 StoreNodes.erase(StoreNodes.begin(),
21291 StoreNodes.begin() + NumStoresToMerge);
21292 NumConsecutiveStores -= NumStoresToMerge;
21293 continue;
21294 }
21295
21296 MadeChange |= mergeStoresOfConstantsOrVecElts(
21297 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
21298 /*UseVector*/ true, /*UseTrunc*/ false);
21299
21300 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
21301 NumConsecutiveStores -= NumStoresToMerge;
21302 }
21303 return MadeChange;
21304}
21305
21306bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
21307 unsigned NumConsecutiveStores, EVT MemVT,
21308 SDNode *RootNode, bool AllowVectors,
21309 bool IsNonTemporalStore,
21310 bool IsNonTemporalLoad) {
21311 LLVMContext &Context = *DAG.getContext();
21312 const DataLayout &DL = DAG.getDataLayout();
21313 int64_t ElementSizeBytes = MemVT.getStoreSize();
21314 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21315 bool MadeChange = false;
21316
21317 // Look for load nodes which are used by the stored values.
21318 SmallVector<MemOpLink, 8> LoadNodes;
21319
21320 // Find acceptable loads. Loads need to have the same chain (token factor),
21321 // must not be zext, volatile, or indexed, and they must be consecutive.
21322 BaseIndexOffset LdBasePtr;
21323
21324 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21325 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21326 SDValue Val = peekThroughBitcasts(St->getValue());
21327 LoadSDNode *Ld = cast<LoadSDNode>(Val);
21328
21329 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
21330 // If this is not the first ptr that we check.
21331 int64_t LdOffset = 0;
21332 if (LdBasePtr.getBase().getNode()) {
21333 // The base ptr must be the same.
21334 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
21335 break;
21336 } else {
21337 // Check that all other base pointers are the same as this one.
21338 LdBasePtr = LdPtr;
21339 }
21340
21341 // We found a potential memory operand to merge.
21342 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
21343 }
21344
21345 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
21346 Align RequiredAlignment;
21347 bool NeedRotate = false;
21348 if (LoadNodes.size() == 2) {
21349 // If we have load/store pair instructions and we only have two values,
21350 // don't bother merging.
21351 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
21352 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
21353 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
21354 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
21355 break;
21356 }
21357 // If the loads are reversed, see if we can rotate the halves into place.
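// For example, if the store at the lower address holds the load from the
// higher address and vice versa, a single wide load covers both sources and
// a rotate by half its width swaps the two halves into the order the stores
// expect.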
21358 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
21359 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
21360 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
21361 if (Offset0 - Offset1 == ElementSizeBytes &&
21362 (hasOperation(ISD::ROTL, PairVT) ||
21363 hasOperation(ISD::ROTR, PairVT))) {
21364 std::swap(LoadNodes[0], LoadNodes[1]);
21365 NeedRotate = true;
21366 }
21367 }
21368 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21369 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21370 Align FirstStoreAlign = FirstInChain->getAlign();
21371 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
21372
21373 // Scan the memory operations on the chain and find the first
21374 // non-consecutive load memory address. These variables hold the index in
21375 // the store node array.
21376
21377 unsigned LastConsecutiveLoad = 1;
21378
21379 // These variables refer to a size, not an index in the array.
21380 unsigned LastLegalVectorType = 1;
21381 unsigned LastLegalIntegerType = 1;
21382 bool isDereferenceable = true;
21383 bool DoIntegerTruncate = false;
21384 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
21385 SDValue LoadChain = FirstLoad->getChain();
21386 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
21387 // All loads must share the same chain.
21388 if (LoadNodes[i].MemNode->getChain() != LoadChain)
21389 break;
21390
21391 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
21392 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21393 break;
21394 LastConsecutiveLoad = i;
21395
21396 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
21397 isDereferenceable = false;
21398
21399 // Find a legal type for the vector store.
21400 unsigned Elts = (i + 1) * NumMemElts;
21401 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21402
21403 // Break early when size is too large to be legal.
21404 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21405 break;
21406
21407 unsigned IsFastSt = 0;
21408 unsigned IsFastLd = 0;
21409 // Don't try vector types if we need a rotate. We may still fail the
21410 // legality checks for the integer type, but we can't handle the rotate
21411 // case with vectors.
21412 // FIXME: We could use a shuffle in place of the rotate.
21413 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
21414 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21415 DAG.getMachineFunction()) &&
21416 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21417 *FirstInChain->getMemOperand(), &IsFastSt) &&
21418 IsFastSt &&
21419 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21420 *FirstLoad->getMemOperand(), &IsFastLd) &&
21421 IsFastLd) {
21422 LastLegalVectorType = i + 1;
21423 }
21424
21425 // Find a legal type for the integer store.
21426 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21427 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21428 if (TLI.isTypeLegal(StoreTy) &&
21429 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21430 DAG.getMachineFunction()) &&
21431 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21432 *FirstInChain->getMemOperand(), &IsFastSt) &&
21433 IsFastSt &&
21434 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21435 *FirstLoad->getMemOperand(), &IsFastLd) &&
21436 IsFastLd) {
21437 LastLegalIntegerType = i + 1;
21438 DoIntegerTruncate = false;
21439 // Or check whether a truncstore and extload is legal.
21440 } else if (TLI.getTypeAction(Context, StoreTy) ==
21441 TargetLowering::TypePromoteInteger) {
21442 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
21443 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21444 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21445 DAG.getMachineFunction()) &&
21446 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21447 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21448 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
21449 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21450 *FirstInChain->getMemOperand(), &IsFastSt) &&
21451 IsFastSt &&
21452 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21453 *FirstLoad->getMemOperand(), &IsFastLd) &&
21454 IsFastLd) {
21455 LastLegalIntegerType = i + 1;
21456 DoIntegerTruncate = true;
21457 }
21458 }
21459 }
21460
21461 // Only use vector types if the vector type is larger than the integer
21462 // type. If they are the same, use integers.
21463 bool UseVectorTy =
21464 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
21465 unsigned LastLegalType =
21466 std::max(LastLegalVectorType, LastLegalIntegerType);
21467
21468 // We add +1 here because LastConsecutiveLoad refers to an array index
21469 // while NumElem refers to a count of elements.
21470 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
21471 NumElem = std::min(LastLegalType, NumElem);
21472 Align FirstLoadAlign = FirstLoad->getAlign();
21473
21474 if (NumElem < 2) {
21475 // We know that candidate stores are in order and of correct
21476 // shape. While there is no mergeable sequence from the
21477 // beginning, one may start later in the sequence. The only
21478 // reason a merge of size N could have failed where another of
21479 // the same size would not have is if the alignment of either
21480 // the load or the store has improved. Drop as many candidates as we
21481 // can here.
21482 unsigned NumSkip = 1;
21483 while ((NumSkip < LoadNodes.size()) &&
21484 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21485 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21486 NumSkip++;
21487 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21488 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21489 NumConsecutiveStores -= NumSkip;
21490 continue;
21491 }
21492
21493 // Check that we can merge these candidates without causing a cycle.
21494 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21495 RootNode)) {
21496 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21497 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21498 NumConsecutiveStores -= NumElem;
21499 continue;
21500 }
21501
21502 // Find if it is better to use vectors or integers to load and store
21503 // to memory.
21504 EVT JointMemOpVT;
21505 if (UseVectorTy) {
21506 // Find a legal type for the vector store.
21507 unsigned Elts = NumElem * NumMemElts;
21508 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21509 } else {
21510 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21511 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21512 }
21513
21514 SDLoc LoadDL(LoadNodes[0].MemNode);
21515 SDLoc StoreDL(StoreNodes[0].MemNode);
21516
21517 // The merged loads are required to have the same incoming chain, so
21518 // using the first's chain is acceptable.
21519
21520 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21521 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21522 AddToWorklist(NewStoreChain.getNode());
21523
21524 MachineMemOperand::Flags LdMMOFlags =
21525 isDereferenceable ? MachineMemOperand::MODereferenceable
21526 : MachineMemOperand::MONone;
21527 if (IsNonTemporalLoad)
21528 LdMMOFlags |= MachineMemOperand::MONonTemporal;
21529
21530 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21531
21532 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21533 ? MachineMemOperand::MONonTemporal
21534 : MachineMemOperand::MONone;
21535
21536 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21537
21538 SDValue NewLoad, NewStore;
21539 if (UseVectorTy || !DoIntegerTruncate) {
21540 NewLoad = DAG.getLoad(
21541 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21542 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21543 SDValue StoreOp = NewLoad;
21544 if (NeedRotate) {
21545 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21546 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21547 "Unexpected type for rotate-able load pair");
21548 SDValue RotAmt =
21549 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21550 // Target can convert to the identical ROTR if it does not have ROTL.
21551 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21552 }
21553 NewStore = DAG.getStore(
21554 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21555 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21556 : MachinePointerInfo(FirstStoreAS),
21557 FirstStoreAlign, StMMOFlags);
21558 } else { // This must be the truncstore/extload case
21559 EVT ExtendedTy =
21560 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21561 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21562 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21563 FirstLoad->getPointerInfo(), JointMemOpVT,
21564 FirstLoadAlign, LdMMOFlags);
21565 NewStore = DAG.getTruncStore(
21566 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21567 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21568 : MachinePointerInfo(FirstStoreAS),
21569 JointMemOpVT, FirstInChain->getAlign(),
21570 FirstInChain->getMemOperand()->getFlags());
21571 }
21572
21573 // Transfer chain users from old loads to the new load.
21574 for (unsigned i = 0; i < NumElem; ++i) {
21575 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21576 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21577 SDValue(NewLoad.getNode(), 1));
21578 }
21579
21580 // Replace all stores with the new store. Recursively remove corresponding
21581 // values if they are no longer used.
21582 for (unsigned i = 0; i < NumElem; ++i) {
21583 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21584 CombineTo(StoreNodes[i].MemNode, NewStore);
21585 if (Val->use_empty())
21586 recursivelyDeleteUnusedNodes(Val.getNode());
21587 }
21588
21589 MadeChange = true;
21590 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21591 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21592 NumConsecutiveStores -= NumElem;
21593 }
21594 return MadeChange;
21595}
21596
21597bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21598 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21599 return false;
21600
21601 // TODO: Extend this function to merge stores of scalable vectors.
21602 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21603 // store since we know <vscale x 16 x i8> is exactly twice as large as
21604 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21605 EVT MemVT = St->getMemoryVT();
21606 if (MemVT.isScalableVT())
21607 return false;
21608 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21609 return false;
21610
21611 // This function cannot currently deal with non-byte-sized memory sizes.
21612 int64_t ElementSizeBytes = MemVT.getStoreSize();
21613 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21614 return false;
21615
21616 // Do not bother looking at stored values that are not constants, loads, or
21617 // extracted vector elements.
21618 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21619 const StoreSource StoreSrc = getStoreSource(StoredVal);
21620 if (StoreSrc == StoreSource::Unknown)
21621 return false;
21622
21623 SmallVector<MemOpLink, 8> StoreNodes;
21624 // Find potential store merge candidates by searching through the chain sub-DAG.
21625 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
21626
21627 // Check if there is anything to merge.
21628 if (StoreNodes.size() < 2)
21629 return false;
21630
21631 // Sort the memory operands according to their distance from the
21632 // base pointer.
21633 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21634 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21635 });
21636
21637 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21638 Attribute::NoImplicitFloat);
21639 bool IsNonTemporalStore = St->isNonTemporal();
21640 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21641 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21642
21643 // Store merging attempts to merge the lowest-addressed stores first. This
21644 // generally works out, since on success the remaining stores are checked
21645 // after the first collection of stores is merged. However, in the
21646 // case that a non-mergeable store is found first, e.g., {p[-2],
21647 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21648 // mergeable cases. To prevent this, we prune such stores from the
21649 // front of StoreNodes here.
21650 bool MadeChange = false;
21651 while (StoreNodes.size() > 1) {
21652 unsigned NumConsecutiveStores =
21653 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21654 // There are no more stores in the list to examine.
21655 if (NumConsecutiveStores == 0)
21656 return MadeChange;
21657
21658 // We have at least 2 consecutive stores. Try to merge them.
21659 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21660 switch (StoreSrc) {
21661 case StoreSource::Constant:
21662 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21663 MemVT, RootNode, AllowVectors);
21664 break;
21665
21666 case StoreSource::Extract:
21667 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21668 MemVT, RootNode);
21669 break;
21670
21671 case StoreSource::Load:
21672 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21673 MemVT, RootNode, AllowVectors,
21674 IsNonTemporalStore, IsNonTemporalLoad);
21675 break;
21676
21677 default:
21678 llvm_unreachable("Unhandled store source type");
21679 }
21680 }
21681
21682 // Remember if we failed to optimize, to save compile time.
21683 if (!MadeChange)
21684 ChainsWithoutMergeableStores.insert(RootNode);
21685
21686 return MadeChange;
21687}
21688
21689SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21690 SDLoc SL(ST);
21691 SDValue ReplStore;
21692
21693 // Replace the chain to avoid dependency.
21694 if (ST->isTruncatingStore()) {
21695 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21696 ST->getBasePtr(), ST->getMemoryVT(),
21697 ST->getMemOperand());
21698 } else {
21699 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21700 ST->getMemOperand());
21701 }
21702
21703 // Create token to keep both nodes around.
21704 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21705 MVT::Other, ST->getChain(), ReplStore);
21706
21707 // Make sure the new and old chains are cleaned up.
21708 AddToWorklist(Token.getNode());
21709
21710 // Don't add users to work list.
21711 return CombineTo(ST, Token, false);
21712}
21713
21714SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21715 SDValue Value = ST->getValue();
21716 if (Value.getOpcode() == ISD::TargetConstantFP)
21717 return SDValue();
21718
21719 if (!ISD::isNormalStore(ST))
21720 return SDValue();
21721
21722 SDLoc DL(ST);
21723
21724 SDValue Chain = ST->getChain();
21725 SDValue Ptr = ST->getBasePtr();
21726
21727 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21728
21729 // NOTE: If the original store is volatile, this transform must not increase
21730 // the number of stores. For example, on x86-32 an f64 can be stored in one
21731 // processor operation but an i64 (which is not legal) requires two. So the
21732 // transform should not be done in this case.
21733
21734 SDValue Tmp;
21735 switch (CFP->getSimpleValueType(0).SimpleTy) {
21736 default:
21737 llvm_unreachable("Unknown FP type");
21738 case MVT::f16: // We don't do this for these yet.
21739 case MVT::bf16:
21740 case MVT::f80:
21741 case MVT::f128:
21742 case MVT::ppcf128:
21743 return SDValue();
21744 case MVT::f32:
21745 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21746 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21747 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21748 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21749 MVT::i32);
21750 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21751 }
21752
21753 return SDValue();
21754 case MVT::f64:
21755 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21756 ST->isSimple()) ||
21757 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21758 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21759 getZExtValue(), SDLoc(CFP), MVT::i64);
21760 return DAG.getStore(Chain, DL, Tmp,
21761 Ptr, ST->getMemOperand());
21762 }
21763
21764 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21765 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21766 // Many FP stores are not made apparent until after legalize, e.g. for
21767 // argument passing. Since this is so common, custom legalize the
21768 // 64-bit integer store into two 32-bit stores.
21769 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21770 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21771 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21772 if (DAG.getDataLayout().isBigEndian())
21773 std::swap(Lo, Hi);
21774
21775 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21776 AAMDNodes AAInfo = ST->getAAInfo();
21777
21778 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21779 ST->getOriginalAlign(), MMOFlags, AAInfo);
21780 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21781 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21782 ST->getPointerInfo().getWithOffset(4),
21783 ST->getOriginalAlign(), MMOFlags, AAInfo);
21784 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21785 St0, St1);
21786 }
21787
21788 return SDValue();
21789 }
21790}
21791
21792// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21793//
21794// If a store of a load with an element inserted into it has no other
21795// uses in between the chain, then we can consider the vector store
21796// dead and replace it with just the single scalar element store.
21797SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21798 SDLoc DL(ST);
21799 SDValue Value = ST->getValue();
21800 SDValue Ptr = ST->getBasePtr();
21801 SDValue Chain = ST->getChain();
21802 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21803 return SDValue();
21804
21805 SDValue Elt = Value.getOperand(1);
21806 SDValue Idx = Value.getOperand(2);
21807
21808 // If the element isn't byte sized or is implicitly truncated then we can't
21809 // compute an offset.
21810 EVT EltVT = Elt.getValueType();
21811 if (!EltVT.isByteSized() ||
21812 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21813 return SDValue();
21814
21815 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21816 if (!Ld || Ld->getBasePtr() != Ptr ||
21817 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21818 !ISD::isNormalStore(ST) ||
21819 Ld->getAddressSpace() != ST->getAddressSpace() ||
21820 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21821 return SDValue();
21822
21823 unsigned IsFast;
21824 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21825 Elt.getValueType(), ST->getAddressSpace(),
21826 ST->getAlign(), ST->getMemOperand()->getFlags(),
21827 &IsFast) ||
21828 !IsFast)
21829 return SDValue();
21830
21831 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21832
21833 // If the offset is a known constant then try to recover the pointer
21834 // info
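// (e.g. inserting into lane 2 of a v4i32 vector loaded from Ptr becomes a
// 4-byte scalar store at an offset of 8 bytes from Ptr).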
21835 SDValue NewPtr;
21836 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21837 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21838 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21839 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21840 } else {
21841 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21842 }
21843
21844 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21845 ST->getMemOperand()->getFlags());
21846}
21847
21848SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21849 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21850 SDValue Val = ST->getVal();
21851 EVT VT = Val.getValueType();
21852 EVT MemVT = ST->getMemoryVT();
21853
21854 if (MemVT.bitsLT(VT)) { // Is truncating store
21855 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21856 MemVT.getScalarSizeInBits());
21857 // See if we can simplify the operation with SimplifyDemandedBits, which
21858 // only works if the value has a single use.
21859 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21860 return SDValue(N, 0);
21861 }
21862
21863 return SDValue();
21864}
21865
21866SDValue DAGCombiner::visitSTORE(SDNode *N) {
21867 StoreSDNode *ST = cast<StoreSDNode>(N);
21868 SDValue Chain = ST->getChain();
21869 SDValue Value = ST->getValue();
21870 SDValue Ptr = ST->getBasePtr();
21871
21872 // If this is a store of a bit convert, store the input value if the
21873 // resultant store does not need a higher alignment than the original.
21874 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21875 ST->isUnindexed()) {
21876 EVT SVT = Value.getOperand(0).getValueType();
21877 // If the store is volatile, we only want to change the store type if the
21878 // resulting store is legal. Otherwise we might increase the number of
21879 // memory accesses. We don't care if the original type was legal or not
21880 // as we assume software couldn't rely on the number of accesses of an
21881 // illegal type.
21882 // TODO: May be able to relax for unordered atomics (see D66309)
21883 if (((!LegalOperations && ST->isSimple()) ||
21884 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21885 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21886 DAG, *ST->getMemOperand())) {
21887 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21888 ST->getMemOperand());
21889 }
21890 }
21891
21892 // Turn 'store undef, Ptr' -> nothing.
21893 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21894 return Chain;
21895
21896 // Try to infer better alignment information than the store already has.
21897 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21898 !ST->isAtomic()) {
21899 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21900 if (*Alignment > ST->getAlign() &&
21901 isAligned(*Alignment, ST->getSrcValueOffset())) {
21902 SDValue NewStore =
21903 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21904 ST->getMemoryVT(), *Alignment,
21905 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21906 // NewStore will always be N as we are only refining the alignment
21907 assert(NewStore.getNode() == N);
21908 (void)NewStore;
21909 }
21910 }
21911 }
21912
21913 // Try transforming a pair floating point load / store ops to integer
21914 // load / store ops.
21915 if (SDValue NewST = TransformFPLoadStorePair(N))
21916 return NewST;
21917
21918 // Try transforming several stores into STORE (BSWAP).
21919 if (SDValue Store = mergeTruncStores(ST))
21920 return Store;
21921
21922 if (ST->isUnindexed()) {
21923 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21924 // adjacent stores.
21925 if (findBetterNeighborChains(ST)) {
21926 // replaceStoreChain uses CombineTo, which handled all of the worklist
21927 // manipulation. Return the original node to not do anything else.
21928 return SDValue(ST, 0);
21929 }
21930 Chain = ST->getChain();
21931 }
21932
21933 // FIXME: is there such a thing as a truncating indexed store?
21934 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21935 Value.getValueType().isInteger() &&
21936 (!isa<ConstantSDNode>(Value) ||
21937 !cast<ConstantSDNode>(Value)->isOpaque())) {
21938 // Convert a truncating store of an extension into a standard store.
21939 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21940 Value.getOpcode() == ISD::SIGN_EXTEND ||
21941 Value.getOpcode() == ISD::ANY_EXTEND) &&
21942 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21943 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21944 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21945 ST->getMemOperand());
21946
21947 APInt TruncDemandedBits =
21948 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21949 ST->getMemoryVT().getScalarSizeInBits());
21950
21951 // See if we can simplify the operation with SimplifyDemandedBits, which
21952 // only works if the value has a single use.
21953 AddToWorklist(Value.getNode());
21954 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21955 // Re-visit the store if anything changed and the store hasn't been merged
21956 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21957 // node back to the worklist if necessary, but we also need to re-visit
21958 // the Store node itself.
21959 if (N->getOpcode() != ISD::DELETED_NODE)
21960 AddToWorklist(N);
21961 return SDValue(N, 0);
21962 }
21963
21964 // Otherwise, see if we can simplify the input to this truncstore with
21965 // knowledge that only the low bits are being used. For example:
21966 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21967 if (SDValue Shorter =
21968 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21969 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21970 ST->getMemOperand());
21971
21972 // If we're storing a truncated constant, see if we can simplify it.
21973 // TODO: Move this to targetShrinkDemandedConstant?
21974 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21975 if (!Cst->isOpaque()) {
21976 const APInt &CValue = Cst->getAPIntValue();
21977 APInt NewVal = CValue & TruncDemandedBits;
21978 if (NewVal != CValue) {
21979 SDValue Shorter =
21980 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21981 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21982 ST->getMemoryVT(), ST->getMemOperand());
21983 }
21984 }
21985 }
21986
21987 // If this is a load followed by a store to the same location, then the store
21988 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21989 // TODO: Add big-endian truncate support with test coverage.
21990 // TODO: Can relax for unordered atomics (see D66309)
21991 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21992 ? peekThroughTruncates(Value)
21993 : Value;
21994 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21995 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21996 ST->isUnindexed() && ST->isSimple() &&
21997 Ld->getAddressSpace() == ST->getAddressSpace() &&
21998 // There can't be any side effects between the load and store, such as
21999 // a call or store.
22000 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22001 // The store is dead, remove it.
22002 return Chain;
22003 }
22004 }
22005
22006 // Try scalarizing vector stores of loads where we only change one element
22007 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22008 return NewST;
22009
22010 // TODO: Can relax for unordered atomics (see D66309)
22011 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22012 if (ST->isUnindexed() && ST->isSimple() &&
22013 ST1->isUnindexed() && ST1->isSimple()) {
22014 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22015 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22016 ST->getAddressSpace() == ST1->getAddressSpace()) {
22017 // If this is a store followed by a store with the same value to the
22018 // same location, then the store is dead/noop.
22019 return Chain;
22020 }
22021
22022 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22023 !ST1->getBasePtr().isUndef() &&
22024 ST->getAddressSpace() == ST1->getAddressSpace()) {
22025 // If one of the two stores has a scalable vector type and the other a
22026 // fixed-size type, we cannot allow removing the seemingly smaller store
22027 // because its actual size relative to the other is not known until
22028 // runtime.
22029 if (ST->getMemoryVT().isScalableVector() ||
22030 ST1->getMemoryVT().isScalableVector()) {
22031 if (ST1->getBasePtr() == Ptr &&
22032 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22033 ST->getMemoryVT().getStoreSize())) {
22034 CombineTo(ST1, ST1->getChain());
22035 return SDValue(N, 0);
22036 }
22037 } else {
22038 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22039 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22040 // If the preceding store stores to a subset of the current store's
22041 // location and no other node is chained to that store, we can
22042 // effectively drop the store. Do not remove stores to undef as they
22043 // may be used as data sinks.
22044 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22045 ChainBase,
22046 ST1->getMemoryVT().getFixedSizeInBits())) {
22047 CombineTo(ST1, ST1->getChain());
22048 return SDValue(N, 0);
22049 }
22050 }
22051 }
22052 }
22053 }
22054
22055 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22056 // truncating store. We can do this even if this is already a truncstore.
22057 if ((Value.getOpcode() == ISD::FP_ROUND ||
22058 Value.getOpcode() == ISD::TRUNCATE) &&
22059 Value->hasOneUse() && ST->isUnindexed() &&
22060 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22061 ST->getMemoryVT(), LegalOperations)) {
22062 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22063 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22064 }
22065
22066 // Always perform this optimization before types are legal. If the target
22067 // prefers, also try this after legalization to catch stores that were created
22068 // by intrinsics or other nodes.
22069 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22070 while (true) {
22071 // There can be multiple store sequences on the same chain.
22072 // Keep trying to merge store sequences until we are unable to do so
22073 // or until we merge the last store on the chain.
22074 bool Changed = mergeConsecutiveStores(ST);
22075 if (!Changed) break;
22076 // Return N as merge only uses CombineTo and no worklist clean
22077 // up is necessary.
22078 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22079 return SDValue(N, 0);
22080 }
22081 }
22082
22083 // Try transforming N to an indexed store.
22084 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22085 return SDValue(N, 0);
22086
22087 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22088 //
22089 // Make sure to do this only after attempting to merge stores in order to
22090 // avoid changing the types of some subset of stores due to visit order,
22091 // preventing their merging.
22092 if (isa<ConstantFPSDNode>(ST->getValue())) {
22093 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22094 return NewSt;
22095 }
22096
22097 if (SDValue NewSt = splitMergedValStore(ST))
22098 return NewSt;
22099
22100 return ReduceLoadOpStoreWidth(N);
22101}
22102
22103SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22104 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22105 if (!LifetimeEnd->hasOffset())
22106 return SDValue();
22107
22108 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
22109 LifetimeEnd->getOffset(), false);
22110
22111 // We walk up the chains to find stores.
22112 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22113 while (!Chains.empty()) {
22114 SDValue Chain = Chains.pop_back_val();
22115 if (!Chain.hasOneUse())
22116 continue;
22117 switch (Chain.getOpcode()) {
22118 case ISD::TokenFactor:
22119 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22120 Chains.push_back(Chain.getOperand(--Nops));
22121 break;
22122 case ISD::LIFETIME_START:
22123 case ISD::LIFETIME_END:
22124 // We can forward past any lifetime start/end that can be proven not to
22125 // alias the node.
22126 if (!mayAlias(Chain.getNode(), N))
22127 Chains.push_back(Chain.getOperand(0));
22128 break;
22129 case ISD::STORE: {
22130 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22131 // TODO: Can relax for unordered atomics (see D66309)
22132 if (!ST->isSimple() || ST->isIndexed())
22133 continue;
22134 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22135 // The bounds of a scalable store are not known until runtime, so this
22136 // store cannot be elided.
22137 if (StoreSize.isScalable())
22138 continue;
22139 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22140 // If we store purely within object bounds just before its lifetime ends,
22141 // we can remove the store.
22142 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
22143 StoreSize.getFixedValue() * 8)) {
22144 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22145 dbgs() << "\nwithin LIFETIME_END of : ";
22146 LifetimeEndBase.dump(); dbgs() << "\n");
22147 CombineTo(ST, ST->getChain());
22148 return SDValue(N, 0);
22149 }
22150 }
22151 }
22152 }
22153 return SDValue();
22154}
22155
22156 /// For the store instruction sequence below, the F and I values
22157 /// are bundled together as an i64 value before being stored into memory.
22158 /// Sometimes it is more efficient to generate separate stores for F and I,
22159/// which can remove the bitwise instructions or sink them to colder places.
22160///
22161/// (store (or (zext (bitcast F to i32) to i64),
22162/// (shl (zext I to i64), 32)), addr) -->
22163/// (store F, addr) and (store I, addr+4)
22164///
22165/// Similarly, splitting for other merged store can also be beneficial, like:
22166/// For pair of {i32, i32}, i64 store --> two i32 stores.
22167/// For pair of {i32, i16}, i64 store --> two i32 stores.
22168/// For pair of {i16, i16}, i32 store --> two i16 stores.
22169/// For pair of {i16, i8}, i32 store --> two i16 stores.
22170/// For pair of {i8, i8}, i16 store --> two i8 stores.
22171///
22172/// We allow each target to determine specifically which kind of splitting is
22173/// supported.
22174///
22175/// The store patterns are commonly seen from the simple code snippet below
22176 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
22177/// void goo(const std::pair<int, float> &);
22178/// hoo() {
22179/// ...
22180/// goo(std::make_pair(tmp, ftmp));
22181/// ...
22182/// }
22183///
22184SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
22185 if (OptLevel == CodeGenOptLevel::None)
22186 return SDValue();
22187
22188 // Can't change the number of memory accesses for a volatile store or break
22189 // atomicity for an atomic one.
22190 if (!ST->isSimple())
22191 return SDValue();
22192
22193 SDValue Val = ST->getValue();
22194 SDLoc DL(ST);
22195
22196 // Match OR operand.
22197 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
22198 return SDValue();
22199
22200 // Match SHL operand and get Lower and Higher parts of Val.
22201 SDValue Op1 = Val.getOperand(0);
22202 SDValue Op2 = Val.getOperand(1);
22203 SDValue Lo, Hi;
22204 if (Op1.getOpcode() != ISD::SHL) {
22205 std::swap(Op1, Op2);
22206 if (Op1.getOpcode() != ISD::SHL)
22207 return SDValue();
22208 }
22209 Lo = Op2;
22210 Hi = Op1.getOperand(0);
22211 if (!Op1.hasOneUse())
22212 return SDValue();
22213
22214 // Match shift amount to HalfValBitSize.
22215 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
22216 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
22217 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22218 return SDValue();
22219
22220 // Lo and Hi are zero-extended from an integer type no wider than
22221 // HalfValBitSize (e.g. from i32 or narrower to i64).
22222 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
22223 !Lo.getOperand(0).getValueType().isScalarInteger() ||
22224 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
22225 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
22226 !Hi.getOperand(0).getValueType().isScalarInteger() ||
22227 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
22228 return SDValue();
22229
22230 // Use the EVT of low and high parts before bitcast as the input
22231 // of target query.
22232 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
22233 ? Lo.getOperand(0).getValueType()
22234 : Lo.getValueType();
22235 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
22236 ? Hi.getOperand(0).getValueType()
22237 : Hi.getValueType();
22238 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
22239 return SDValue();
22240
22241 // Start to split store.
22242 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22243 AAMDNodes AAInfo = ST->getAAInfo();
22244
22245 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
22246 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
22247 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
22248 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
22249
22250 SDValue Chain = ST->getChain();
22251 SDValue Ptr = ST->getBasePtr();
22252 // Lower value store.
22253 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22254 ST->getOriginalAlign(), MMOFlags, AAInfo);
22255 Ptr =
22256 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
22257 // Higher value store.
22258 SDValue St1 = DAG.getStore(
22259 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
22260 ST->getOriginalAlign(), MMOFlags, AAInfo);
22261 return St1;
22262}
22263
22264// Merge an insertion into an existing shuffle:
22265// (insert_vector_elt (vector_shuffle X, Y, Mask),
22266 // (extract_vector_elt X, N), InsIndex)
22267// --> (vector_shuffle X, Y, NewMask)
22268// and variations where shuffle operands may be CONCAT_VECTORS.
22269 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
22270 SmallVectorImpl<int> &NewMask, SDValue Elt,
22271 unsigned InsIndex) {
22272 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22273 !isa<ConstantSDNode>(Elt.getOperand(1)))
22274 return false;
22275
22276 // Vec's operand 0 is using indices from 0 to N-1 and
22277 // operand 1 from N to 2N - 1, where N is the number of
22278 // elements in the vectors.
22279 SDValue InsertVal0 = Elt.getOperand(0);
22280 int ElementOffset = -1;
22281
22282 // We explore the inputs of the shuffle in order to see if we find the
22283 // source of the extract_vector_elt. If so, we can use it to modify the
22284 // shuffle rather than perform an insert_vector_elt.
22285 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
22286 ArgWorkList.emplace_back(Mask.size(), Y);
22287 ArgWorkList.emplace_back(0, X);
22288
22289 while (!ArgWorkList.empty()) {
22290 int ArgOffset;
22291 SDValue ArgVal;
22292 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
22293
22294 if (ArgVal == InsertVal0) {
22295 ElementOffset = ArgOffset;
22296 break;
22297 }
22298
22299 // Peek through concat_vector.
22300 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
22301 int CurrentArgOffset =
22302 ArgOffset + ArgVal.getValueType().getVectorNumElements();
22303 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
22304 for (SDValue Op : reverse(ArgVal->ops())) {
22305 CurrentArgOffset -= Step;
22306 ArgWorkList.emplace_back(CurrentArgOffset, Op);
22307 }
22308
22309 // Make sure we went through all the elements and did not screw up index
22310 // computation.
22311 assert(CurrentArgOffset == ArgOffset);
22312 }
22313 }
22314
22315 // If we failed to find a match, see if we can replace an UNDEF shuffle
22316 // operand.
22317 if (ElementOffset == -1) {
22318 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
22319 return false;
22320 ElementOffset = Mask.size();
22321 Y = InsertVal0;
22322 }
22323
22324 NewMask.assign(Mask.begin(), Mask.end());
22325 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
22326 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
22327 "NewMask[InsIndex] is out of bound");
22328 return true;
22329}
22330
22331// Merge an insertion into an existing shuffle:
22332// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
22333// InsIndex)
22334// --> (vector_shuffle X, Y) and variations where shuffle operands may be
22335// CONCAT_VECTORS.
22336SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
22337 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22338 "Expected extract_vector_elt");
22339 SDValue InsertVal = N->getOperand(1);
22340 SDValue Vec = N->getOperand(0);
22341
22342 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
22343 if (!SVN || !Vec.hasOneUse())
22344 return SDValue();
22345
22346 ArrayRef<int> Mask = SVN->getMask();
22347 SDValue X = Vec.getOperand(0);
22348 SDValue Y = Vec.getOperand(1);
22349
22350 SmallVector<int, 16> NewMask(Mask);
22351 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
22352 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
22353 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
22354 if (LegalShuffle)
22355 return LegalShuffle;
22356 }
22357
22358 return SDValue();
22359}
22360
22361// Convert a disguised subvector insertion into a shuffle:
22362// insert_vector_elt V, (bitcast X from vector type), IdxC -->
22363// bitcast(shuffle (bitcast V), (extended X), Mask)
22364// Note: We do not use an insert_subvector node because that requires a
22365// legal subvector type.
22366SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
22367 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22368 "Expected extract_vector_elt");
22369 SDValue InsertVal = N->getOperand(1);
22370
22371 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
22372 !InsertVal.getOperand(0).getValueType().isVector())
22373 return SDValue();
22374
22375 SDValue SubVec = InsertVal.getOperand(0);
22376 SDValue DestVec = N->getOperand(0);
22377 EVT SubVecVT = SubVec.getValueType();
22378 EVT VT = DestVec.getValueType();
22379 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
22380 // If the source only has a single vector element, the cost of creating and
22381 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
22382 if (NumSrcElts == 1)
22383 return SDValue();
22384 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
22385 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
22386
22387 // Step 1: Create a shuffle mask that implements this insert operation. The
22388 // vector that we are inserting into will be operand 0 of the shuffle, so
22389 // those elements are just 'i'. The inserted subvector is in the first
22390 // positions of operand 1 of the shuffle. Example:
22391 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
22392 SmallVector<int, 16> Mask(NumMaskVals);
22393 for (unsigned i = 0; i != NumMaskVals; ++i) {
22394 if (i / NumSrcElts == InsIndex)
22395 Mask[i] = (i % NumSrcElts) + NumMaskVals;
22396 else
22397 Mask[i] = i;
22398 }
22399
22400 // Bail out if the target cannot handle the shuffle we want to create.
22401 EVT SubVecEltVT = SubVecVT.getVectorElementType();
22402 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
22403 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
22404 return SDValue();
22405
22406 // Step 2: Create a wide vector from the inserted source vector by appending
22407 // undefined elements. This is the same size as our destination vector.
22408 SDLoc DL(N);
22409 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
22410 ConcatOps[0] = SubVec;
22411 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
22412
22413 // Step 3: Shuffle in the padded subvector.
22414 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
22415 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
22416 AddToWorklist(PaddedSubV.getNode());
22417 AddToWorklist(DestVecBC.getNode());
22418 AddToWorklist(Shuf.getNode());
22419 return DAG.getBitcast(VT, Shuf);
22420}
22421
22422// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
22423 // possible and the new load will be quick. We use more loads but fewer shuffles
22424// and inserts.
22425SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
22426 EVT VT = N->getValueType(0);
22427
22428 // InsIndex is expected to be the first or last lane.
22429 if (!VT.isFixedLengthVector() ||
22430 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
22431 return SDValue();
22432
22433 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
22434 // depending on the InsIndex.
22435 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
22436 SDValue Scalar = N->getOperand(1);
22437 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
22438 return InsIndex == P.index() || P.value() < 0 ||
22439 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
22440 (InsIndex == VT.getVectorNumElements() - 1 &&
22441 P.value() == (int)P.index() + 1);
22442 }))
22443 return SDValue();
22444
22445 // We optionally skip over an extend so long as both loads are extended in the
22446 // same way from the same type.
22447 unsigned Extend = 0;
22448 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
22449 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
22450 Scalar.getOpcode() == ISD::ANY_EXTEND) {
22451 Extend = Scalar.getOpcode();
22452 Scalar = Scalar.getOperand(0);
22453 }
22454
22455 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
22456 if (!ScalarLoad)
22457 return SDValue();
22458
22459 SDValue Vec = Shuffle->getOperand(0);
22460 if (Extend) {
22461 if (Vec.getOpcode() != Extend)
22462 return SDValue();
22463 Vec = Vec.getOperand(0);
22464 }
22465 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
22466 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
22467 return SDValue();
22468
22469 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
22470 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
22471 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22472 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22473 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
22474 return SDValue();
22475
22476 // Check that the offset between the pointers is such that they form a single
22477 // contiguous load.
22478 if (InsIndex == 0) {
22479 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
22480 -1))
22481 return SDValue();
22482 } else {
22483 if (!DAG.areNonVolatileConsecutiveLoads(
22484 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22485 return SDValue();
22486 }
22487
22488 // And that the new unaligned load will be fast.
22489 unsigned IsFast = 0;
22490 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22491 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22492 Vec.getValueType(), VecLoad->getAddressSpace(),
22493 NewAlign, VecLoad->getMemOperand()->getFlags(),
22494 &IsFast) ||
22495 !IsFast)
22496 return SDValue();
22497
22498 // Calculate the new Ptr and create the new load.
22499 SDLoc DL(N);
22500 SDValue Ptr = ScalarLoad->getBasePtr();
22501 if (InsIndex != 0)
22502 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22503 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22504 MachinePointerInfo PtrInfo =
22505 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22506 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22507
22508 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22509 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22510 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22511 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22512 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22513}
22514
22515SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22516 SDValue InVec = N->getOperand(0);
22517 SDValue InVal = N->getOperand(1);
22518 SDValue EltNo = N->getOperand(2);
22519 SDLoc DL(N);
22520
22521 EVT VT = InVec.getValueType();
22522 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22523
22524 // Insert into out-of-bounds element is undefined.
22525 if (IndexC && VT.isFixedLengthVector() &&
22526 IndexC->getZExtValue() >= VT.getVectorNumElements())
22527 return DAG.getUNDEF(VT);
22528
22529 // Remove redundant insertions:
22530 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22531 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22532 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22533 return InVec;
22534
22535 if (!IndexC) {
22536 // If this is variable insert to undef vector, it might be better to splat:
22537 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22538 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22539 return DAG.getSplat(VT, DL, InVal);
22540 return SDValue();
22541 }
22542
22543 if (VT.isScalableVector())
22544 return SDValue();
22545
22546 unsigned NumElts = VT.getVectorNumElements();
22547
22548 // We must know which element is being inserted for folds below here.
22549 unsigned Elt = IndexC->getZExtValue();
22550
22551 // Handle <1 x ???> vector insertion special cases.
22552 if (NumElts == 1) {
22553 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22554 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22555 InVal.getOperand(0).getValueType() == VT &&
22556 isNullConstant(InVal.getOperand(1)))
22557 return InVal.getOperand(0);
22558 }
22559
22560 // Canonicalize insert_vector_elt dag nodes.
22561 // Example:
22562 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22563 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22564 //
22565 // Do this only if the child insert_vector node has one use; also
22566 // do this only if indices are both constants and Idx1 < Idx0.
22567 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22568 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22569 unsigned OtherElt = InVec.getConstantOperandVal(2);
22570 if (Elt < OtherElt) {
22571 // Swap nodes.
22572 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22573 InVec.getOperand(0), InVal, EltNo);
22574 AddToWorklist(NewOp.getNode());
22575 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22576 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22577 }
22578 }
22579
22580 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22581 return Shuf;
22582
22583 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22584 return Shuf;
22585
22586 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22587 return Shuf;
22588
22589 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
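// For example (illustrative):
//   insert_vector_elt (insert_vector_elt (insert_vector_elt undef, %a, 0),
//                      %b, 1), %c, 2
// over v4i32 becomes build_vector %a, %b, %c, undef.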
22590 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22591 // vXi1 vector - we don't need to recurse.
22592 if (NumElts == 1)
22593 return DAG.getBuildVector(VT, DL, {InVal});
22594
22595 // If we haven't already collected the element, insert into the op list.
22596 EVT MaxEltVT = InVal.getValueType();
22597 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22598 unsigned Idx) {
22599 if (!Ops[Idx]) {
22600 Ops[Idx] = Elt;
22601 if (VT.isInteger()) {
22602 EVT EltVT = Elt.getValueType();
22603 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22604 }
22605 }
22606 };
22607
22608 // Ensure all the operands are the same value type, fill any missing
22609 // operands with UNDEF and create the BUILD_VECTOR.
22610 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22611 assert(Ops.size() == NumElts && "Unexpected vector size");
22612 for (SDValue &Op : Ops) {
22613 if (Op)
22614 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22615 else
22616 Op = DAG.getUNDEF(MaxEltVT);
22617 }
22618 return DAG.getBuildVector(VT, DL, Ops);
22619 };
22620
22621 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22622 Ops[Elt] = InVal;
22623
22624 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22625 for (SDValue CurVec = InVec; CurVec;) {
22626 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22627 if (CurVec.isUndef())
22628 return CanonicalizeBuildVector(Ops);
22629
22630 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22631 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22632 for (unsigned I = 0; I != NumElts; ++I)
22633 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22634 return CanonicalizeBuildVector(Ops);
22635 }
22636
22637 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22638 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22639 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22640 return CanonicalizeBuildVector(Ops);
22641 }
22642
22643 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22644 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22645 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22646 if (CurIdx->getAPIntValue().ult(NumElts)) {
22647 unsigned Idx = CurIdx->getZExtValue();
22648 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22649
22650 // Found entire BUILD_VECTOR.
22651 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22652 return CanonicalizeBuildVector(Ops);
22653
22654 CurVec = CurVec->getOperand(0);
22655 continue;
22656 }
22657
22658 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22659 // update the shuffle mask (and second operand if we started with a unary
22660 // shuffle) and create a new legal shuffle.
22661 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22662 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22663 SDValue LHS = SVN->getOperand(0);
22664 SDValue RHS = SVN->getOperand(1);
22665 SmallVector<int, 16> Mask(SVN->getMask());
22666 bool Merged = true;
22667 for (auto I : enumerate(Ops)) {
22668 SDValue &Op = I.value();
22669 if (Op) {
22670 SmallVector<int, 16> NewMask;
22671 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22672 Merged = false;
22673 break;
22674 }
22675 Mask = std::move(NewMask);
22676 }
22677 }
22678 if (Merged)
22679 if (SDValue NewShuffle =
22680 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22681 return NewShuffle;
22682 }
22683
22684 // If all insertions are zero value, try to convert to AND mask.
22685 // TODO: Do this for -1 with OR mask?
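// For example (illustrative): inserting constant zero into lanes 1 and 3 of
// v4i32 %x can instead be done as (and %x, <-1, 0, -1, 0>).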
22686 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22687 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22688 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22689 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22690 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22691 SmallVector<SDValue, 8> Mask(NumElts);
22692 for (unsigned I = 0; I != NumElts; ++I)
22693 Mask[I] = Ops[I] ? Zero : AllOnes;
22694 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22695 DAG.getBuildVector(VT, DL, Mask));
22696 }
22697
22698 // Failed to find a match in the chain - bail.
22699 break;
22700 }
22701
22702 // See if we can fill in the missing constant elements as zeros.
22703 // TODO: Should we do this for any constant?
22704 APInt DemandedZeroElts = APInt::getZero(NumElts);
22705 for (unsigned I = 0; I != NumElts; ++I)
22706 if (!Ops[I])
22707 DemandedZeroElts.setBit(I);
22708
22709 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22710 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22711 : DAG.getConstantFP(0, DL, MaxEltVT);
22712 for (unsigned I = 0; I != NumElts; ++I)
22713 if (!Ops[I])
22714 Ops[I] = Zero;
22715
22716 return CanonicalizeBuildVector(Ops);
22717 }
22718 }
22719
22720 return SDValue();
22721}
22722
22723SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22724 SDValue EltNo,
22725 LoadSDNode *OriginalLoad) {
22726 assert(OriginalLoad->isSimple());
22727
22728 EVT ResultVT = EVE->getValueType(0);
22729 EVT VecEltVT = InVecVT.getVectorElementType();
22730
22731 // If the vector element type is not a multiple of a byte then we are unable
22732 // to correctly compute an address to load only the extracted element as a
22733 // scalar.
22734 if (!VecEltVT.isByteSized())
22735 return SDValue();
22736
22737 ISD::LoadExtType ExtTy =
22738 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
22739 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22740 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22741 return SDValue();
22742
22743 Align Alignment = OriginalLoad->getAlign();
22744 MachinePointerInfo MPI;
22745 SDLoc DL(EVE);
22746 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22747 int Elt = ConstEltNo->getZExtValue();
22748 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22749 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22750 Alignment = commonAlignment(Alignment, PtrOff);
22751 } else {
22752 // Discard the pointer info except the address space because the memory
22753 // operand can't represent this new access since the offset is variable.
22754 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22755 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22756 }
22757
22758 unsigned IsFast = 0;
22759 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22760 OriginalLoad->getAddressSpace(), Alignment,
22761 OriginalLoad->getMemOperand()->getFlags(),
22762 &IsFast) ||
22763 !IsFast)
22764 return SDValue();
22765
22766 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22767 InVecVT, EltNo);
22768
22769 // We are replacing a vector load with a scalar load. The new load must have
22770 // identical memory op ordering to the original.
22771 SDValue Load;
22772 if (ResultVT.bitsGT(VecEltVT)) {
22773 // If the result type of vextract is wider than the load, then issue an
22774 // extending load instead.
22775 ISD::LoadExtType ExtType =
22776 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22777 : ISD::EXTLOAD;
22778 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22779 NewPtr, MPI, VecEltVT, Alignment,
22780 OriginalLoad->getMemOperand()->getFlags(),
22781 OriginalLoad->getAAInfo());
22782 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22783 } else {
22784 // The result type is narrower or the same width as the vector element
22785 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22786 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22787 OriginalLoad->getAAInfo());
22788 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22789 if (ResultVT.bitsLT(VecEltVT))
22790 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22791 else
22792 Load = DAG.getBitcast(ResultVT, Load);
22793 }
22794 ++OpsNarrowed;
22795 return Load;
22796}
22797
22798/// Transform a vector binary operation into a scalar binary operation by moving
22799/// the math/logic after an extract element of a vector.
22800 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
22801 const SDLoc &DL, bool LegalTypes) {
22802 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22803 SDValue Vec = ExtElt->getOperand(0);
22804 SDValue Index = ExtElt->getOperand(1);
22805 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22806 unsigned Opc = Vec.getOpcode();
22807 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
22808 Vec->getNumValues() != 1)
22809 return SDValue();
22810
22811 // Targets may want to avoid this to prevent an expensive register transfer.
22812 if (!TLI.shouldScalarizeBinop(Vec))
22813 return SDValue();
22814
22815 EVT ResVT = ExtElt->getValueType(0);
22816 if (Opc == ISD::SETCC &&
22817 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
22818 return SDValue();
22819
22820 // Extracting an element of a vector constant is constant-folded, so this
22821 // transform is just replacing a vector op with a scalar op while moving the
22822 // extract.
22823 SDValue Op0 = Vec.getOperand(0);
22824 SDValue Op1 = Vec.getOperand(1);
22825 APInt SplatVal;
22826 if (!isAnyConstantBuildVector(Op0, true) &&
22827 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
22828 !isAnyConstantBuildVector(Op1, true) &&
22829 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
22830 return SDValue();
22831
22832 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
22833 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
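// A concrete instance (illustrative):
//   extractelt (add <4 x i32> %x, <1, 2, 3, 4>), 2
// becomes add (extractelt %x, 2), 3, because the extract of the constant
// operand is constant-folded.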
22834 if (Opc == ISD::SETCC) {
22835 EVT OpVT = Op0.getValueType().getVectorElementType();
22836 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
22837 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
22838 SDValue NewVal = DAG.getSetCC(
22839 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22840 // We may need to sign- or zero-extend the result to match the same
22841 // behaviour as the vector version of SETCC.
22842 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
22843 if (ResVT != MVT::i1 &&
22844 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
22845 VecBoolContents != TLI.getBooleanContents(ResVT)) {
22846 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
22847 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
22848 DAG.getValueType(MVT::i1));
22849 else
22850 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
22851 }
22852 return NewVal;
22853 }
22854 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
22855 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
22856 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
22857}
22858
22859 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22860 // recursively analyse all of its users and try to model them as
22861 // bit sequence extractions. If all of them agree on the new, narrower element
22862 // type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22863 // new element type, do so now.
22864 // This is mainly useful to recover from legalization that scalarized
22865 // the vector as wide elements; here we rebuild it with narrower elements.
22866//
22867// Some more nodes could be modelled if that helps cover interesting patterns.
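// For example (illustrative, little-endian): if the only users of
//   %e = extract_vector_elt <2 x i64> %x, 1
// are (trunc %e to i32) and (trunc (srl %e, 32) to i32), and those truncated
// values only feed BUILD_VECTORs, both can be rebuilt as
// extract_vector_elt (bitcast %x to <4 x i32>) at indices 2 and 3.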
22868bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22869 SDNode *N) {
22870 // We perform this optimization post type-legalization because
22871 // the type-legalizer often scalarizes integer-promoted vectors.
22872 // Performing this optimization earlier may cause legalization cycles.
22873 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22874 return false;
22875
22876 // TODO: Add support for big-endian.
22877 if (DAG.getDataLayout().isBigEndian())
22878 return false;
22879
22880 SDValue VecOp = N->getOperand(0);
22881 EVT VecVT = VecOp.getValueType();
22882 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22883
22884 // We must start with a constant extraction index.
22885 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22886 if (!IndexC)
22887 return false;
22888
22889 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22890 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
22891
22892 // TODO: deal with the case of implicit anyext of the extraction.
22893 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22894 EVT ScalarVT = N->getValueType(0);
22895 if (VecVT.getScalarType() != ScalarVT)
22896 return false;
22897
22898 // TODO: deal with the cases other than everything being integer-typed.
22899 if (!ScalarVT.isScalarInteger())
22900 return false;
22901
22902 struct Entry {
22903 SDNode *Producer;
22904
22905 // Which bits of VecOp does it contain?
22906 unsigned BitPos;
22907 int NumBits;
22908 // NOTE: the actual width of \p Producer may be wider than NumBits!
22909
22910 Entry(Entry &&) = default;
22911 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22912 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22913
22914 Entry() = delete;
22915 Entry(const Entry &) = delete;
22916 Entry &operator=(const Entry &) = delete;
22917 Entry &operator=(Entry &&) = delete;
22918 };
22919 SmallVector<Entry, 32> Worklist;
22920 SmallVector<Entry, 32> Leafs;
22921
22922 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22923 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22924 /*NumBits=*/VecEltBitWidth);
22925
22926 while (!Worklist.empty()) {
22927 Entry E = Worklist.pop_back_val();
22928 // Does the node not even use any of the VecOp bits?
22929 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22930 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22931 return false; // Let's allow the other combines to clean this up first.
22932 // Did we fail to model any of the users of the Producer?
22933 bool ProducerIsLeaf = false;
22934 // Look at each user of this Producer.
22935 for (SDNode *User : E.Producer->users()) {
22936 switch (User->getOpcode()) {
22937 // TODO: support ISD::BITCAST
22938 // TODO: support ISD::ANY_EXTEND
22939 // TODO: support ISD::ZERO_EXTEND
22940 // TODO: support ISD::SIGN_EXTEND
22941 case ISD::TRUNCATE:
22942 // Truncation simply means we keep position, but extract fewer bits.
22943 Worklist.emplace_back(User, E.BitPos,
22944 /*NumBits=*/User->getValueSizeInBits(0));
22945 break;
22946 // TODO: support ISD::SRA
22947 // TODO: support ISD::SHL
22948 case ISD::SRL:
22949 // We should be shifting the Producer by a constant amount.
22950 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22951 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22952 // Logical right-shift means that we start extraction later,
22953 // but stop it at the same position we did previously.
22954 unsigned ShAmt = ShAmtC->getZExtValue();
22955 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22956 break;
22957 }
22958 [[fallthrough]];
22959 default:
22960 // We can not model this user of the Producer.
22961 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22962 ProducerIsLeaf = true;
22963 // Profitability check: all users that we can not model
22964 // must be ISD::BUILD_VECTOR's.
22965 if (User->getOpcode() != ISD::BUILD_VECTOR)
22966 return false;
22967 break;
22968 }
22969 }
22970 if (ProducerIsLeaf)
22971 Leafs.emplace_back(std::move(E));
22972 }
22973
22974 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22975
22976 // If we are still at the same element granularity, give up.
22977 if (NewVecEltBitWidth == VecEltBitWidth)
22978 return false;
22979
22980 // The vector width must be a multiple of the new element width.
22981 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22982 return false;
22983
22984 // All leafs must agree on the new element width.
22985 // All leafs must not expect any "padding" bits on top of that width.
22986 // All leafs must start extraction at a multiple of that width.
22987 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22988 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22989 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22990 E.BitPos % NewVecEltBitWidth == 0;
22991 }))
22992 return false;
22993
22994 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22995 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22996 VecVT.getSizeInBits() / NewVecEltBitWidth);
22997
22998 if (LegalTypes &&
22999 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23000 return false;
23001
23002 if (LegalOperations &&
23003 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23004 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23005 return false;
23006
23007 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23008 for (const Entry &E : Leafs) {
23009 SDLoc DL(E.Producer);
23010 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23011 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23012 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23013 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23014 DAG.getVectorIdxConstant(NewIndex, DL));
23015 CombineTo(E.Producer, V);
23016 }
23017
23018 return true;
23019}
23020
23021SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23022 SDValue VecOp = N->getOperand(0);
23023 SDValue Index = N->getOperand(1);
23024 EVT ScalarVT = N->getValueType(0);
23025 EVT VecVT = VecOp.getValueType();
23026 if (VecOp.isUndef())
23027 return DAG.getUNDEF(ScalarVT);
23028
23029 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23030 //
23031 // This only really matters if the index is non-constant since other combines
23032 // on the constant elements already work.
23033 SDLoc DL(N);
23034 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23035 Index == VecOp.getOperand(2)) {
23036 SDValue Elt = VecOp.getOperand(1);
23037 AddUsersToWorklist(VecOp.getNode());
23038 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23039 }
23040
23041 // (vextract (scalar_to_vector val, 0) -> val
23042 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23043 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23044 if (DAG.isKnownNeverZero(Index))
23045 return DAG.getUNDEF(ScalarVT);
23046
23047 // Check if the result type doesn't match the inserted element type.
23048 // The inserted element and extracted element may have mismatched bitwidth.
23049 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23050 SDValue InOp = VecOp.getOperand(0);
23051 if (InOp.getValueType() != ScalarVT) {
23052 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23053 if (InOp.getValueType().bitsGT(ScalarVT))
23054 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23055 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23056 }
23057 return InOp;
23058 }
23059
23060 // extract_vector_elt of out-of-bounds element -> UNDEF
23061 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23062 if (IndexC && VecVT.isFixedLengthVector() &&
23063 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23064 return DAG.getUNDEF(ScalarVT);
23065
23066 // extract_vector_elt (build_vector x, y), 1 -> y
23067 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23068 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23069 TLI.isTypeLegal(VecVT)) {
23070 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23071 VecVT.isFixedLengthVector()) &&
23072 "BUILD_VECTOR used for scalable vectors");
23073 unsigned IndexVal =
23074 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23075 SDValue Elt = VecOp.getOperand(IndexVal);
23076 EVT InEltVT = Elt.getValueType();
23077
23078 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23079 isNullConstant(Elt)) {
23080 // Sometimes build_vector's scalar input types do not match result type.
23081 if (ScalarVT == InEltVT)
23082 return Elt;
23083
23084 // TODO: It may be useful to truncate if free if the build_vector
23085 // implicitly converts.
23086 }
23087 }
23088
23089 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23090 return BO;
23091
23092 if (VecVT.isScalableVector())
23093 return SDValue();
23094
23095 // All the code from this point onwards assumes fixed width vectors, but it's
23096 // possible that some of the combinations could be made to work for scalable
23097 // vectors too.
23098 unsigned NumElts = VecVT.getVectorNumElements();
23099 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23100
23101 // See if the extracted element is constant, in which case fold it if it's
23102 // a legal fp immediate.
23103 if (IndexC && ScalarVT.isFloatingPoint()) {
23104 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23105 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23106 if (KnownElt.isConstant()) {
23107 APFloat CstFP =
23108 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23109 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23110 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23111 }
23112 }
23113
23114 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23115 // there are regressions on multiple targets without it. We can end up with a
23116 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23117 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23118 VecOp.hasOneUse()) {
23119 // The vector index of the LSBs of the source depends on the endianness.
23120 bool IsLE = DAG.getDataLayout().isLittleEndian();
23121 unsigned ExtractIndex = IndexC->getZExtValue();
23122 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23123 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23124 SDValue BCSrc = VecOp.getOperand(0);
23125 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23126 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23127
23128 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23129 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23130 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23131 BCSrc.getScalarValueSizeInBits() ==
23132 BCSrc.getOperand(0).getScalarValueSizeInBits()) {
23133 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23134 // trunc i64 X to i32
23135 SDValue X = BCSrc.getOperand(0);
23136 EVT XVT = X.getValueType();
23137 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23138 "Extract element and scalar to vector can't change element type "
23139 "from FP to integer.");
23140 unsigned XBitWidth = X.getValueSizeInBits();
23141 unsigned Scale = XBitWidth / VecEltBitWidth;
23142 BCTruncElt = IsLE ? 0 : Scale - 1;
23143
23144 // An extract element return value type can be wider than its vector
23145 // operand element type. In that case, the high bits are undefined, so
23146 // it's possible that we may need to extend rather than truncate.
23147 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23148 assert(XBitWidth % VecEltBitWidth == 0 &&
23149 "Scalar bitwidth must be a multiple of vector element bitwidth");
23150
23151 if (ExtractIndex != BCTruncElt) {
23152 unsigned ShiftIndex =
23153 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23154 X = DAG.getNode(
23155 ISD::SRL, DL, XVT, X,
23156 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23157 }
23158
23159 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23160 }
23161 }
23162 }
23163
23164 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23165 // We only perform this optimization before the op legalization phase because
23166 // we may introduce new vector instructions which are not backed by TD
23167 // patterns - for example, on AVX, extracting elements from a wide vector
23168 // without using extract_subvector. However, if we can find an underlying
23169 // scalar value, then we can always use that.
23170 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23171 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23172 // Find the new index to extract from.
23173 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23174
23175 // Extracting an undef index is undef.
23176 if (OrigElt == -1)
23177 return DAG.getUNDEF(ScalarVT);
23178
23179 // Select the right vector half to extract from.
23180 SDValue SVInVec;
23181 if (OrigElt < (int)NumElts) {
23182 SVInVec = VecOp.getOperand(0);
23183 } else {
23184 SVInVec = VecOp.getOperand(1);
23185 OrigElt -= NumElts;
23186 }
23187
23188 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23189 SDValue InOp = SVInVec.getOperand(OrigElt);
23190 if (InOp.getValueType() != ScalarVT) {
23191 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23192 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23193 }
23194
23195 return InOp;
23196 }
23197
23198 // FIXME: We should handle recursing on other vector shuffles and
23199 // scalar_to_vector here as well.
23200
23201 if (!LegalOperations ||
23202 // FIXME: Should really be just isOperationLegalOrCustom.
23203 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23204 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
23205 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23206 DAG.getVectorIdxConstant(OrigElt, DL));
23207 }
23208 }
23209
23210 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23211 // simplify it based on the (valid) extraction indices.
23212 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23213 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23214 Use->getOperand(0) == VecOp &&
23215 isa<ConstantSDNode>(Use->getOperand(1));
23216 })) {
23217 APInt DemandedElts = APInt::getZero(NumElts);
23218 for (SDNode *User : VecOp->users()) {
23219 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23220 if (CstElt->getAPIntValue().ult(NumElts))
23221 DemandedElts.setBit(CstElt->getZExtValue());
23222 }
23223 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
23224 // We simplified the vector operand of this extract element. If this
23225 // extract is not dead, visit it again so it is folded properly.
23226 if (N->getOpcode() != ISD::DELETED_NODE)
23227 AddToWorklist(N);
23228 return SDValue(N, 0);
23229 }
23230 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
23231 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
23232 // We simplified the vector operand of this extract element. If this
23233 // extract is not dead, visit it again so it is folded properly.
23234 if (N->getOpcode() != ISD::DELETED_NODE)
23235 AddToWorklist(N);
23236 return SDValue(N, 0);
23237 }
23238 }
23239
23240 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
23241 return SDValue(N, 0);
23242
23243 // Everything under here is trying to match an extract of a loaded value.
23244 // If the result of the load has to be truncated, then it's not necessarily
23245 // profitable.
23246 bool BCNumEltsChanged = false;
23247 EVT ExtVT = VecVT.getVectorElementType();
23248 EVT LVT = ExtVT;
23249 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
23250 return SDValue();
23251
23252 if (VecOp.getOpcode() == ISD::BITCAST) {
23253 // Don't duplicate a load with other uses.
23254 if (!VecOp.hasOneUse())
23255 return SDValue();
23256
23257 EVT BCVT = VecOp.getOperand(0).getValueType();
23258 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
23259 return SDValue();
23260 if (NumElts != BCVT.getVectorNumElements())
23261 BCNumEltsChanged = true;
23262 VecOp = VecOp.getOperand(0);
23263 ExtVT = BCVT.getVectorElementType();
23264 }
23265
23266 // extract (vector load $addr), i --> load $addr + i * size
23267 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
23268 ISD::isNormalLoad(VecOp.getNode()) &&
23269 !Index->hasPredecessor(VecOp.getNode())) {
23270 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
23271 if (VecLoad && VecLoad->isSimple())
23272 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
23273 }
23274
23275 // Perform only after legalization to ensure build_vector / vector_shuffle
23276 // optimizations have already been done.
23277 if (!LegalOperations || !IndexC)
23278 return SDValue();
23279
23280 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
23281 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
23282 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
23283 int Elt = IndexC->getZExtValue();
23284 LoadSDNode *LN0 = nullptr;
23285 if (ISD::isNormalLoad(VecOp.getNode())) {
23286 LN0 = cast<LoadSDNode>(VecOp);
23287 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23288 VecOp.getOperand(0).getValueType() == ExtVT &&
23289 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
23290 // Don't duplicate a load with other uses.
23291 if (!VecOp.hasOneUse())
23292 return SDValue();
23293
23294 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
23295 }
23296 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
23297 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
23298 // =>
23299 // (load $addr+1*size)
23300
23301 // Don't duplicate a load with other uses.
23302 if (!VecOp.hasOneUse())
23303 return SDValue();
23304
23305 // If the bit convert changed the number of elements, it is unsafe
23306 // to examine the mask.
23307 if (BCNumEltsChanged)
23308 return SDValue();
23309
23310 // Select the input vector, guarding against out of range extract vector.
23311 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
23312 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
23313
23314 if (VecOp.getOpcode() == ISD::BITCAST) {
23315 // Don't duplicate a load with other uses.
23316 if (!VecOp.hasOneUse())
23317 return SDValue();
23318
23319 VecOp = VecOp.getOperand(0);
23320 }
23321 if (ISD::isNormalLoad(VecOp.getNode())) {
23322 LN0 = cast<LoadSDNode>(VecOp);
23323 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
23324 Index = DAG.getConstant(Elt, DL, Index.getValueType());
23325 }
23326 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
23327 VecVT.getVectorElementType() == ScalarVT &&
23328 (!LegalTypes ||
23329 TLI.isTypeLegal(
23330 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
23331 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
23332 // -> extract_vector_elt a, 0
23333 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
23334 // -> extract_vector_elt a, 1
23335 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
23336 // -> extract_vector_elt b, 0
23337 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
23338 // -> extract_vector_elt b, 1
23339 EVT ConcatVT = VecOp.getOperand(0).getValueType();
23340 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
23341 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
23342 Index.getValueType());
23343
23344 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
23345 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
23346 ConcatVT.getVectorElementType(),
23347 ConcatOp, NewIdx);
23348 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
23349 }
23350
23351 // Make sure we found a non-volatile load and the extractelement is
23352 // the only use.
23353 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
23354 return SDValue();
23355
23356 // If Idx was -1 above, Elt is going to be -1, so just return undef.
23357 if (Elt == -1)
23358 return DAG.getUNDEF(LVT);
23359
23360 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
23361}
23362
23363// Simplify (build_vec (ext )) to (bitcast (build_vec ))
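// For example (illustrative, little-endian):
//   (v2i64 build_vector (zext i32 %a), (zext i32 %b))
// --> (v2i64 bitcast (v4i32 build_vector %a, 0, %b, 0))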
23364SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
23365 // We perform this optimization post type-legalization because
23366 // the type-legalizer often scalarizes integer-promoted vectors.
23367 // Performing this optimization before may create bit-casts which
23368 // will be type-legalized to complex code sequences.
23369 // We perform this optimization only before the operation legalizer because we
23370 // may introduce illegal operations.
23371 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23372 return SDValue();
23373
23374 unsigned NumInScalars = N->getNumOperands();
23375 SDLoc DL(N);
23376 EVT VT = N->getValueType(0);
23377
23378 // Check to see if this is a BUILD_VECTOR of a bunch of values
23379 // which come from any_extend or zero_extend nodes. If so, we can create
23380 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
23381 // optimizations. We do not handle sign-extend because we can't fill the sign
23382 // using shuffles.
23383 EVT SourceType = MVT::Other;
23384 bool AllAnyExt = true;
23385
23386 for (unsigned i = 0; i != NumInScalars; ++i) {
23387 SDValue In = N->getOperand(i);
23388 // Ignore undef inputs.
23389 if (In.isUndef()) continue;
23390
23391 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
23392 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
23393
23394 // Abort if the element is not an extension.
23395 if (!ZeroExt && !AnyExt) {
23396 SourceType = MVT::Other;
23397 break;
23398 }
23399
23400 // The input is a ZeroExt or AnyExt. Check the original type.
23401 EVT InTy = In.getOperand(0).getValueType();
23402
23403 // Check that all of the widened source types are the same.
23404 if (SourceType == MVT::Other)
23405 // First time.
23406 SourceType = InTy;
23407 else if (InTy != SourceType) {
23408 // Multiple incoming types. Abort.
23409 SourceType = MVT::Other;
23410 break;
23411 }
23412
23413 // Check if all of the extends are ANY_EXTENDs.
23414 AllAnyExt &= AnyExt;
23415 }
23416
23417 // In order to have valid types, all of the inputs must be extended from the
23418 // same source type and all of the inputs must be any or zero extend.
23419 // Scalar sizes must be a power of two.
23420 EVT OutScalarTy = VT.getScalarType();
23421 bool ValidTypes =
23422 SourceType != MVT::Other &&
23423 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
23424 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
23425
23426 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
23427 // turn into a single shuffle instruction.
23428 if (!ValidTypes)
23429 return SDValue();
23430
23431 // If we already have a splat buildvector, then don't fold it if it means
23432 // introducing zeros.
23433 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
23434 return SDValue();
23435
23436 bool isLE = DAG.getDataLayout().isLittleEndian();
23437 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
23438 assert(ElemRatio > 1 && "Invalid element size ratio");
23439 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
23440 DAG.getConstant(0, DL, SourceType);
23441
23442 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
23443 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
23444
23445 // Populate the new build_vector
23446 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23447 SDValue Cast = N->getOperand(i);
23448 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
23449 Cast.getOpcode() == ISD::ZERO_EXTEND ||
23450 Cast.isUndef()) && "Invalid cast opcode");
23451 SDValue In;
23452 if (Cast.isUndef())
23453 In = DAG.getUNDEF(SourceType);
23454 else
23455 In = Cast->getOperand(0);
23456 unsigned Index = isLE ? (i * ElemRatio) :
23457 (i * ElemRatio + (ElemRatio - 1));
23458
23459 assert(Index < Ops.size() && "Invalid index");
23460 Ops[Index] = In;
23461 }
23462
23463 // The type of the new BUILD_VECTOR node.
23464 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
23465 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
23466 "Invalid vector size");
23467 // Check if the new vector type is legal.
23468 if (!isTypeLegal(VecVT) ||
23469 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
23470 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
23471 return SDValue();
23472
23473 // Make the new BUILD_VECTOR.
23474 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
23475
23476 // The new BUILD_VECTOR node has the potential to be further optimized.
23477 AddToWorklist(BV.getNode());
23478 // Bitcast to the desired type.
23479 return DAG.getBitcast(VT, BV);
23480}
23481
23482// Simplify (build_vec (trunc $1)
23483// (trunc (srl $1 half-width))
23484// (trunc (srl $1 (2 * half-width))))
23485// to (bitcast $1)
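// For example (illustrative, little-endian), with i64 %x:
//   (v4i16 build_vector (trunc %x), (trunc (srl %x, 16)),
//                       (trunc (srl %x, 32)), (trunc (srl %x, 48)))
// --> (v4i16 bitcast %x)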
23486SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
23487 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23488
23489 EVT VT = N->getValueType(0);
23490
23491 // Don't run this before LegalizeTypes if VT is legal.
23492 // Targets may have other preferences.
23493 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
23494 return SDValue();
23495
23496 // Only for little endian
23497 if (!DAG.getDataLayout().isLittleEndian())
23498 return SDValue();
23499
23500 SDLoc DL(N);
23501 EVT OutScalarTy = VT.getScalarType();
23502 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
23503
23504 // Only for power of two types to be sure that bitcast works well
23505 if (!isPowerOf2_64(ScalarTypeBitsize))
23506 return SDValue();
23507
23508 unsigned NumInScalars = N->getNumOperands();
23509
23510 // Look through bitcasts
23511 auto PeekThroughBitcast = [](SDValue Op) {
23512 if (Op.getOpcode() == ISD::BITCAST)
23513 return Op.getOperand(0);
23514 return Op;
23515 };
23516
23517 // The source value where all the parts are extracted.
23518 SDValue Src;
23519 for (unsigned i = 0; i != NumInScalars; ++i) {
23520 SDValue In = PeekThroughBitcast(N->getOperand(i));
23521 // Ignore undef inputs.
23522 if (In.isUndef()) continue;
23523
23524 if (In.getOpcode() != ISD::TRUNCATE)
23525 return SDValue();
23526
23527 In = PeekThroughBitcast(In.getOperand(0));
23528
23529 if (In.getOpcode() != ISD::SRL) {
23530 // For now only build_vec without shuffling, handle shifts here in the
23531 // future.
23532 if (i != 0)
23533 return SDValue();
23534
23535 Src = In;
23536 } else {
23537 // In is SRL
23538 SDValue part = PeekThroughBitcast(In.getOperand(0));
23539
23540 if (!Src) {
23541 Src = part;
23542 } else if (Src != part) {
23543 // Vector parts do not stem from the same variable
23544 return SDValue();
23545 }
23546
23547 SDValue ShiftAmtVal = In.getOperand(1);
23548 if (!isa<ConstantSDNode>(ShiftAmtVal))
23549 return SDValue();
23550
23551 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23552
23553 // The extracted value is not extracted at the right position
23554 if (ShiftAmt != i * ScalarTypeBitsize)
23555 return SDValue();
23556 }
23557 }
23558
23559 // Only cast if the size is the same
23560 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23561 return SDValue();
23562
23563 return DAG.getBitcast(VT, Src);
23564}
23565
23566SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23567 ArrayRef<int> VectorMask,
23568 SDValue VecIn1, SDValue VecIn2,
23569 unsigned LeftIdx, bool DidSplitVec) {
23570 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23571
23572 EVT VT = N->getValueType(0);
23573 EVT InVT1 = VecIn1.getValueType();
23574 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23575
23576 unsigned NumElems = VT.getVectorNumElements();
23577 unsigned ShuffleNumElems = NumElems;
23578
23579 // If we artificially split a vector in two already, then the offsets in the
23580 // operands will all be based off of VecIn1, even those in VecIn2.
23581 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23582
23583 uint64_t VTSize = VT.getFixedSizeInBits();
23584 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23585 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23586
23587 assert(InVT2Size <= InVT1Size &&
23588 "Inputs must be sorted to be in non-increasing vector size order.");
23589
23590 // We can't generate a shuffle node with mismatched input and output types.
23591 // Try to make the types match the type of the output.
23592 if (InVT1 != VT || InVT2 != VT) {
23593 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23594 // If the output vector length is a multiple of both input lengths,
23595 // we can concatenate them and pad the rest with undefs.
23596 unsigned NumConcats = VTSize / InVT1Size;
23597 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23598 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23599 ConcatOps[0] = VecIn1;
23600 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23601 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23602 VecIn2 = SDValue();
23603 } else if (InVT1Size == VTSize * 2) {
23604 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23605 return SDValue();
23606
23607 if (!VecIn2.getNode()) {
23608 // If we only have one input vector, and it's twice the size of the
23609 // output, split it in two.
23610 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23611 DAG.getVectorIdxConstant(NumElems, DL));
23612 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23613 // Since we now have shorter input vectors, adjust the offset of the
23614 // second vector's start.
23615 Vec2Offset = NumElems;
23616 } else {
23617 assert(InVT2Size <= InVT1Size &&
23618 "Second input is not going to be larger than the first one.");
23619
23620 // VecIn1 is wider than the output, and we have another, possibly
23621 // smaller input. Pad the smaller input with undefs, shuffle at the
23622 // input vector width, and extract the output.
23623 // The shuffle type is different than VT, so check legality again.
23624 if (LegalOperations &&
23625 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23626 return SDValue();
23627
23628 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23629 // lower it back into a BUILD_VECTOR. So if the inserted type is
23630 // illegal, don't even try.
23631 if (InVT1 != InVT2) {
23632 if (!TLI.isTypeLegal(InVT2))
23633 return SDValue();
23634 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23635 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23636 }
23637 ShuffleNumElems = NumElems * 2;
23638 }
23639 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23640 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23641 ConcatOps[0] = VecIn2;
23642 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23643 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23644 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23645 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23646 return SDValue();
23647 // If the dest vector has fewer than two elements, then using a shuffle and
23648 // extracting from larger regs will cost even more.
23649 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23650 return SDValue();
23651 assert(InVT2Size <= InVT1Size &&
23652 "Second input is not going to be larger than the first one.");
23653
23654 // VecIn1 is wider than the output, and we have another, possibly
23655 // smaller input. Pad the smaller input with undefs, shuffle at the
23656 // input vector width, and extract the output.
23657 // The shuffle type is different than VT, so check legality again.
23658 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23659 return SDValue();
23660
23661 if (InVT1 != InVT2) {
23662 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23663 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23664 }
23665 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23666 } else {
23667 // TODO: Support cases where the length mismatch isn't exactly by a
23668 // factor of 2.
23669 // TODO: Move this check upwards, so that if we have bad type
23670 // mismatches, we don't create any DAG nodes.
23671 return SDValue();
23672 }
23673 }
23674
23675 // Initialize mask to undef.
23676 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23677
23678 // Only need to run up to the number of elements actually used, not the
23679 // total number of elements in the shuffle - if we are shuffling a wider
23680 // vector, the high lanes should be set to undef.
23681 for (unsigned i = 0; i != NumElems; ++i) {
23682 if (VectorMask[i] <= 0)
23683 continue;
23684
23685 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23686 if (VectorMask[i] == (int)LeftIdx) {
23687 Mask[i] = ExtIndex;
23688 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23689 Mask[i] = Vec2Offset + ExtIndex;
23690 }
23691 }
23692
23693 // The type the input vectors may have changed above.
23694 InVT1 = VecIn1.getValueType();
23695
23696 // If we already have a VecIn2, it should have the same type as VecIn1.
23697 // If we don't, get an undef/zero vector of the appropriate type.
23698 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23699 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23700
23701 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23702 if (ShuffleNumElems > NumElems)
23703 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23704
23705 return Shuffle;
23706}
23707
23708 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23709 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23710
23711 // First, determine where the build vector is not undef.
23712 // TODO: We could extend this to handle zero elements as well as undefs.
23713 int NumBVOps = BV->getNumOperands();
23714 int ZextElt = -1;
23715 for (int i = 0; i != NumBVOps; ++i) {
23716 SDValue Op = BV->getOperand(i);
23717 if (Op.isUndef())
23718 continue;
23719 if (ZextElt == -1)
23720 ZextElt = i;
23721 else
23722 return SDValue();
23723 }
23724 // Bail out if there's no non-undef element.
23725 if (ZextElt == -1)
23726 return SDValue();
23727
23728 // The build vector contains some number of undef elements and exactly
23729 // one other element. That other element must be a zero-extended scalar
23730 // extracted from a vector at a constant index to turn this into a shuffle.
23731 // Also, require that the build vector does not implicitly truncate/extend
23732 // its elements.
23733 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23734 EVT VT = BV->getValueType(0);
23735 SDValue Zext = BV->getOperand(ZextElt);
23736 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23737 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23738 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23739 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23740 return SDValue();
23741
23742 // The zero-extend must be a multiple of the source size, and we must be
23743 // building a vector of the same size as the source of the extract element.
23744 SDValue Extract = Zext.getOperand(0);
23745 unsigned DestSize = Zext.getValueSizeInBits();
23746 unsigned SrcSize = Extract.getValueSizeInBits();
23747 if (DestSize % SrcSize != 0 ||
23748 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23749 return SDValue();
23750
23751 // Create a shuffle mask that will combine the extracted element with zeros
23752 // and undefs.
23753 int ZextRatio = DestSize / SrcSize;
23754 int NumMaskElts = NumBVOps * ZextRatio;
23755 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23756 for (int i = 0; i != NumMaskElts; ++i) {
23757 if (i / ZextRatio == ZextElt) {
23758 // The low bits of the (potentially translated) extracted element map to
23759 // the source vector. The high bits map to zero. We will use a zero vector
23760 // as the 2nd source operand of the shuffle, so use the 1st element of
23761 // that vector (mask value is number-of-elements) for the high bits.
23762 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23763 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23764 : NumMaskElts;
23765 }
23766
23767 // Undef elements of the build vector remain undef because we initialize
23768 // the shuffle mask with -1.
23769 }
23770
23771 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23772 // bitcast (shuffle V, ZeroVec, VectorMask)
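// For example (illustrative, little-endian):
//   (v2i64 build_vector (zext i32 (extractelt <4 x i32> %v, 2)), undef)
// --> (v2i64 bitcast (shuffle %v, zeroinitializer, <2, 4, u, u>))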
23773 SDLoc DL(BV);
23774 EVT VecVT = Extract.getOperand(0).getValueType();
23775 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23776 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23777 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23778 ZeroVec, ShufMask, DAG);
23779 if (!Shuf)
23780 return SDValue();
23781 return DAG.getBitcast(VT, Shuf);
23782}
23783
23784// FIXME: promote to STLExtras.
23785template <typename R, typename T>
23786static auto getFirstIndexOf(R &&Range, const T &Val) {
23787 auto I = find(Range, Val);
23788 if (I == Range.end())
23789 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23790 return std::distance(Range.begin(), I);
23791}
23792
23793// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23794// operations. If the types of the vectors we're extracting from allow it,
23795// turn this into a vector_shuffle node.
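// For example (illustrative), with v4i32 %a and %b:
//   (build_vector (extractelt %a, 0), (extractelt %a, 1),
//                 (extractelt %b, 0), (extractelt %b, 1))
// --> (vector_shuffle %a, %b, <0, 1, 4, 5>)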
23796SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23797 SDLoc DL(N);
23798 EVT VT = N->getValueType(0);
23799
23800 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23801 if (!isTypeLegal(VT))
23802 return SDValue();
23803
23804 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23805 return V;
23806
23807 // May only combine to shuffle after legalize if shuffle is legal.
23808 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23809 return SDValue();
23810
23811 bool UsesZeroVector = false;
23812 unsigned NumElems = N->getNumOperands();
23813
23814 // Record, for each element of the newly built vector, which input vector
23815 // that element comes from. -1 stands for undef, 0 for the zero vector,
23816 // and positive values for the input vectors.
23817 // VectorMask maps each element to its vector number, and VecIn maps vector
23818 // numbers to their initial SDValues.
23819
23820 SmallVector<int, 8> VectorMask(NumElems, -1);
23821 SmallVector<SDValue, 8> VecIn;
23822 VecIn.push_back(SDValue());
23823
23824 // If we have a single extract_element with a constant index, track the index
23825 // value.
23826 unsigned OneConstExtractIndex = ~0u;
23827
23828 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
23829 unsigned NumExtracts = 0;
23830
23831 for (unsigned i = 0; i != NumElems; ++i) {
23832 SDValue Op = N->getOperand(i);
23833
23834 if (Op.isUndef())
23835 continue;
23836
23837 // See if we can use a blend with a zero vector.
23838 // TODO: Should we generalize this to a blend with an arbitrary constant
23839 // vector?
23840 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23841 UsesZeroVector = true;
23842 VectorMask[i] = 0;
23843 continue;
23844 }
23845
23846 // Not an undef or zero. If the input is something other than an
23847 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23848 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
23849 return SDValue();
23850
23851 SDValue ExtractedFromVec = Op.getOperand(0);
23852 if (ExtractedFromVec.getValueType().isScalableVector())
23853 return SDValue();
23854 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
23855 if (!ExtractIdx)
23856 return SDValue();
23857
23858 if (ExtractIdx->getAsAPIntVal().uge(
23859 ExtractedFromVec.getValueType().getVectorNumElements()))
23860 return SDValue();
23861
23862 // All inputs must have the same element type as the output.
23863 if (VT.getVectorElementType() !=
23864 ExtractedFromVec.getValueType().getVectorElementType())
23865 return SDValue();
23866
23867 OneConstExtractIndex = ExtractIdx->getZExtValue();
23868 ++NumExtracts;
23869
23870 // Have we seen this input vector before?
23871 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23872 // a map back from SDValues to numbers isn't worth it.
23873 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23874 if (Idx == -1) { // A new source vector?
23875 Idx = VecIn.size();
23876 VecIn.push_back(ExtractedFromVec);
23877 }
23878
23879 VectorMask[i] = Idx;
23880 }
23881
23882 // If we didn't find at least one input vector, bail out.
23883 if (VecIn.size() < 2)
23884 return SDValue();
23885
23886 // If all the operands of the BUILD_VECTOR extract from the same
23887 // vector, then split the vector efficiently based on the maximum
23888 // vector access index and adjust the VectorMask and
23889 // VecIn accordingly.
23890 bool DidSplitVec = false;
23891 if (VecIn.size() == 2) {
23892 // If we only found a single constant indexed extract_vector_elt feeding the
23893 // build_vector, do not produce a more complicated shuffle if the extract is
23894 // cheap with other constant/undef elements. Skip broadcast patterns with
23895 // multiple uses in the build_vector.
23896
23897 // TODO: This should be more aggressive about skipping the shuffle
23898 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
23899 // index.
23900 if (NumExtracts == 1 &&
23903 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23904 return SDValue();
23905
23906 unsigned MaxIndex = 0;
23907 unsigned NearestPow2 = 0;
23908 SDValue Vec = VecIn.back();
23909 EVT InVT = Vec.getValueType();
23910 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23911
23912 for (unsigned i = 0; i < NumElems; i++) {
23913 if (VectorMask[i] <= 0)
23914 continue;
23915 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23916 IndexVec[i] = Index;
23917 MaxIndex = std::max(MaxIndex, Index);
23918 }
23919
23920 NearestPow2 = PowerOf2Ceil(MaxIndex);
23921 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23922 NumElems * 2 < NearestPow2) {
23923 unsigned SplitSize = NearestPow2 / 2;
23924 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23925 InVT.getVectorElementType(), SplitSize);
23926 if (TLI.isTypeLegal(SplitVT) &&
23927 SplitSize + SplitVT.getVectorNumElements() <=
23928 InVT.getVectorNumElements()) {
23929 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23930 DAG.getVectorIdxConstant(SplitSize, DL));
23931 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23932 DAG.getVectorIdxConstant(0, DL));
23933 VecIn.pop_back();
23934 VecIn.push_back(VecIn1);
23935 VecIn.push_back(VecIn2);
23936 DidSplitVec = true;
23937
23938 for (unsigned i = 0; i < NumElems; i++) {
23939 if (VectorMask[i] <= 0)
23940 continue;
23941 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23942 }
23943 }
23944 }
23945 }
23946
23947 // Sort input vectors by decreasing vector element count,
23948 // while preserving the relative order of equally-sized vectors.
23949 // Note that we keep the first "implicit zero vector" as-is.
23950 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23951 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23952 [](const SDValue &a, const SDValue &b) {
23953 return a.getValueType().getVectorNumElements() >
23954 b.getValueType().getVectorNumElements();
23955 });
23956
23957 // We now also need to rebuild the VectorMask, because it referenced element
23958 // order in VecIn, and we just sorted them.
23959 for (int &SourceVectorIndex : VectorMask) {
23960 if (SourceVectorIndex <= 0)
23961 continue;
23962 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23963 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23964 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23965 SourceVectorIndex = Idx;
23966 }
23967
23968 VecIn = std::move(SortedVecIn);
23969
23970 // TODO: Should this fire if some of the input vectors have an illegal type
23971 // (like it does now), or should we let legalization run its course first?
23972
23973 // Shuffle phase:
23974 // Take pairs of vectors, and shuffle them so that the result has elements
23975 // from these vectors in the correct places.
23976 // For example, given:
23977 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23978 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23979 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23980 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23981 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23982 // We will generate:
23983 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23984 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23985 SmallVector<SDValue, 4> Shuffles;
23986 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23987 unsigned LeftIdx = 2 * In + 1;
23988 SDValue VecLeft = VecIn[LeftIdx];
23989 SDValue VecRight =
23990 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23991
23992 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23993 VecRight, LeftIdx, DidSplitVec))
23994 Shuffles.push_back(Shuffle);
23995 else
23996 return SDValue();
23997 }
23998
23999 // If we need the zero vector as an "ingredient" in the blend tree, add it
24000 // to the list of shuffles.
24001 if (UsesZeroVector)
24002 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24003 : DAG.getConstantFP(0.0, DL, VT));
24004
24005 // If we only have one shuffle, we're done.
24006 if (Shuffles.size() == 1)
24007 return Shuffles[0];
24008
24009 // Update the vector mask to point to the post-shuffle vectors.
24010 for (int &Vec : VectorMask)
24011 if (Vec == 0)
24012 Vec = Shuffles.size() - 1;
24013 else
24014 Vec = (Vec - 1) / 2;
24015
24016 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24017 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24018 // generate:
24019 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24020 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24021 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24022 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24023 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24024 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24025 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24026
24027 // Make sure the initial size of the shuffle list is even.
24028 if (Shuffles.size() % 2)
24029 Shuffles.push_back(DAG.getUNDEF(VT));
24030
24031 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24032 if (CurSize % 2) {
24033 Shuffles[CurSize] = DAG.getUNDEF(VT);
24034 CurSize++;
24035 }
24036 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24037 int Left = 2 * In;
24038 int Right = 2 * In + 1;
24039 SmallVector<int, 8> Mask(NumElems, -1);
24040 SDValue L = Shuffles[Left];
24041 ArrayRef<int> LMask;
24042 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24043 L.use_empty() && L.getOperand(1).isUndef() &&
24044 L.getOperand(0).getValueType() == L.getValueType();
24045 if (IsLeftShuffle) {
24046 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24047 L = L.getOperand(0);
24048 }
24049 SDValue R = Shuffles[Right];
24050 ArrayRef<int> RMask;
24051 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24052 R.use_empty() && R.getOperand(1).isUndef() &&
24053 R.getOperand(0).getValueType() == R.getValueType();
24054 if (IsRightShuffle) {
24055 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24056 R = R.getOperand(0);
24057 }
24058 for (unsigned I = 0; I != NumElems; ++I) {
24059 if (VectorMask[I] == Left) {
24060 Mask[I] = I;
24061 if (IsLeftShuffle)
24062 Mask[I] = LMask[I];
24063 VectorMask[I] = In;
24064 } else if (VectorMask[I] == Right) {
24065 Mask[I] = I + NumElems;
24066 if (IsRightShuffle)
24067 Mask[I] = RMask[I] + NumElems;
24068 VectorMask[I] = In;
24069 }
24070 }
24071
24072 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24073 }
24074 }
24075 return Shuffles[0];
24076}
24077
24078 // Try to turn a build vector of zero extends of extract vector elts into a
24079 // vector zero extend and possibly an extract subvector.
24080// TODO: Support sign extend?
24081// TODO: Allow undef elements?
24082SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24083 if (LegalOperations)
24084 return SDValue();
24085
24086 EVT VT = N->getValueType(0);
24087
24088 bool FoundZeroExtend = false;
24089 SDValue Op0 = N->getOperand(0);
24090 auto checkElem = [&](SDValue Op) -> int64_t {
24091 unsigned Opc = Op.getOpcode();
24092 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24093 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24094 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24095 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24096 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24097 return C->getZExtValue();
24098 return -1;
24099 };
24100
24101 // Make sure the first element matches
24102 // (zext (extract_vector_elt X, C))
24103 // Offset must be a constant multiple of the
24104 // known-minimum vector length of the result type.
24105 int64_t Offset = checkElem(Op0);
24106 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24107 return SDValue();
24108
24109 unsigned NumElems = N->getNumOperands();
24110 SDValue In = Op0.getOperand(0).getOperand(0);
24111 EVT InSVT = In.getValueType().getScalarType();
24112 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24113
24114 // Don't create an illegal input type after type legalization.
24115 if (LegalTypes && !TLI.isTypeLegal(InVT))
24116 return SDValue();
24117
24118 // Ensure all the elements come from the same vector and are adjacent.
24119 for (unsigned i = 1; i != NumElems; ++i) {
24120 if ((Offset + i) != checkElem(N->getOperand(i)))
24121 return SDValue();
24122 }
24123
24124 SDLoc DL(N);
24125 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24126 Op0.getOperand(0).getOperand(1));
24127 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24128 VT, In);
24129}
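// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source), assuming v2i16 and v2i32 are legal types:
//   t1: v8i16 = ...
//   t2: i16 = extract_vector_elt t1, Constant:i64<4>
//   t3: i16 = extract_vector_elt t1, Constant:i64<5>
//   t4: i32 = zero_extend t2
//   t5: i32 = zero_extend t3
//   t6: v2i32 = BUILD_VECTOR t4, t5
// becomes
//   t7: v2i16 = extract_subvector t1, Constant:i64<4>
//   t8: v2i32 = zero_extend t7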
24130
24131 // If this is a very simple BUILD_VECTOR with the first element being a ZERO_EXTEND,
24132 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
24133 // element width, absorbing the ZERO_EXTEND and turning it into a constant zero op.
24134 // This pattern can appear during legalization.
24135//
24136// NOTE: This can be generalized to allow more than a single
24137 // non-constant-zero op, UNDEF's, and to be KnownBits-based.
24138SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24139 // Don't run this after legalization. Targets may have other preferences.
24140 if (Level >= AfterLegalizeDAG)
24141 return SDValue();
24142
24143 // FIXME: support big-endian.
24144 if (DAG.getDataLayout().isBigEndian())
24145 return SDValue();
24146
24147 EVT VT = N->getValueType(0);
24148 EVT OpVT = N->getOperand(0).getValueType();
24149 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24150
24151 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24152
24153 if (!TLI.isTypeLegal(OpIntVT) ||
24154 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24155 return SDValue();
24156
24157 unsigned EltBitwidth = VT.getScalarSizeInBits();
24158 // NOTE: the actual width of operands may be wider than that!
24159
24160 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24161 // active bits they all have? We'll want to truncate them all to that width.
24162 unsigned ActiveBits = 0;
24163 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24164 for (auto I : enumerate(N->ops())) {
24165 SDValue Op = I.value();
24166 // FIXME: support UNDEF elements?
24167 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24168 unsigned OpActiveBits =
24169 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24170 if (OpActiveBits == 0) {
24171 KnownZeroOps.setBit(I.index());
24172 continue;
24173 }
24174 // Profitability check: don't allow non-zero constant operands.
24175 return SDValue();
24176 }
24177 // Profitability check: there must only be a single non-zero operand,
24178 // and it must be the first operand of the BUILD_VECTOR.
24179 if (I.index() != 0)
24180 return SDValue();
24181 // The operand must be a zero-extension itself.
24182 // FIXME: this could be generalized to known leading zeros check.
24183 if (Op.getOpcode() != ISD::ZERO_EXTEND)
24184 return SDValue();
24185 unsigned CurrActiveBits =
24186 Op.getOperand(0).getValueSizeInBits().getFixedValue();
24187 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24188 ActiveBits = CurrActiveBits;
24189 // We want to at least halve the element size.
24190 if (2 * ActiveBits > EltBitwidth)
24191 return SDValue();
24192 }
24193
24194 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24195 if (ActiveBits == 0)
24196 return SDValue();
24197
24198 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
24199 // into how many chunks can we split our element width?
24200 EVT NewScalarIntVT, NewIntVT;
24201 std::optional<unsigned> Factor;
24202 // We can split the element into at least two chunks, but not into more
24203 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
24204 // for which the element width is a multiple of it,
24205 // and the resulting types/operations on that chunk width are legal.
24206 assert(2 * ActiveBits <= EltBitwidth &&
24207 "We know that half or less bits of the element are active.");
24208 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24209 if (EltBitwidth % Scale != 0)
24210 continue;
24211 unsigned ChunkBitwidth = EltBitwidth / Scale;
24212 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
24213 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
24214 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
24215 Scale * N->getNumOperands());
24216 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
24217 (LegalOperations &&
24218 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
24219 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
24220 continue;
24221 Factor = Scale;
24222 break;
24223 }
24224 if (!Factor)
24225 return SDValue();
24226
24227 SDLoc DL(N);
24228 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
24229
24230 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
24231 SmallVector<SDValue, 16> NewOps;
24232 NewOps.reserve(NewIntVT.getVectorNumElements());
24233 for (auto I : enumerate(N->ops())) {
24234 SDValue Op = I.value();
24235 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
24236 unsigned SrcOpIdx = I.index();
24237 if (KnownZeroOps[SrcOpIdx]) {
24238 NewOps.append(*Factor, ZeroOp);
24239 continue;
24240 }
24241 Op = DAG.getBitcast(OpIntVT, Op);
24242 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
24243 NewOps.emplace_back(Op);
24244 NewOps.append(*Factor - 1, ZeroOp);
24245 }
24246 assert(NewOps.size() == NewIntVT.getVectorNumElements());
24247 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
24248 NewBV = DAG.getBitcast(VT, NewBV);
24249 return NewBV;
24250}
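// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source), assuming a little-endian target where i16 and
// v8i16 are legal:
//   t1: i16 = ...
//   t2: i64 = zero_extend t1
//   t3: v2i64 = BUILD_VECTOR t2, Constant:i64<0>
// can become
//   t4: i16 = truncate t2
//   t5: v8i16 = BUILD_VECTOR t4, 0, 0, 0, 0, 0, 0, 0
//   t6: v2i64 = bitcast t5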
24251
24252SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
24253 EVT VT = N->getValueType(0);
24254
24255 // A vector built entirely of undefs is undef.
24256 if (ISD::allOperandsUndef(N))
24257 return DAG.getUNDEF(VT);
24258
24259 // If this is a splat of a bitcast from another vector, change to a
24260 // concat_vector.
24261 // For example:
24262 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24263 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
24264 //
24265 // If X is a build_vector itself, the concat can become a larger build_vector.
24266 // TODO: Maybe this is useful for non-splat too?
24267 if (!LegalOperations) {
24268 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24269 // Only change build_vector to a concat_vector if the splat value type is
24270 // same as the vector element type.
24271 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
24272 Splat = peekThroughBitcasts(Splat);
24273 EVT SrcVT = Splat.getValueType();
24274 if (SrcVT.isVector()) {
24275 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
24276 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
24277 SrcVT.getVectorElementType(), NumElts);
24278 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
24279 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
24280 SDValue Concat =
24281 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
24282 return DAG.getBitcast(VT, Concat);
24283 }
24284 }
24285 }
24286 }
24287
24288 // Check if we can express BUILD VECTOR via subvector extract.
24289 if (!LegalTypes && (N->getNumOperands() > 1)) {
24290 SDValue Op0 = N->getOperand(0);
24291 auto checkElem = [&](SDValue Op) -> uint64_t {
24292 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
24293 (Op0.getOperand(0) == Op.getOperand(0)))
24294 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
24295 return CNode->getZExtValue();
24296 return -1;
24297 };
24298
24299 int Offset = checkElem(Op0);
24300 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
24301 if (Offset + i != checkElem(N->getOperand(i))) {
24302 Offset = -1;
24303 break;
24304 }
24305 }
24306
24307 if ((Offset == 0) &&
24308 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
24309 return Op0.getOperand(0);
24310 if ((Offset != -1) &&
24311 ((Offset % N->getValueType(0).getVectorNumElements()) ==
24312 0)) // IDX must be multiple of output size.
24313 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
24314 Op0.getOperand(0), Op0.getOperand(1));
24315 }
24316
24317 if (SDValue V = convertBuildVecZextToZext(N))
24318 return V;
24319
24320 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
24321 return V;
24322
24323 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
24324 return V;
24325
24326 if (SDValue V = reduceBuildVecTruncToBitCast(N))
24327 return V;
24328
24329 if (SDValue V = reduceBuildVecToShuffle(N))
24330 return V;
24331
24332 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
24333 // Do this late as some of the above may replace the splat.
24334 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
24335 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
24336 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
24337 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
24338 }
24339
24340 return SDValue();
24341}
24342
24343 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
24344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24345 EVT OpVT = N->getOperand(0).getValueType();
24346
24347 // If the operands are legal vectors, leave them alone.
24348 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
24349 return SDValue();
24350
24351 SDLoc DL(N);
24352 EVT VT = N->getValueType(0);
24353 SmallVector<SDValue, 8> Ops;
24354 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24355
24356 // Keep track of what we encounter.
24357 EVT AnyFPVT;
24358
24359 for (const SDValue &Op : N->ops()) {
24360 if (ISD::BITCAST == Op.getOpcode() &&
24361 !Op.getOperand(0).getValueType().isVector())
24362 Ops.push_back(Op.getOperand(0));
24363 else if (ISD::UNDEF == Op.getOpcode())
24364 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
24365 else
24366 return SDValue();
24367
24368 // Note whether we encounter an integer or floating point scalar.
24369 // If it's neither, bail out, it could be something weird like x86mmx.
24370 EVT LastOpVT = Ops.back().getValueType();
24371 if (LastOpVT.isFloatingPoint())
24372 AnyFPVT = LastOpVT;
24373 else if (!LastOpVT.isInteger())
24374 return SDValue();
24375 }
24376
24377 // If any of the operands is a floating point scalar bitcast to a vector,
24378 // use floating point types throughout, and bitcast everything.
24379 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
24380 if (AnyFPVT != EVT()) {
24381 SVT = AnyFPVT;
24382 for (SDValue &Op : Ops) {
24383 if (Op.getValueType() == SVT)
24384 continue;
24385 if (Op.isUndef())
24386 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
24387 else
24388 Op = DAG.getBitcast(SVT, Op);
24389 }
24390 }
24391
24392 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
24393 VT.getSizeInBits() / SVT.getSizeInBits());
24394 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
24395}
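// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source), assuming v2i32 is *not* a legal type on the target:
//   t1: i64 = ...
//   t2: v2i32 = bitcast t1
//   t3: v4i32 = concat_vectors t2, undef:v2i32
// can become
//   t4: v2i64 = BUILD_VECTOR t1, undef:i64
//   t5: v4i32 = bitcast t4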
24396
24397// Attempt to merge nested concat_vectors/undefs.
24398// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
24399// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
24400 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
24401 SelectionDAG &DAG) {
24402 EVT VT = N->getValueType(0);
24403
24404 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
24405 EVT SubVT;
24406 SDValue FirstConcat;
24407 for (const SDValue &Op : N->ops()) {
24408 if (Op.isUndef())
24409 continue;
24410 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
24411 return SDValue();
24412 if (!FirstConcat) {
24413 SubVT = Op.getOperand(0).getValueType();
24414 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
24415 return SDValue();
24416 FirstConcat = Op;
24417 continue;
24418 }
24419 if (SubVT != Op.getOperand(0).getValueType())
24420 return SDValue();
24421 }
24422 assert(FirstConcat && "Concat of all-undefs found");
24423
24424 SmallVector<SDValue> ConcatOps;
24425 for (const SDValue &Op : N->ops()) {
24426 if (Op.isUndef()) {
24427 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
24428 continue;
24429 }
24430 ConcatOps.append(Op->op_begin(), Op->op_end());
24431 }
24432 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
24433}
24434
24435// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
24436// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
24437// most two distinct vectors the same size as the result, attempt to turn this
24438// into a legal shuffle.
24439 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
24440 EVT VT = N->getValueType(0);
24441 EVT OpVT = N->getOperand(0).getValueType();
24442
24443 // We currently can't generate an appropriate shuffle for a scalable vector.
24444 if (VT.isScalableVector())
24445 return SDValue();
24446
24447 int NumElts = VT.getVectorNumElements();
24448 int NumOpElts = OpVT.getVectorNumElements();
24449
24450 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
24451 SmallVector<int, 8> Mask;
24452 
24453 for (SDValue Op : N->ops()) {
24454 Op = peekThroughBitcasts(Op);
24455 
24456 // UNDEF nodes convert to UNDEF shuffle mask values.
24457 if (Op.isUndef()) {
24458 Mask.append((unsigned)NumOpElts, -1);
24459 continue;
24460 }
24461
24462 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24463 return SDValue();
24464
24465 // What vector are we extracting the subvector from and at what index?
24466 SDValue ExtVec = Op.getOperand(0);
24467 int ExtIdx = Op.getConstantOperandVal(1);
24468
24469 // We want the EVT of the original extraction to correctly scale the
24470 // extraction index.
24471 EVT ExtVT = ExtVec.getValueType();
24472 ExtVec = peekThroughBitcasts(ExtVec);
24473
24474 // UNDEF nodes convert to UNDEF shuffle mask values.
24475 if (ExtVec.isUndef()) {
24476 Mask.append((unsigned)NumOpElts, -1);
24477 continue;
24478 }
24479
24480 // Ensure that we are extracting a subvector from a vector the same
24481 // size as the result.
24482 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
24483 return SDValue();
24484
24485 // Scale the subvector index to account for any bitcast.
24486 int NumExtElts = ExtVT.getVectorNumElements();
24487 if (0 == (NumExtElts % NumElts))
24488 ExtIdx /= (NumExtElts / NumElts);
24489 else if (0 == (NumElts % NumExtElts))
24490 ExtIdx *= (NumElts / NumExtElts);
24491 else
24492 return SDValue();
24493
24494 // At most we can reference 2 inputs in the final shuffle.
24495 if (SV0.isUndef() || SV0 == ExtVec) {
24496 SV0 = ExtVec;
24497 for (int i = 0; i != NumOpElts; ++i)
24498 Mask.push_back(i + ExtIdx);
24499 } else if (SV1.isUndef() || SV1 == ExtVec) {
24500 SV1 = ExtVec;
24501 for (int i = 0; i != NumOpElts; ++i)
24502 Mask.push_back(i + ExtIdx + NumElts);
24503 } else {
24504 return SDValue();
24505 }
24506 }
24507
24508 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24509 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
24510 DAG.getBitcast(VT, SV1), Mask, DAG);
24511}
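// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source):
//   t1: v4i32 = ...
//   t2: v4i32 = ...
//   t3: v2i32 = extract_subvector t1, Constant:i64<0>
//   t4: v2i32 = extract_subvector t2, Constant:i64<2>
//   t5: v4i32 = concat_vectors t3, t4
// can become (if the mask is legal for the target)
//   t6: v4i32 = vector_shuffle<0,1,6,7> t1, t2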
24512
24513 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
24514 unsigned CastOpcode = N->getOperand(0).getOpcode();
24515 switch (CastOpcode) {
24516 case ISD::SINT_TO_FP:
24517 case ISD::UINT_TO_FP:
24518 case ISD::FP_TO_SINT:
24519 case ISD::FP_TO_UINT:
24520 // TODO: Allow more opcodes?
24521 // case ISD::BITCAST:
24522 // case ISD::TRUNCATE:
24523 // case ISD::ZERO_EXTEND:
24524 // case ISD::SIGN_EXTEND:
24525 // case ISD::FP_EXTEND:
24526 break;
24527 default:
24528 return SDValue();
24529 }
24530
24531 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
24532 if (!SrcVT.isVector())
24533 return SDValue();
24534
24535 // All operands of the concat must be the same kind of cast from the same
24536 // source type.
24537 SmallVector<SDValue, 4> SrcOps;
24538 for (SDValue Op : N->ops()) {
24539 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
24540 Op.getOperand(0).getValueType() != SrcVT)
24541 return SDValue();
24542 SrcOps.push_back(Op.getOperand(0));
24543 }
24544
24545 // The wider cast must be supported by the target. This is unusual because
24546 // the operation support type parameter depends on the opcode. In addition,
24547 // check the other type in the cast to make sure this is really legal.
24548 EVT VT = N->getValueType(0);
24549 EVT SrcEltVT = SrcVT.getVectorElementType();
24550 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24551 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24552 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24553 switch (CastOpcode) {
24554 case ISD::SINT_TO_FP:
24555 case ISD::UINT_TO_FP:
24556 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24557 !TLI.isTypeLegal(VT))
24558 return SDValue();
24559 break;
24560 case ISD::FP_TO_SINT:
24561 case ISD::FP_TO_UINT:
24562 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24563 !TLI.isTypeLegal(ConcatSrcVT))
24564 return SDValue();
24565 break;
24566 default:
24567 llvm_unreachable("Unexpected cast opcode");
24568 }
24569
24570 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24571 SDLoc DL(N);
24572 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24573 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24574}
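// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source), assuming v8i32 -> v8f32 sint_to_fp is legal or
// custom and v8f32 is a legal type:
//   t1: v4i32 = ...
//   t2: v4i32 = ...
//   t3: v4f32 = sint_to_fp t1
//   t4: v4f32 = sint_to_fp t2
//   t5: v8f32 = concat_vectors t3, t4
// can become
//   t6: v8i32 = concat_vectors t1, t2
//   t7: v8f32 = sint_to_fp t6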
24575
24576// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24577// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24578 // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
24579 static SDValue combineConcatVectorOfShuffleAndItsOperands(
24580 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24581 bool LegalOperations) {
24582 EVT VT = N->getValueType(0);
24583 EVT OpVT = N->getOperand(0).getValueType();
24584 if (VT.isScalableVector())
24585 return SDValue();
24586
24587 // For now, only allow simple 2-operand concatenations.
24588 if (N->getNumOperands() != 2)
24589 return SDValue();
24590
24591 // Don't create illegal types/shuffles when not allowed to.
24592 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24593 (LegalOperations &&
24594 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24595 return SDValue();
24596
24597 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24598 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24599 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24600 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24601 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24602 ShuffleVectorSDNode *SVN = nullptr;
24603 for (SDValue Op : N->ops()) {
24604 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24605 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24606 all_of(N->ops(), [CurSVN](SDValue Op) {
24607 // FIXME: can we allow UNDEF operands?
24608 return !Op.isUndef() &&
24609 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24610 })) {
24611 SVN = CurSVN;
24612 break;
24613 }
24614 }
24615 if (!SVN)
24616 return SDValue();
24617
24618 // We are going to pad the shuffle operands, so any index that was picking
24619 // from the second operand must be adjusted.
24620 SmallVector<int, 16> AdjustedMask;
24621 AdjustedMask.reserve(SVN->getMask().size());
24622 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24623 append_range(AdjustedMask, SVN->getMask());
24624
24625 // Identity masks for the operands of the (padded) shuffle.
24626 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24627 MutableArrayRef<int> FirstShufOpIdentityMask =
24628 MutableArrayRef<int>(IdentityMask)
24629 .take_front(OpVT.getVectorNumElements());
24630 MutableArrayRef<int> SecondShufOpIdentityMask =
24631 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24632 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24633 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24634 OpVT.getVectorNumElements());
24635 
24636 // New combined shuffle mask.
24637 SmallVector<int, 32> Mask;
24638 Mask.reserve(VT.getVectorNumElements());
24639 for (SDValue Op : N->ops()) {
24640 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24641 if (Op.getNode() == SVN) {
24642 append_range(Mask, AdjustedMask);
24643 continue;
24644 }
24645 if (Op == SVN->getOperand(0)) {
24646 append_range(Mask, FirstShufOpIdentityMask);
24647 continue;
24648 }
24649 if (Op == SVN->getOperand(1)) {
24650 append_range(Mask, SecondShufOpIdentityMask);
24651 continue;
24652 }
24653 llvm_unreachable("Unexpected operand!");
24654 }
24655
24656 // Don't create illegal shuffle masks.
24657 if (!TLI.isShuffleMaskLegal(Mask, VT))
24658 return SDValue();
24659
24660 // Pad the shuffle operands with UNDEF.
24661 SDLoc dl(N);
24662 std::array<SDValue, 2> ShufOps;
24663 for (auto I : zip(SVN->ops(), ShufOps)) {
24664 SDValue ShufOp = std::get<0>(I);
24665 SDValue &NewShufOp = std::get<1>(I);
24666 if (ShufOp.isUndef())
24667 NewShufOp = DAG.getUNDEF(VT);
24668 else {
24669 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24670 DAG.getUNDEF(OpVT));
24671 ShufOpParts[0] = ShufOp;
24672 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24673 }
24674 }
24675 // Finally, create the new wide shuffle.
24676 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24677}
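// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source), assuming the combined mask is legal:
//   t1: v2i32 = ...
//   t2: v2i32 = vector_shuffle<1,0> t1, undef
//   t3: v4i32 = concat_vectors t2, t1
// can become
//   t4: v4i32 = concat_vectors t1, undef:v2i32
//   t5: v4i32 = vector_shuffle<1,0,0,1> t4, undef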
24678
24679SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24680 // If we only have one input vector, we don't need to do any concatenation.
24681 if (N->getNumOperands() == 1)
24682 return N->getOperand(0);
24683
24684 // Check if all of the operands are undefs.
24685 EVT VT = N->getValueType(0);
24686 if (ISD::allOperandsUndef(N))
24687 return DAG.getUNDEF(VT);
24688
24689 // Optimize concat_vectors where all but the first of the vectors are undef.
24690 if (all_of(drop_begin(N->ops()),
24691 [](const SDValue &Op) { return Op.isUndef(); })) {
24692 SDValue In = N->getOperand(0);
24693 assert(In.getValueType().isVector() && "Must concat vectors");
24694
24695 // If the input is a concat_vectors, just make a larger concat by padding
24696 // with smaller undefs.
24697 //
24698 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24699 // here could cause an infinite loop. That legalizing happens when LegalDAG
24700 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24701 // scalable.
24702 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24703 !(LegalDAG && In.getValueType().isScalableVector())) {
24704 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24705 SmallVector<SDValue, 4> Ops(In->ops());
24706 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24707 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24708 }
24709
24710 SDValue Scalar = peekThroughOneUseBitcasts(In);
24711 
24712 // concat_vectors(scalar_to_vector(scalar), undef) ->
24713 // scalar_to_vector(scalar)
24714 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24715 Scalar.hasOneUse()) {
24716 EVT SVT = Scalar.getValueType().getVectorElementType();
24717 if (SVT == Scalar.getOperand(0).getValueType())
24718 Scalar = Scalar.getOperand(0);
24719 }
24720
24721 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24722 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24723 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24724 // look through the trunc so we can still do the transform:
24725 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24726 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24727 !TLI.isTypeLegal(Scalar.getValueType()) &&
24728 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24729 Scalar = Scalar->getOperand(0);
24730
24731 EVT SclTy = Scalar.getValueType();
24732
24733 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24734 return SDValue();
24735
24736 // Bail out if the vector size is not a multiple of the scalar size.
24737 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24738 return SDValue();
24739
24740 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24741 if (VNTNumElms < 2)
24742 return SDValue();
24743
24744 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24745 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24746 return SDValue();
24747
24748 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24749 return DAG.getBitcast(VT, Res);
24750 }
24751 }
24752
24753 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24754 // We have already tested above for an UNDEF only concatenation.
24755 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24756 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24757 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24758 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24759 };
24760 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24761 SmallVector<SDValue, 8> Opnds;
24762 EVT SVT = VT.getScalarType();
24763
24764 EVT MinVT = SVT;
24765 if (!SVT.isFloatingPoint()) {
24766 // If the BUILD_VECTOR nodes are built from integers, they may have different
24767 // operand types. Get the smallest type and truncate all operands to it.
24768 bool FoundMinVT = false;
24769 for (const SDValue &Op : N->ops())
24770 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24771 EVT OpSVT = Op.getOperand(0).getValueType();
24772 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24773 FoundMinVT = true;
24774 }
24775 assert(FoundMinVT && "Concat vector type mismatch");
24776 }
24777
24778 for (const SDValue &Op : N->ops()) {
24779 EVT OpVT = Op.getValueType();
24780 unsigned NumElts = OpVT.getVectorNumElements();
24781
24782 if (ISD::UNDEF == Op.getOpcode())
24783 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24784
24785 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24786 if (SVT.isFloatingPoint()) {
24787 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24788 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24789 } else {
24790 for (unsigned i = 0; i != NumElts; ++i)
24791 Opnds.push_back(
24792 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24793 }
24794 }
24795 }
24796
24797 assert(VT.getVectorNumElements() == Opnds.size() &&
24798 "Concat vector type mismatch");
24799 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24800 }
24801
24802 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24803 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24804 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24805 return V;
24806
24807 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24808 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24809 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24810 return V;
24811
24812 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24813 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24814 return V;
24815 }
24816
24817 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24818 return V;
24819
24820 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24821 N, DAG, TLI, LegalTypes, LegalOperations))
24822 return V;
24823
24824 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24825 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24826 // operands and look for CONCAT operations that place the incoming vectors
24827 // at the exact same location.
24828 //
24829 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
24830 SDValue SingleSource = SDValue();
24831 unsigned PartNumElem =
24832 N->getOperand(0).getValueType().getVectorMinNumElements();
24833
24834 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24835 SDValue Op = N->getOperand(i);
24836
24837 if (Op.isUndef())
24838 continue;
24839
24840 // Check if this is the identity extract:
24841 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24842 return SDValue();
24843
24844 // Find the single incoming vector for the extract_subvector.
24845 if (SingleSource.getNode()) {
24846 if (Op.getOperand(0) != SingleSource)
24847 return SDValue();
24848 } else {
24849 SingleSource = Op.getOperand(0);
24850
24851 // Check the source type is the same as the type of the result.
24852 // If not, this concat may extend the vector, so we cannot
24853 // optimize it away.
24854 if (SingleSource.getValueType() != N->getValueType(0))
24855 return SDValue();
24856 }
24857
24858 // Check that we are reading from the identity index.
24859 unsigned IdentityIndex = i * PartNumElem;
24860 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24861 return SDValue();
24862 }
24863
24864 if (SingleSource.getNode())
24865 return SingleSource;
24866
24867 return SDValue();
24868}
24869
24870// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24871// if the subvector can be sourced for free.
24872static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24873 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24874 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24875 return V.getOperand(1);
24876 }
24877 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24878 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24879 V.getOperand(0).getValueType() == SubVT &&
24880 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24881 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24882 return V.getOperand(SubIdx);
24883 }
24884 return SDValue();
24885}
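// Editorial note (illustrative sketch, not part of the upstream source): for
// SubVT = v4i32 and Index = Constant:i64<4>, the helper above returns Sub for
//   V = insert_subvector Base, Sub:v4i32, Constant:i64<4>
// and returns B for
//   V = concat_vectors A:v4i32, B:v4i32
// since element index 4 is where the second concat operand starts.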
24886
24887 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24888 SelectionDAG &DAG,
24889 bool LegalOperations) {
24890 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24891 SDValue BinOp = Extract->getOperand(0);
24892 unsigned BinOpcode = BinOp.getOpcode();
24893 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24894 return SDValue();
24895
24896 EVT VecVT = BinOp.getValueType();
24897 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24898 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24899 return SDValue();
24900
24901 SDValue Index = Extract->getOperand(1);
24902 EVT SubVT = Extract->getValueType(0);
24903 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24904 return SDValue();
24905
24906 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24907 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24908
24909 // TODO: We could handle the case where only 1 operand is being inserted by
24910 // creating an extract of the other operand, but that requires checking
24911 // number of uses and/or costs.
24912 if (!Sub0 || !Sub1)
24913 return SDValue();
24914
24915 // We are inserting both operands of the wide binop only to extract back
24916 // to the narrow vector size. Eliminate all of the insert/extract:
24917 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24918 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24919 BinOp->getFlags());
24920}
24921
24922/// If we are extracting a subvector produced by a wide binary operator try
24923/// to use a narrow binary operator and/or avoid concatenation and extraction.
24924 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24925 bool LegalOperations) {
24926 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24927 // some of these bailouts with other transforms.
24928
24929 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24930 return V;
24931
24932 // The extract index must be a constant, so we can map it to a concat operand.
24933 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24934 if (!ExtractIndexC)
24935 return SDValue();
24936
24937 // We are looking for an optionally bitcasted wide vector binary operator
24938 // feeding an extract subvector.
24939 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24940 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24941 unsigned BOpcode = BinOp.getOpcode();
24942 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24943 return SDValue();
24944
24945 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24946 // reduced to the unary fneg when it is visited, and we probably want to deal
24947 // with fneg in a target-specific way.
24948 if (BOpcode == ISD::FSUB) {
24949 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24950 if (C && C->getValueAPF().isNegZero())
24951 return SDValue();
24952 }
24953
24954 // The binop must be a vector type, so we can extract some fraction of it.
24955 EVT WideBVT = BinOp.getValueType();
24956 // The optimisations below currently assume we are dealing with fixed length
24957 // vectors. It is possible to add support for scalable vectors, but at the
24958 // moment we've done no analysis to prove whether they are profitable or not.
24959 if (!WideBVT.isFixedLengthVector())
24960 return SDValue();
24961
24962 EVT VT = Extract->getValueType(0);
24963 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24964 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24965 "Extract index is not a multiple of the vector length.");
24966
24967 // Bail out if this is not a proper multiple width extraction.
24968 unsigned WideWidth = WideBVT.getSizeInBits();
24969 unsigned NarrowWidth = VT.getSizeInBits();
24970 if (WideWidth % NarrowWidth != 0)
24971 return SDValue();
24972
24973 // Bail out if we are extracting a fraction of a single operation. This can
24974 // occur because we potentially looked through a bitcast of the binop.
24975 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24976 unsigned WideNumElts = WideBVT.getVectorNumElements();
24977 if (WideNumElts % NarrowingRatio != 0)
24978 return SDValue();
24979
24980 // Bail out if the target does not support a narrower version of the binop.
24981 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24982 WideNumElts / NarrowingRatio);
24983 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24984 LegalOperations))
24985 return SDValue();
24986
24987 // If extraction is cheap, we don't need to look at the binop operands
24988 // for concat ops. The narrow binop alone makes this transform profitable.
24989 // We can't just reuse the original extract index operand because we may have
24990 // bitcasted.
24991 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24992 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24993 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24994 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24995 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24996 SDLoc DL(Extract);
24997 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24998 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24999 BinOp.getOperand(0), NewExtIndex);
25000 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25001 BinOp.getOperand(1), NewExtIndex);
25002 SDValue NarrowBinOp =
25003 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25004 return DAG.getBitcast(VT, NarrowBinOp);
25005 }
25006
25007 // Only handle the case where we are doubling and then halving. A larger ratio
25008 // may require more than two narrow binops to replace the wide binop.
25009 if (NarrowingRatio != 2)
25010 return SDValue();
25011
25012 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25013 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25014 // flavors, but no other 256-bit integer support. This could be extended to
25015 // handle any binop, but that may require fixing/adding other folds to avoid
25016 // codegen regressions.
25017 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25018 return SDValue();
25019
25020 // We need at least one concatenation operation of a binop operand to make
25021 // this transform worthwhile. The concat must double the input vector sizes.
25022 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25023 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25024 return V.getOperand(ConcatOpNum);
25025 return SDValue();
25026 };
25027 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25028 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25029
25030 if (SubVecL || SubVecR) {
25031 // If a binop operand was not the result of a concat, we must extract a
25032 // half-sized operand for our new narrow binop:
25033 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25034 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25035 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25036 SDLoc DL(Extract);
25037 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25038 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25039 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25040 BinOp.getOperand(0), IndexC);
25041
25042 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25043 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25044 BinOp.getOperand(1), IndexC);
25045
25046 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25047 return DAG.getBitcast(VT, NarrowBinOp);
25048 }
25049
25050 return SDValue();
25051}
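// Editorial worked example for the concat case above (illustrative sketch,
// not part of the upstream source), assuming a narrow v4i32 AND is legal or
// custom:
//   t10: v4i32 = ...
//   t11: v4i32 = ...
//   t1: v8i32 = concat_vectors t10, t11
//   t2: v8i32 = ...
//   t3: v8i32 = and t1, t2
//   t4: v4i32 = extract_subvector t3, Constant:i64<4>
// can become
//   t5: v4i32 = extract_subvector t2, Constant:i64<4>
//   t6: v4i32 = and t11, t5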
25052
25053/// If we are extracting a subvector from a wide vector load, convert to a
25054/// narrow load to eliminate the extraction:
25055/// (extract_subvector (load wide vector)) --> (load narrow vector)
25056 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
25057 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25058 if (DAG.getDataLayout().isBigEndian())
25059 return SDValue();
25060
25061 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
25062 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
25063 return SDValue();
25064
25065 // Allow targets to opt-out.
25066 EVT VT = Extract->getValueType(0);
25067
25068 // We can only create byte sized loads.
25069 if (!VT.isByteSized())
25070 return SDValue();
25071
25072 unsigned Index = Extract->getConstantOperandVal(1);
25073 unsigned NumElts = VT.getVectorMinNumElements();
25074 // A fixed length vector being extracted from a scalable vector
25075 // may not be any *smaller* than the scalable one.
25076 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25077 return SDValue();
25078
25079 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25080 // multiple of the minimum number of elements in the result type.
25081 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25082 "multiple of the result's element count");
25083
25084 // It's fine to use TypeSize here as we know the offset will not be negative.
25085 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25086
25087 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25088 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
25089 return SDValue();
25090
25091 // The narrow load will be offset from the base address of the old load if
25092 // we are extracting from something besides index 0 (little-endian).
25093 SDLoc DL(Extract);
25094
25095 // TODO: Use "BaseIndexOffset" to make this more effective.
25096 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25097
25098 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
25099 MachineFunction &MF = DAG.getMachineFunction();
25100 MachineMemOperand *MMO;
25101 if (Offset.isScalable()) {
25102 MachinePointerInfo MPI =
25103 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25104 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
25105 } else
25106 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25107 StoreSize);
25108
25109 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25110 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25111 return NewLd;
25112}
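// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source), little-endian, with a simple non-extending load:
//   t1: v8i32,ch = load<(load (s256) from %p)> t0, t2, undef:i64
//   t3: v4i32 = extract_subvector t1, Constant:i64<4>
// can become
//   t4: i64 = add t2, Constant:i64<16>
//   t5: v4i32,ch = load<(load (s128) from %p + 16)> t0, t4, undef:i64
// i.e. a narrow load whose address is offset by 4 elements * 4 bytes = 16.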
25113
25114/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25115/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25116/// EXTRACT_SUBVECTOR(Op?, ?),
25117/// Mask'))
25118/// iff it is legal and profitable to do so. Notably, the trimmed mask
25119/// (containing only the elements that are extracted)
25120/// must reference at most two subvectors.
25122 SelectionDAG &DAG,
25123 const TargetLowering &TLI,
25124 bool LegalOperations) {
25125 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25126 "Must only be called on EXTRACT_SUBVECTOR's");
25127
25128 SDValue N0 = N->getOperand(0);
25129
25130 // Only deal with non-scalable vectors.
25131 EVT NarrowVT = N->getValueType(0);
25132 EVT WideVT = N0.getValueType();
25133 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25134 return SDValue();
25135
25136 // The operand must be a shufflevector.
25137 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
25138 if (!WideShuffleVector)
25139 return SDValue();
25140
25141 // The old shuffle needs to go away.
25142 if (!WideShuffleVector->hasOneUse())
25143 return SDValue();
25144
25145 // And the narrow shufflevector that we'll form must be legal.
25146 if (LegalOperations &&
25147 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
25148 return SDValue();
25149
25150 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
25151 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25152 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
25153 "Extract index is not a multiple of the output vector length.");
25154
25155 int WideNumElts = WideVT.getVectorNumElements();
25156
25157 SmallVector<int, 16> NewMask;
25158 NewMask.reserve(NumEltsExtracted);
25159 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25160 DemandedSubvectors;
25161
25162 // Try to decode the wide mask into narrow mask from at most two subvectors.
25163 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25164 NumEltsExtracted)) {
25165 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25166 "Out-of-bounds shuffle mask?");
25167
25168 if (M < 0) {
25169 // Does not depend on operands, does not require adjustment.
25170 NewMask.emplace_back(M);
25171 continue;
25172 }
25173
25174 // From which operand of the shuffle does this shuffle mask element pick?
25175 int WideShufOpIdx = M / WideNumElts;
25176 // Which element of that operand is picked?
25177 int OpEltIdx = M % WideNumElts;
25178
25179 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25180 "Shuffle mask vector decomposition failure.");
25181
25182 // And which NumEltsExtracted-sized subvector of that operand is that?
25183 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25184 // And which element within that subvector of that operand is that?
25185 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25186
25187 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25188 "Shuffle mask subvector decomposition failure.");
25189
25190 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
25191 WideShufOpIdx * WideNumElts) == M &&
25192 "Shuffle mask full decomposition failure.");
25193
25194 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25195
25196 if (Op.isUndef()) {
25197 // Picking from an undef operand. Let's adjust mask instead.
25198 NewMask.emplace_back(-1);
25199 continue;
25200 }
25201
25202 const std::pair<SDValue, int> DemandedSubvector =
25203 std::make_pair(Op, OpSubvecIdx);
25204
25205 if (DemandedSubvectors.insert(DemandedSubvector)) {
25206 if (DemandedSubvectors.size() > 2)
25207 return SDValue(); // We can't handle more than two subvectors.
25208 // How many elements into the WideVT does this subvector start?
25209 int Index = NumEltsExtracted * OpSubvecIdx;
25210 // Bail out if the extraction isn't going to be cheap.
25211 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
25212 return SDValue();
25213 }
25214
25215 // Ok, but from which operand of the new shuffle will this element pick?
25216 int NewOpIdx =
25217 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
25218 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
25219
25220 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
25221 NewMask.emplace_back(AdjM);
25222 }
25223 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
25224 assert(DemandedSubvectors.size() <= 2 &&
25225 "Should have ended up demanding at most two subvectors.");
25226
25227 // Did we discover that the shuffle does not actually depend on operands?
25228 if (DemandedSubvectors.empty())
25229 return DAG.getUNDEF(NarrowVT);
25230
25231 // Profitability check: only deal with extractions from the first subvector
25232 // unless the mask becomes an identity mask.
25233 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
25234 any_of(NewMask, [](int M) { return M < 0; }))
25235 for (auto &DemandedSubvector : DemandedSubvectors)
25236 if (DemandedSubvector.second != 0)
25237 return SDValue();
25238
25239 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
25240 // operand[s]/index[es], so there is no point in checking for its legality.
25241
25242 // Do not turn a legal shuffle into an illegal one.
25243 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25244 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
25245 return SDValue();
25246
25247 SDLoc DL(N);
25248
25249 SmallVector<SDValue, 2> NewOps;
25250 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
25251 &DemandedSubvector : DemandedSubvectors) {
25252 // How many elements into the WideVT does this subvector start?
25253 int Index = NumEltsExtracted * DemandedSubvector.second;
25254 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
25255 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
25256 DemandedSubvector.first, IndexC));
25257 }
25258 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
25259 "Should end up with either one or two ops");
25260
25261 // If we ended up with only one operand, pad with an undef.
25262 if (NewOps.size() == 1)
25263 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
25264
25265 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
25266}
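// Editorial worked example for the fold above (illustrative sketch, not part
// of the upstream source):
//   t1: v8i32 = ...
//   t2: v8i32 = ...
//   t3: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t1, t2
//   t4: v4i32 = extract_subvector t3, Constant:i64<4>
// The trimmed mask <12,13,14,15> only references elements 4..7 of t2, so this
// can become
//   t5: v4i32 = extract_subvector t2, Constant:i64<4>
//   t6: v4i32 = vector_shuffle<0,1,2,3> t5, undef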
25267
25268SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
25269 EVT NVT = N->getValueType(0);
25270 SDValue V = N->getOperand(0);
25271 uint64_t ExtIdx = N->getConstantOperandVal(1);
25272 SDLoc DL(N);
25273
25274 // Extract from UNDEF is UNDEF.
25275 if (V.isUndef())
25276 return DAG.getUNDEF(NVT);
25277
25279 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
25280 return NarrowLoad;
25281
25282 // Combine an extract of an extract into a single extract_subvector.
25283 // ext (ext X, C), 0 --> ext X, C
25284 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
25285 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
25286 V.getConstantOperandVal(1)) &&
25287 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
25288 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
25289 V.getOperand(1));
25290 }
25291 }
25292
25293 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
25294 if (V.getOpcode() == ISD::SPLAT_VECTOR)
25295 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
25296 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
25297 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
25298
25299 // extract_subvector(insert_subvector(x,y,c1),c2)
25300 // --> extract_subvector(y,c2-c1)
25301 // iff we're just extracting from the inserted subvector.
25302 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25303 SDValue InsSub = V.getOperand(1);
25304 EVT InsSubVT = InsSub.getValueType();
25305 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
25306 unsigned InsIdx = V.getConstantOperandVal(2);
25307 unsigned NumSubElts = NVT.getVectorMinNumElements();
25308 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
25309 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
25310 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
25311 V.getValueType().isFixedLengthVector())
25312 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
25313 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
25314 }
25315
25316 // Try to move vector bitcast after extract_subv by scaling extraction index:
25317 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
25318 if (V.getOpcode() == ISD::BITCAST &&
25319 V.getOperand(0).getValueType().isVector() &&
25320 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
25321 SDValue SrcOp = V.getOperand(0);
25322 EVT SrcVT = SrcOp.getValueType();
25323 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
25324 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
25325 if ((SrcNumElts % DestNumElts) == 0) {
25326 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
25327 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
25328 EVT NewExtVT =
25329 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
25330 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25331 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
25332 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25333 V.getOperand(0), NewIndex);
25334 return DAG.getBitcast(NVT, NewExtract);
25335 }
25336 }
25337 if ((DestNumElts % SrcNumElts) == 0) {
25338 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
25339 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
25340 ElementCount NewExtEC =
25341 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
25342 EVT ScalarVT = SrcVT.getScalarType();
25343 if ((ExtIdx % DestSrcRatio) == 0) {
25344 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
25345 EVT NewExtVT =
25346 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
25347 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25348 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25349 SDValue NewExtract =
25350 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25351 V.getOperand(0), NewIndex);
25352 return DAG.getBitcast(NVT, NewExtract);
25353 }
25354 if (NewExtEC.isScalar() &&
25355 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
25356 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25357 SDValue NewExtract =
25358 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
25359 V.getOperand(0), NewIndex);
25360 return DAG.getBitcast(NVT, NewExtract);
25361 }
25362 }
25363 }
25364 }
25365 }
25366
25367 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
25368 unsigned ExtNumElts = NVT.getVectorMinNumElements();
25369 EVT ConcatSrcVT = V.getOperand(0).getValueType();
25370 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
25371 "Concat and extract subvector do not change element type");
25372 assert((ExtIdx % ExtNumElts) == 0 &&
25373 "Extract index is not a multiple of the input vector length.");
25374
25375 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
25376 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
25377
25378 // If the concatenated source types match this extract, it's a direct
25379 // simplification:
25380 // extract_subvec (concat V1, V2, ...), i --> Vi
25381 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
25382 return V.getOperand(ConcatOpIdx);
25383
25384 // If the concatenated source vectors are a whole multiple of the length of
25385 // this extract, then extract a fraction of one of those source vectors
25386 // directly from a concat operand. Example:
25387 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
25388 // v2i8 extract_subvec v8i8 Y, 6
25389 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
25390 ConcatSrcNumElts % ExtNumElts == 0) {
25391 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
25392 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
25393 "Trying to extract from >1 concat operand?");
25394 assert(NewExtIdx % ExtNumElts == 0 &&
25395 "Extract index is not a multiple of the input vector length.");
25396 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
25397 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
25398 V.getOperand(ConcatOpIdx), NewIndexC);
25399 }
25400 }
25401
25402 if (SDValue V =
25403 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
25404 return V;
25405
25406 V = peekThroughBitcasts(V);
25407
25408 // If the input is a build vector, try to make a smaller build vector.
25409 if (V.getOpcode() == ISD::BUILD_VECTOR) {
25410 EVT InVT = V.getValueType();
25411 unsigned ExtractSize = NVT.getSizeInBits();
25412 unsigned EltSize = InVT.getScalarSizeInBits();
25413 // Only do this if we won't split any elements.
25414 if (ExtractSize % EltSize == 0) {
25415 unsigned NumElems = ExtractSize / EltSize;
25416 EVT EltVT = InVT.getVectorElementType();
25417 EVT ExtractVT =
25418 NumElems == 1 ? EltVT
25419 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
25420 if ((Level < AfterLegalizeDAG ||
25421 (NumElems == 1 ||
25422 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
25423 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
25424 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
25425
25426 if (NumElems == 1) {
25427 SDValue Src = V->getOperand(IdxVal);
25428 if (EltVT != Src.getValueType())
25429 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
25430 return DAG.getBitcast(NVT, Src);
25431 }
25432
25433 // Extract the pieces from the original build_vector.
25434 SDValue BuildVec =
25435 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
25436 return DAG.getBitcast(NVT, BuildVec);
25437 }
25438 }
25439 }
25440
25441 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25442 // Handle only simple case where vector being inserted and vector
25443 // being extracted are of same size.
25444 EVT SmallVT = V.getOperand(1).getValueType();
25445 if (!NVT.bitsEq(SmallVT))
25446 return SDValue();
25447
25448 // Combine:
25449 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
25450 // Into:
25451 // indices are equal or bit offsets are equal => V1
25452 // otherwise => (extract_subvec V1, ExtIdx)
25453 uint64_t InsIdx = V.getConstantOperandVal(2);
25454 if (InsIdx * SmallVT.getScalarSizeInBits() ==
25455 ExtIdx * NVT.getScalarSizeInBits()) {
25456 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
25457 return SDValue();
25458
25459 return DAG.getBitcast(NVT, V.getOperand(1));
25460 }
25461 return DAG.getNode(
25462 ISD::EXTRACT_SUBVECTOR, DL, NVT,
25463 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
25464 N->getOperand(1));
25465 }
25466
25467 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
25468 return NarrowBOp;
25469
25470 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25471 return SDValue(N, 0);
25472
25473 return SDValue();
25474}
25475
25476/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
25477/// followed by concatenation. Narrow vector ops may have better performance
25478/// than wide ops, and this can unlock further narrowing of other vector ops.
25479/// Targets can invert this transform later if it is not profitable.
25480 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
25481 SelectionDAG &DAG) {
25482 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
25483 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
25484 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
25485 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
25486 return SDValue();
25487
25488 // Split the wide shuffle mask into halves. Any mask element that is accessing
25489 // operand 1 is offset down to account for narrowing of the vectors.
25490 ArrayRef<int> Mask = Shuf->getMask();
25491 EVT VT = Shuf->getValueType(0);
25492 unsigned NumElts = VT.getVectorNumElements();
25493 unsigned HalfNumElts = NumElts / 2;
25494 SmallVector<int, 16> Mask0(HalfNumElts, -1);
25495 SmallVector<int, 16> Mask1(HalfNumElts, -1);
25496 for (unsigned i = 0; i != NumElts; ++i) {
25497 if (Mask[i] == -1)
25498 continue;
25499 // If we reference the upper (undef) subvector then the element is undef.
25500 if ((Mask[i] % NumElts) >= HalfNumElts)
25501 continue;
25502 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
25503 if (i < HalfNumElts)
25504 Mask0[i] = M;
25505 else
25506 Mask1[i - HalfNumElts] = M;
25507 }
25508
25509 // Ask the target if this is a valid transform.
25510 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25511 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
25512 HalfNumElts);
25513 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
25514 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
25515 return SDValue();
25516
25517 // shuffle (concat X, undef), (concat Y, undef), Mask -->
25518 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
25519 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
25520 SDLoc DL(Shuf);
25521 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
25522 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
25523 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
25524}
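// Illustrative sketch of the fold above (hypothetical types and mask): with
//   N0 = concat (v4i32 X), undef and N1 = concat (v4i32 Y), undef,
// the v8i32 mask <0,8,1,9,2,10,3,11> splits into Mask0 = <0,4,1,5> and
// Mask1 = <2,6,3,7>, giving
//   concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)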
25525
25526// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
25527// or turn a shuffle of a single concat into simpler shuffle then concat.
25528 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
25529 EVT VT = N->getValueType(0);
25530 unsigned NumElts = VT.getVectorNumElements();
25531
25532 SDValue N0 = N->getOperand(0);
25533 SDValue N1 = N->getOperand(1);
25534 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25535 ArrayRef<int> Mask = SVN->getMask();
25536
25537 SmallVector<SDValue, 4> Ops;
25538 EVT ConcatVT = N0.getOperand(0).getValueType();
25539 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
25540 unsigned NumConcats = NumElts / NumElemsPerConcat;
25541
25542 auto IsUndefMaskElt = [](int i) { return i == -1; };
25543
25544 // Special case: shuffle(concat(A,B)) can be more efficiently represented
25545 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
25546 // half vector elements.
25547 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25548 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25549 IsUndefMaskElt)) {
25550 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25551 N0.getOperand(1),
25552 Mask.slice(0, NumElemsPerConcat));
25553 N1 = DAG.getUNDEF(ConcatVT);
25554 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25555 }
25556
25557 // Look at every vector that's inserted. We're looking for exact
25558 // subvector-sized copies from a concatenated vector
25559 for (unsigned I = 0; I != NumConcats; ++I) {
25560 unsigned Begin = I * NumElemsPerConcat;
25561 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25562
25563 // Make sure we're dealing with a copy.
25564 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25565 Ops.push_back(DAG.getUNDEF(ConcatVT));
25566 continue;
25567 }
25568
25569 int OpIdx = -1;
25570 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25571 if (IsUndefMaskElt(SubMask[i]))
25572 continue;
25573 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25574 return SDValue();
25575 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25576 if (0 <= OpIdx && EltOpIdx != OpIdx)
25577 return SDValue();
25578 OpIdx = EltOpIdx;
25579 }
25580 assert(0 <= OpIdx && "Unknown concat_vectors op");
25581
25582 if (OpIdx < (int)N0.getNumOperands())
25583 Ops.push_back(N0.getOperand(OpIdx));
25584 else
25585 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25586 }
25587
25588 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25589}
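// Illustrative sketch of the fold above (hypothetical operands): with
//   N0 = concat (v4i32 A), (v4i32 B) and N1 = concat (v4i32 C), (v4i32 D),
// the v8i32 mask <4,5,6,7,8,9,10,11> copies whole subvectors, so the shuffle
// becomes concat B, C.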
25590
25591// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25592// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25593//
25594// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25595// a simplification in some sense, but it isn't appropriate in general: some
25596// BUILD_VECTORs are substantially cheaper than others. The general case
25597// of a BUILD_VECTOR requires inserting each element individually (or
25598// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25599// all constants is a single constant pool load. A BUILD_VECTOR where each
25600// element is identical is a splat. A BUILD_VECTOR where most of the operands
25601// are undef lowers to a small number of element insertions.
25602//
25603// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25604// We don't fold shuffles where one side is a non-zero constant, and we don't
25605// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25606// non-constant operands. This seems to work out reasonably well in practice.
25607 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25608 SelectionDAG &DAG,
25609 const TargetLowering &TLI) {
25610 EVT VT = SVN->getValueType(0);
25611 unsigned NumElts = VT.getVectorNumElements();
25612 SDValue N0 = SVN->getOperand(0);
25613 SDValue N1 = SVN->getOperand(1);
25614
25615 if (!N0->hasOneUse())
25616 return SDValue();
25617
25618 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
25619 // discussed above.
25620 if (!N1.isUndef()) {
25621 if (!N1->hasOneUse())
25622 return SDValue();
25623
25624 bool N0AnyConst = isAnyConstantBuildVector(N0);
25625 bool N1AnyConst = isAnyConstantBuildVector(N1);
25626 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25627 return SDValue();
25628 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25629 return SDValue();
25630 }
25631
25632 // If both inputs are splats of the same value then we can safely merge this
25633 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25634 bool IsSplat = false;
25635 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25636 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25637 if (BV0 && BV1)
25638 if (SDValue Splat0 = BV0->getSplatValue())
25639 IsSplat = (Splat0 == BV1->getSplatValue());
25640
25641 SmallVector<SDValue, 8> Ops;
25642 SmallSet<SDValue, 16> DuplicateOps;
25643 for (int M : SVN->getMask()) {
25644 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25645 if (M >= 0) {
25646 int Idx = M < (int)NumElts ? M : M - NumElts;
25647 SDValue &S = (M < (int)NumElts ? N0 : N1);
25648 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25649 Op = S.getOperand(Idx);
25650 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25651 SDValue Op0 = S.getOperand(0);
25652 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25653 } else {
25654 // Operand can't be combined - bail out.
25655 return SDValue();
25656 }
25657 }
25658
25659 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25660 // generating a splat; semantically, this is fine, but it's likely to
25661 // generate low-quality code if the target can't reconstruct an appropriate
25662 // shuffle.
25663 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25664 if (!IsSplat && !DuplicateOps.insert(Op).second)
25665 return SDValue();
25666
25667 Ops.push_back(Op);
25668 }
25669
25670 // BUILD_VECTOR requires all inputs to be of the same type, find the
25671 // maximum type and extend them all.
25672 EVT SVT = VT.getScalarType();
25673 if (SVT.isInteger())
25674 for (SDValue &Op : Ops)
25675 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25676 if (SVT != VT.getScalarType())
25677 for (SDValue &Op : Ops)
25678 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25679 : (TLI.isZExtFree(Op.getValueType(), SVT)
25680 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25681 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25682 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25683}
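// Illustrative sketch of the fold above (hypothetical operands):
//   shuffle (build_vector A, B, C, D), (build_vector E, F, G, H), <0,5,2,7>
//     --> build_vector A, F, C, H
// assuming the duplicate/constant heuristics above do not bail out first.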
25684
25685// Match shuffles that can be converted to *_vector_extend_in_reg.
25686// This is often generated during legalization.
25687// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25688// and returns the EVT to which the extension should be performed.
25689// NOTE: this assumes that the src is the first operand of the shuffle.
25690 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25691 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25692 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25693 bool LegalOperations) {
25694 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25695
25696 // TODO Add support for big-endian when we have a test case.
25697 if (!VT.isInteger() || IsBigEndian)
25698 return std::nullopt;
25699
25700 unsigned NumElts = VT.getVectorNumElements();
25701 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25702
25703 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
25704 // power-of-2 extensions as they are the most likely.
25705 // FIXME: should try Scale == NumElts case too,
25706 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25707 // The vector width must be a multiple of Scale.
25708 if (NumElts % Scale != 0)
25709 continue;
25710
25711 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25712 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25713
25714 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25715 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25716 continue;
25717
25718 if (Match(Scale))
25719 return OutVT;
25720 }
25721
25722 return std::nullopt;
25723}
25724
25725// Match shuffles that can be converted to any_vector_extend_in_reg.
25726// This is often generated during legalization.
25727// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25728 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25729 SelectionDAG &DAG,
25730 const TargetLowering &TLI,
25731 bool LegalOperations) {
25732 EVT VT = SVN->getValueType(0);
25733 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25734
25735 // TODO Add support for big-endian when we have a test case.
25736 if (!VT.isInteger() || IsBigEndian)
25737 return SDValue();
25738
25739 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25740 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25741 Mask = SVN->getMask()](unsigned Scale) {
25742 for (unsigned i = 0; i != NumElts; ++i) {
25743 if (Mask[i] < 0)
25744 continue;
25745 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25746 continue;
25747 return false;
25748 }
25749 return true;
25750 };
25751
25752 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25753 SDValue N0 = SVN->getOperand(0);
25754 // Never create an illegal type. Only create unsupported operations if we
25755 // are pre-legalization.
25756 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25757 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25758 if (!OutVT)
25759 return SDValue();
25760 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25761}
25762
25763// Match shuffles that can be converted to zero_extend_vector_inreg.
25764// This is often generated during legalization.
25765// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25766 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25767 SelectionDAG &DAG,
25768 const TargetLowering &TLI,
25769 bool LegalOperations) {
25770 bool LegalTypes = true;
25771 EVT VT = SVN->getValueType(0);
25772 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25773 unsigned NumElts = VT.getVectorNumElements();
25774 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25775
25776 // TODO: add support for big-endian when we have a test case.
25777 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25778 if (!VT.isInteger() || IsBigEndian)
25779 return SDValue();
25780
25781 SmallVector<int, 16> Mask(SVN->getMask());
25782 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25783 for (int &Indice : Mask) {
25784 if (Indice < 0)
25785 continue;
25786 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25787 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25788 Fn(Indice, OpIdx, OpEltIdx);
25789 }
25790 };
25791
25792 // Which elements of which operand does this shuffle demand?
25793 std::array<APInt, 2> OpsDemandedElts;
25794 for (APInt &OpDemandedElts : OpsDemandedElts)
25795 OpDemandedElts = APInt::getZero(NumElts);
25796 ForEachDecomposedIndice(
25797 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25798 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25799 });
25800
25801 // Element-wise(!), which of these demanded elements are known to be zero?
25802 std::array<APInt, 2> OpsKnownZeroElts;
25803 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25804 std::get<2>(I) =
25805 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25806
25807 // Manifest zeroable element knowledge in the shuffle mask.
25808 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25809 // this is a local invention, but it won't leak into DAG.
25810 // FIXME: should we not manifest them, but just check when matching?
25811 bool HadZeroableElts = false;
25812 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25813 int &Indice, int OpIdx, int OpEltIdx) {
25814 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25815 Indice = -2; // Zeroable element.
25816 HadZeroableElts = true;
25817 }
25818 });
25819
25820 // Don't proceed unless we've refined at least one zeroable mask indice.
25821 // If we didn't, then we are still trying to match the same shuffle mask
25822 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25823 // and evidently failed. Proceeding will lead to endless combine loops.
25824 if (!HadZeroableElts)
25825 return SDValue();
25826
25827 // The shuffle may be more fine-grained than we want. Widen elements first.
25828 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25829 SmallVector<int, 16> ScaledMask;
25830 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25831 assert(Mask.size() >= ScaledMask.size() &&
25832 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25833 int Prescale = Mask.size() / ScaledMask.size();
25834
25835 NumElts = ScaledMask.size();
25836 EltSizeInBits *= Prescale;
25837
25838 EVT PrescaledVT = EVT::getVectorVT(
25839 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25840 NumElts);
25841
25842 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25843 return SDValue();
25844
25845 // For example,
25846 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25847 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25848 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25849 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25850 "Unexpected mask scaling factor.");
25851 ArrayRef<int> Mask = ScaledMask;
25852 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25853 SrcElt != NumSrcElts; ++SrcElt) {
25854 // Analyze the shuffle mask in Scale-sized chunks.
25855 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25856 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25857 Mask = Mask.drop_front(MaskChunk.size());
25858 // The first indice in this chunk must be SrcElt, but not zero!
25859 // FIXME: undef should be fine, but that results in more-defined result.
25860 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25861 return false;
25862 // The rest of the indices in this chunk must be zeros.
25863 // FIXME: undef should be fine, but that results in more-defined result.
25864 if (!all_of(MaskChunk.drop_front(1),
25865 [](int Indice) { return Indice == -2; }))
25866 return false;
25867 }
25868 assert(Mask.empty() && "Did not process the whole mask?");
25869 return true;
25870 };
25871
25872 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25873 for (bool Commuted : {false, true}) {
25874 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25875 if (Commuted)
25876 ShuffleVectorSDNode::commuteMask(ScaledMask);
25877 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25878 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25879 LegalOperations);
25880 if (OutVT)
25881 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25882 DAG.getBitcast(PrescaledVT, Op)));
25883 }
25884 return SDValue();
25885}
25886
25887// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25888// each source element of a large type into the lowest elements of a smaller
25889// destination type. This is often generated during legalization.
25890// If the source node itself was a '*_extend_vector_inreg' node then we should
25891// then be able to remove it.
25892 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25893 SelectionDAG &DAG) {
25894 EVT VT = SVN->getValueType(0);
25895 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25896
25897 // TODO Add support for big-endian when we have a test case.
25898 if (!VT.isInteger() || IsBigEndian)
25899 return SDValue();
25900
25901 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25902
25903 unsigned Opcode = N0.getOpcode();
25904 if (!ISD::isExtVecInRegOpcode(Opcode))
25905 return SDValue();
25906
25907 SDValue N00 = N0.getOperand(0);
25908 ArrayRef<int> Mask = SVN->getMask();
25909 unsigned NumElts = VT.getVectorNumElements();
25910 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25911 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25912 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25913
25914 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25915 return SDValue();
25916 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25917
25918 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25919 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25920 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25921 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25922 for (unsigned i = 0; i != NumElts; ++i) {
25923 if (Mask[i] < 0)
25924 continue;
25925 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25926 continue;
25927 return false;
25928 }
25929 return true;
25930 };
25931
25932 // At the moment we just handle the case where we've truncated back to the
25933 // same size as before the extension.
25934 // TODO: handle more extension/truncation cases as cases arise.
25935 if (EltSizeInBits != ExtSrcSizeInBits)
25936 return SDValue();
25937
25938 // We can remove *extend_vector_inreg only if the truncation happens at
25939 // the same scale as the extension.
25940 if (isTruncate(ExtScale))
25941 return DAG.getBitcast(VT, N00);
25942
25943 return SDValue();
25944}
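// Illustrative sketch of the fold above (hypothetical types): if the shuffle
// input is (a bitcast of) v2i64 zero_extend_vector_inreg (v4i32 X), then the
// v4i32 truncating shuffle <0,2,-1,-1> just recovers the low i32 of each i64,
// so the whole pattern folds to a bitcast of X.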
25945
25946// Combine shuffles of splat-shuffles of the form:
25947// shuffle (shuffle V, undef, splat-mask), undef, M
25948// If splat-mask contains undef elements, we need to be careful about
25949 // introducing undefs in the folded mask which are not the result of composing
25950 // the masks of the shuffles.
25951 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25952 SelectionDAG &DAG) {
25953 EVT VT = Shuf->getValueType(0);
25954 unsigned NumElts = VT.getVectorNumElements();
25955
25956 if (!Shuf->getOperand(1).isUndef())
25957 return SDValue();
25958
25959 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25960 // in disguise, with all demanded elements being identical.
25961 // FIXME: this can be done per-operand.
25962 if (!Shuf->isSplat()) {
25963 APInt DemandedElts(NumElts, 0);
25964 for (int Idx : Shuf->getMask()) {
25965 if (Idx < 0)
25966 continue; // Ignore sentinel indices.
25967 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25968 DemandedElts.setBit(Idx);
25969 }
25970 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25971 APInt UndefElts;
25972 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25973 // Even if all demanded elements are splat, some of them could be undef.
25974 // Which lowest demanded element is *not* known-undef?
25975 std::optional<unsigned> MinNonUndefIdx;
25976 for (int Idx : Shuf->getMask()) {
25977 if (Idx < 0 || UndefElts[Idx])
25978 continue; // Ignore sentinel indices, and undef elements.
25979 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25980 }
25981 if (!MinNonUndefIdx)
25982 return DAG.getUNDEF(VT); // All undef - result is undef.
25983 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25984 SmallVector<int, 8> SplatMask(Shuf->getMask());
25985 for (int &Idx : SplatMask) {
25986 if (Idx < 0)
25987 continue; // Passthrough sentinel indices.
25988 // Otherwise, just pick the lowest demanded non-undef element.
25989 // Or sentinel undef, if we know we'd pick a known-undef element.
25990 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25991 }
25992 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25993 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25994 Shuf->getOperand(1), SplatMask);
25995 }
25996 }
25997
25998 // If the inner operand is a known splat with no undefs, just return that directly.
25999 // TODO: Create DemandedElts mask from Shuf's mask.
26000 // TODO: Allow undef elements and merge with the shuffle code below.
26001 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26002 return Shuf->getOperand(0);
26003
26004 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26005 if (!Splat || !Splat->isSplat())
26006 return SDValue();
26007
26008 ArrayRef<int> ShufMask = Shuf->getMask();
26009 ArrayRef<int> SplatMask = Splat->getMask();
26010 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26011
26012 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26013 // every undef mask element in the splat-shuffle has a corresponding undef
26014 // element in the user-shuffle's mask or if the composition of mask elements
26015 // would result in undef.
26016 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26017 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26018 // In this case it is not legal to simplify to the splat-shuffle because we
26019 // may be exposing users of the shuffle to an undef element at index 1
26020 // which was not there before the combine.
26021 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26022 // In this case the composition of masks yields SplatMask, so it's ok to
26023 // simplify to the splat-shuffle.
26024 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26025 // In this case the composed mask includes all undef elements of SplatMask
26026 // and in addition sets element zero to undef. It is safe to simplify to
26027 // the splat-shuffle.
26028 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26029 ArrayRef<int> SplatMask) {
26030 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26031 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26032 SplatMask[UserMask[i]] != -1)
26033 return false;
26034 return true;
26035 };
26036 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26037 return Shuf->getOperand(0);
26038
26039 // Create a new shuffle with a mask that is composed of the two shuffles'
26040 // masks.
26041 SmallVector<int, 32> NewMask;
26042 for (int Idx : ShufMask)
26043 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26044
26045 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26046 Splat->getOperand(0), Splat->getOperand(1),
26047 NewMask);
26048}
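// Illustrative sketch of the fold above (hypothetical masks): for
//   shuffle (shuffle V, undef, <2,2,2,2>), undef, <3,1,u,0>
// every lane the outer shuffle reads is still V[2], so the outer shuffle can
// simply be replaced by the inner splat-shuffle.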
26049
26050// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26051// the mask can be treated as a larger type.
26052 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26053 SelectionDAG &DAG,
26054 const TargetLowering &TLI,
26055 bool LegalOperations) {
26056 SDValue Op0 = SVN->getOperand(0);
26057 SDValue Op1 = SVN->getOperand(1);
26058 EVT VT = SVN->getValueType(0);
26059 if (Op0.getOpcode() != ISD::BITCAST)
26060 return SDValue();
26061 EVT InVT = Op0.getOperand(0).getValueType();
26062 if (!InVT.isVector() ||
26063 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26064 Op1.getOperand(0).getValueType() != InVT)))
26065 return SDValue();
26066 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26067 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26068 return SDValue();
26069
26070 int VTLanes = VT.getVectorNumElements();
26071 int InLanes = InVT.getVectorNumElements();
26072 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26073 (LegalOperations &&
26074 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
26075 return SDValue();
26076 int Factor = VTLanes / InLanes;
26077
26078 // Check that each group of lanes in the mask are either undef or make a valid
26079 // mask for the wider lane type.
26080 ArrayRef<int> Mask = SVN->getMask();
26081 SmallVector<int> NewMask;
26082 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26083 return SDValue();
26084
26085 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26086 return SDValue();
26087
26088 // Create the new shuffle with the new mask and bitcast it back to the
26089 // original type.
26090 SDLoc DL(SVN);
26091 Op0 = Op0.getOperand(0);
26092 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26093 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26094 return DAG.getBitcast(VT, NewShuf);
26095}
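// Illustrative sketch of the fold above (hypothetical types): a v8i16 shuffle
// of bitcast(v4i32 X) and bitcast(v4i32 Y) with mask <0,1,10,11,4,5,14,15>
// widens by a factor of 2 to the v4i32 mask <0,5,2,7>, which is applied to X
// and Y directly and then bitcast back to v8i16.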
26096
26097/// Combine shuffle of shuffle of the form:
26098/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
26099 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26100 SelectionDAG &DAG) {
26101 if (!OuterShuf->getOperand(1).isUndef())
26102 return SDValue();
26103 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26104 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26105 return SDValue();
26106
26107 ArrayRef<int> OuterMask = OuterShuf->getMask();
26108 ArrayRef<int> InnerMask = InnerShuf->getMask();
26109 unsigned NumElts = OuterMask.size();
26110 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26111 SmallVector<int, 32> CombinedMask(NumElts, -1);
26112 int SplatIndex = -1;
26113 for (unsigned i = 0; i != NumElts; ++i) {
26114 // Undef lanes remain undef.
26115 int OuterMaskElt = OuterMask[i];
26116 if (OuterMaskElt == -1)
26117 continue;
26118
26119 // Peek through the shuffle masks to get the underlying source element.
26120 int InnerMaskElt = InnerMask[OuterMaskElt];
26121 if (InnerMaskElt == -1)
26122 continue;
26123
26124 // Initialize the splatted element.
26125 if (SplatIndex == -1)
26126 SplatIndex = InnerMaskElt;
26127
26128 // Non-matching index - this is not a splat.
26129 if (SplatIndex != InnerMaskElt)
26130 return SDValue();
26131
26132 CombinedMask[i] = InnerMaskElt;
26133 }
26134 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26135 getSplatIndex(CombinedMask) != -1) &&
26136 "Expected a splat mask");
26137
26138 // TODO: The transform may be a win even if the mask is not legal.
26139 EVT VT = OuterShuf->getValueType(0);
26140 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26141 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26142 return SDValue();
26143
26144 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26145 InnerShuf->getOperand(1), CombinedMask);
26146}
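// Illustrative sketch of the fold above (hypothetical masks): for
//   shuf (shuf X, undef, <1,1,2,3>), undef, <0,1,0,1>
// the outer mask only reads inner lanes 0 and 1, both of which hold X[1], so
// the combined mask is the splat mask <1,1,1,1>.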
26147
26148/// If the shuffle mask is taking exactly one element from the first vector
26149/// operand and passing through all other elements from the second vector
26150/// operand, return the index of the mask element that is choosing an element
26151/// from the first operand. Otherwise, return -1.
26152 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26153 int MaskSize = Mask.size();
26154 int EltFromOp0 = -1;
26155 // TODO: This does not match if there are undef elements in the shuffle mask.
26156 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26157 // removing an instruction (a shuffle), but losing the knowledge that some
26158 // vector lanes are not needed.
26159 for (int i = 0; i != MaskSize; ++i) {
26160 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26161 // We're looking for a shuffle of exactly one element from operand 0.
26162 if (EltFromOp0 != -1)
26163 return -1;
26164 EltFromOp0 = i;
26165 } else if (Mask[i] != i + MaskSize) {
26166 // Nothing from operand 1 can change lanes.
26167 return -1;
26168 }
26169 }
26170 return EltFromOp0;
26171}
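// Illustrative sketch (hypothetical mask): for a 4-element shuffle, the mask
// <4,5,2,7> takes element 2 from operand 0 and passes through elements 0, 1
// and 3 from operand 1, so this returns 2; <4,1,2,7> would return -1.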
26172
26173/// If a shuffle inserts exactly one element from a source vector operand into
26174/// another vector operand and we can access the specified element as a scalar,
26175/// then we can eliminate the shuffle.
26176 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
26177 SelectionDAG &DAG) {
26178 // First, check if we are taking one element of a vector and shuffling that
26179 // element into another vector.
26180 ArrayRef<int> Mask = Shuf->getMask();
26181 SmallVector<int, 16> CommutedMask(Mask);
26182 SDValue Op0 = Shuf->getOperand(0);
26183 SDValue Op1 = Shuf->getOperand(1);
26184 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
26185 if (ShufOp0Index == -1) {
26186 // Commute mask and check again.
26187 ShuffleVectorSDNode::commuteMask(CommutedMask);
26188 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
26189 if (ShufOp0Index == -1)
26190 return SDValue();
26191 // Commute operands to match the commuted shuffle mask.
26192 std::swap(Op0, Op1);
26193 Mask = CommutedMask;
26194 }
26195
26196 // The shuffle inserts exactly one element from operand 0 into operand 1.
26197 // Now see if we can access that element as a scalar via a real insert element
26198 // instruction.
26199 // TODO: We can try harder to locate the element as a scalar. Examples: it
26200 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
26201 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
26202 "Shuffle mask value must be from operand 0");
26203
26204 SDValue Elt;
26205 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
26206 m_SpecificInt(Mask[ShufOp0Index])))) {
26207 // There's an existing insertelement with constant insertion index, so we
26208 // don't need to check the legality/profitability of a replacement operation
26209 // that differs at most in the constant value. The target should be able to
26210 // lower any of those in a similar way. If not, legalization will expand
26211 // this to a scalar-to-vector plus shuffle.
26212 //
26213 // Note that the shuffle may move the scalar from the position that the
26214 // insert element used. Therefore, our new insert element occurs at the
26215 // shuffle's mask index value, not the insert's index value.
26216 //
26217 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26218 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26219 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
26220 Op1, Elt, NewInsIndex);
26221 }
26222
26223 return SDValue();
26224}
26225
26226/// If we have a unary shuffle of a shuffle, see if it can be folded away
26227/// completely. This has the potential to lose undef knowledge because the first
26228/// shuffle may not have an undef mask element where the second one does. So
26229/// only call this after doing simplifications based on demanded elements.
26230 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
26231 // shuf (shuf0 X, Y, Mask0), undef, Mask
26232 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26233 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
26234 return SDValue();
26235
26236 ArrayRef<int> Mask = Shuf->getMask();
26237 ArrayRef<int> Mask0 = Shuf0->getMask();
26238 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
26239 // Ignore undef elements.
26240 if (Mask[i] == -1)
26241 continue;
26242 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
26243
26244 // Is the element of the shuffle operand chosen by this shuffle the same as
26245 // the element chosen by the shuffle operand itself?
26246 if (Mask0[Mask[i]] != Mask0[i])
26247 return SDValue();
26248 }
26249 // Every element of this shuffle is identical to the result of the previous
26250 // shuffle, so we can replace this value.
26251 return Shuf->getOperand(0);
26252}
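// Illustrative sketch of the fold above (hypothetical masks): with
// Mask0 = <0,5,0,7> and an outer mask of <2,1,2,3>, every outer lane reads the
// same source element as the corresponding inner lane (inner lane 2 is X[0],
// just like lane 0), so the outer shuffle can be replaced by the inner one.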
26253
26254SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
26255 EVT VT = N->getValueType(0);
26256 unsigned NumElts = VT.getVectorNumElements();
26257
26258 SDValue N0 = N->getOperand(0);
26259 SDValue N1 = N->getOperand(1);
26260
26261 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
26262
26263 // Canonicalize shuffle undef, undef -> undef
26264 if (N0.isUndef() && N1.isUndef())
26265 return DAG.getUNDEF(VT);
26266
26267 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26268
26269 // Canonicalize shuffle v, v -> v, undef
26270 if (N0 == N1)
26271 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
26272 createUnaryMask(SVN->getMask(), NumElts));
26273
26274 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
26275 if (N0.isUndef())
26276 return DAG.getCommutedVectorShuffle(*SVN);
26277
26278 // Remove references to rhs if it is undef
26279 if (N1.isUndef()) {
26280 bool Changed = false;
26281 SmallVector<int, 8> NewMask;
26282 for (unsigned i = 0; i != NumElts; ++i) {
26283 int Idx = SVN->getMaskElt(i);
26284 if (Idx >= (int)NumElts) {
26285 Idx = -1;
26286 Changed = true;
26287 }
26288 NewMask.push_back(Idx);
26289 }
26290 if (Changed)
26291 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
26292 }
26293
26294 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
26295 return InsElt;
26296
26297 // A shuffle of a single vector that is a splatted value can always be folded.
26298 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
26299 return V;
26300
26301 if (SDValue V = formSplatFromShuffles(SVN, DAG))
26302 return V;
26303
26304 // If it is a splat, check if the argument vector is another splat or a
26305 // build_vector.
26306 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
26307 int SplatIndex = SVN->getSplatIndex();
26308 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
26309 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
26310 // splat (vector_bo L, R), Index -->
26311 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
26312 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
26313 SDLoc DL(N);
26314 EVT EltVT = VT.getScalarType();
26315 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
26316 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
26317 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
26318 SDValue NewBO =
26319 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
26320 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
26321 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
26322 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
26323 }
26324
26325 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
26326 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
26327 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
26328 N0.hasOneUse()) {
26329 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
26330 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
26331
26332 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
26333 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
26334 if (Idx->getAPIntValue() == SplatIndex)
26335 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
26336
26337 // Look through a bitcast if LE and splatting lane 0, through to a
26338 // scalar_to_vector or a build_vector.
26339 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
26340 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
26341 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
26342 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
26343 EVT N00VT = N0.getOperand(0).getValueType();
26344 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
26345 VT.isInteger() && N00VT.isInteger()) {
26346 EVT InVT =
26347 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
26348 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
26349 SDLoc(N), InVT);
26350 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
26351 }
26352 }
26353 }
26354
26355 // If this is a bit convert that changes the element type of the vector but
26356 // not the number of vector elements, look through it. Be careful not to
26357 // look through conversions that change things like v4f32 to v2f64.
26358 SDNode *V = N0.getNode();
26359 if (V->getOpcode() == ISD::BITCAST) {
26360 SDValue ConvInput = V->getOperand(0);
26361 if (ConvInput.getValueType().isVector() &&
26362 ConvInput.getValueType().getVectorNumElements() == NumElts)
26363 V = ConvInput.getNode();
26364 }
26365
26366 if (V->getOpcode() == ISD::BUILD_VECTOR) {
26367 assert(V->getNumOperands() == NumElts &&
26368 "BUILD_VECTOR has wrong number of operands");
26369 SDValue Base;
26370 bool AllSame = true;
26371 for (unsigned i = 0; i != NumElts; ++i) {
26372 if (!V->getOperand(i).isUndef()) {
26373 Base = V->getOperand(i);
26374 break;
26375 }
26376 }
26377 // Splat of <u, u, u, u>, return <u, u, u, u>
26378 if (!Base.getNode())
26379 return N0;
26380 for (unsigned i = 0; i != NumElts; ++i) {
26381 if (V->getOperand(i) != Base) {
26382 AllSame = false;
26383 break;
26384 }
26385 }
26386 // Splat of <x, x, x, x>, return <x, x, x, x>
26387 if (AllSame)
26388 return N0;
26389
26390 // Canonicalize any other splat as a build_vector, but avoid defining any
26391 // undefined elements in the mask.
26392 SDValue Splatted = V->getOperand(SplatIndex);
26393 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
26394 EVT EltVT = Splatted.getValueType();
26395
26396 for (unsigned i = 0; i != NumElts; ++i) {
26397 if (SVN->getMaskElt(i) < 0)
26398 Ops[i] = DAG.getUNDEF(EltVT);
26399 }
26400
26401 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
26402
26403 // We may have jumped through bitcasts, so the type of the
26404 // BUILD_VECTOR may not match the type of the shuffle.
26405 if (V->getValueType(0) != VT)
26406 NewBV = DAG.getBitcast(VT, NewBV);
26407 return NewBV;
26408 }
26409 }
26410
26411 // Simplify source operands based on shuffle mask.
26412 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26413 return SDValue(N, 0);
26414
26415 // This is intentionally placed after demanded elements simplification because
26416 // it could eliminate knowledge of undef elements created by this shuffle.
26417 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
26418 return ShufOp;
26419
26420 // Match shuffles that can be converted to any_vector_extend_in_reg.
26421 if (SDValue V =
26422 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
26423 return V;
26424
26425 // Combine "truncate_vector_in_reg" style shuffles.
26426 if (SDValue V = combineTruncationShuffle(SVN, DAG))
26427 return V;
26428
26429 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
26430 Level < AfterLegalizeVectorOps &&
26431 (N1.isUndef() ||
26432 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
26433 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
26434 if (SDValue V = partitionShuffleOfConcats(N, DAG))
26435 return V;
26436 }
26437
26438 // A shuffle of a concat of the same narrow vector can be reduced to use
26439 // only low-half elements of a concat with undef:
26440 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
26441 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
26442 N0.getNumOperands() == 2 &&
26443 N0.getOperand(0) == N0.getOperand(1)) {
26444 int HalfNumElts = (int)NumElts / 2;
26445 SmallVector<int, 8> NewMask;
26446 for (unsigned i = 0; i != NumElts; ++i) {
26447 int Idx = SVN->getMaskElt(i);
26448 if (Idx >= HalfNumElts) {
26449 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
26450 Idx -= HalfNumElts;
26451 }
26452 NewMask.push_back(Idx);
26453 }
26454 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
26455 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
26456 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
26457 N0.getOperand(0), UndefVec);
26458 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
26459 }
26460 }
26461
26462 // See if we can replace a shuffle with an insert_subvector.
26463 // e.g. v2i32 into v8i32:
26464 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
26465 // --> insert_subvector(lhs,rhs1,4).
26466 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
26467 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
26468 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
26469 // Ensure RHS subvectors are legal.
26470 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
26471 EVT SubVT = RHS.getOperand(0).getValueType();
26472 int NumSubVecs = RHS.getNumOperands();
26473 int NumSubElts = SubVT.getVectorNumElements();
26474 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
26475 if (!TLI.isTypeLegal(SubVT))
26476 return SDValue();
26477
26479 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
26479 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
26480 return SDValue();
26481
26482 // Search [NumSubElts] spans for RHS sequence.
26483 // TODO: Can we avoid nested loops to increase performance?
26484 SmallVector<int> InsertionMask(NumElts);
26485 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
26486 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
26487 // Reset mask to identity.
26488 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
26489
26490 // Add subvector insertion.
26491 std::iota(InsertionMask.begin() + SubIdx,
26492 InsertionMask.begin() + SubIdx + NumSubElts,
26493 NumElts + (SubVec * NumSubElts));
26494
26495 // See if the shuffle mask matches the reference insertion mask.
26496 bool MatchingShuffle = true;
26497 for (int i = 0; i != (int)NumElts; ++i) {
26498 int ExpectIdx = InsertionMask[i];
26499 int ActualIdx = Mask[i];
26500 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
26501 MatchingShuffle = false;
26502 break;
26503 }
26504 }
26505
26506 if (MatchingShuffle)
26507 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
26508 RHS.getOperand(SubVec),
26509 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
26510 }
26511 }
26512 return SDValue();
26513 };
26514 ArrayRef<int> Mask = SVN->getMask();
26515 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
26516 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
26517 return InsertN1;
26518 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
26519 SmallVector<int> CommuteMask(Mask);
26520 ShuffleVectorSDNode::commuteMask(CommuteMask);
26521 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
26522 return InsertN0;
26523 }
26524 }
26525
26526 // If we're not performing a select/blend shuffle, see if we can convert the
26527 // shuffle into an AND node, where all the out-of-lane elements are known zero.
26528 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26529 bool IsInLaneMask = true;
26530 ArrayRef<int> Mask = SVN->getMask();
26531 SmallVector<int, 16> ClearMask(NumElts, -1);
26532 APInt DemandedLHS = APInt::getZero(NumElts);
26533 APInt DemandedRHS = APInt::getZero(NumElts);
26534 for (int I = 0; I != (int)NumElts; ++I) {
26535 int M = Mask[I];
26536 if (M < 0)
26537 continue;
26538 ClearMask[I] = M == I ? I : (I + NumElts);
26539 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
26540 if (M != I) {
26541 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
26542 Demanded.setBit(M % NumElts);
26543 }
26544 }
26545 // TODO: Should we try to mask with N1 as well?
26546 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
26547 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
26548 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
26549 SDLoc DL(N);
26550 EVT IntVT = VT.changeVectorElementTypeToInteger();
26551 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
26552 // Transform the type to a legal type so that the buildvector constant
26553 // elements are not illegal. Make sure that the result is larger than the
26554 // original type, in case the value is split into two (e.g. i64->i32).
26555 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26556 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26557 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26558 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26559 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26560 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26561 for (int I = 0; I != (int)NumElts; ++I)
26562 if (0 <= Mask[I])
26563 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26564
26565 // See if a clear mask is legal instead of going via
26566 // XformToShuffleWithZero which loses UNDEF mask elements.
26567 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26568 return DAG.getBitcast(
26569 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26570 DAG.getConstant(0, DL, IntVT), ClearMask));
26571
26572 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26573 return DAG.getBitcast(
26574 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26575 DAG.getBuildVector(IntVT, DL, AndMask)));
26576 }
26577 }
26578 }
26579
26580 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26581 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26582 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26583 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26584 return Res;
26585
26586 // If this shuffle only has a single input that is a bitcasted shuffle,
26587 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26588 // back to their original types.
26589 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26590 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26591 TLI.isTypeLegal(VT)) {
26592
26593 SDValue BC0 = peekThroughOneUseBitcasts(N0);
26594 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26595 EVT SVT = VT.getScalarType();
26596 EVT InnerVT = BC0->getValueType(0);
26597 EVT InnerSVT = InnerVT.getScalarType();
26598
26599 // Determine which shuffle works with the smaller scalar type.
26600 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26601 EVT ScaleSVT = ScaleVT.getScalarType();
26602
26603 if (TLI.isTypeLegal(ScaleVT) &&
26604 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26605 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26606 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26607 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26608
26609 // Scale the shuffle masks to the smaller scalar type.
26610 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26611 SmallVector<int, 8> InnerMask;
26612 SmallVector<int, 8> OuterMask;
26613 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26614 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26615
26616 // Merge the shuffle masks.
26617 SmallVector<int, 8> NewMask;
26618 for (int M : OuterMask)
26619 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26620
26621 // Test for shuffle mask legality over both commutations.
26622 SDValue SV0 = BC0->getOperand(0);
26623 SDValue SV1 = BC0->getOperand(1);
26624 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26625 if (!LegalMask) {
26626 std::swap(SV0, SV1);
26627 ShuffleVectorSDNode::commuteMask(NewMask);
26628 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26629 }
26630
26631 if (LegalMask) {
26632 SV0 = DAG.getBitcast(ScaleVT, SV0);
26633 SV1 = DAG.getBitcast(ScaleVT, SV1);
26634 return DAG.getBitcast(
26635 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26636 }
26637 }
26638 }
26639 }
26640
26641 // Match shuffles of bitcasts, so long as the mask can be treated as the
26642 // larger type.
26643 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26644 return V;
26645
26646 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26647 // operand, and SV1 as the second operand.
26648 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26649 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26650 auto MergeInnerShuffle =
26651 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26652 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26653 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26654 SmallVectorImpl<int> &Mask) -> bool {
26655 // Don't try to fold splats; they're likely to simplify somehow, or they
26656 // might be free.
26657 if (OtherSVN->isSplat())
26658 return false;
26659
26660 SV0 = SV1 = SDValue();
26661 Mask.clear();
26662
26663 for (unsigned i = 0; i != NumElts; ++i) {
26664 int Idx = SVN->getMaskElt(i);
26665 if (Idx < 0) {
26666 // Propagate Undef.
26667 Mask.push_back(Idx);
26668 continue;
26669 }
26670
26671 if (Commute)
26672 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26673
26674 SDValue CurrentVec;
26675 if (Idx < (int)NumElts) {
26676 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26677 // shuffle mask to identify which vector is actually referenced.
26678 Idx = OtherSVN->getMaskElt(Idx);
26679 if (Idx < 0) {
26680 // Propagate Undef.
26681 Mask.push_back(Idx);
26682 continue;
26683 }
26684 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26685 : OtherSVN->getOperand(1);
26686 } else {
26687 // This shuffle index references an element within N1.
26688 CurrentVec = N1;
26689 }
26690
26691 // Simple case where 'CurrentVec' is UNDEF.
26692 if (CurrentVec.isUndef()) {
26693 Mask.push_back(-1);
26694 continue;
26695 }
26696
26697 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26698 // will be the first or second operand of the combined shuffle.
26699 Idx = Idx % NumElts;
26700 if (!SV0.getNode() || SV0 == CurrentVec) {
26701 // Ok. CurrentVec is the left hand side.
26702 // Update the mask accordingly.
26703 SV0 = CurrentVec;
26704 Mask.push_back(Idx);
26705 continue;
26706 }
26707 if (!SV1.getNode() || SV1 == CurrentVec) {
26708 // Ok. CurrentVec is the right hand side.
26709 // Update the mask accordingly.
26710 SV1 = CurrentVec;
26711 Mask.push_back(Idx + NumElts);
26712 continue;
26713 }
26714
26715 // Last chance - see if the vector is another shuffle and if it
26716 // uses one of the existing candidate shuffle ops.
26717 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26718 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26719 if (InnerIdx < 0) {
26720 Mask.push_back(-1);
26721 continue;
26722 }
26723 SDValue InnerVec = (InnerIdx < (int)NumElts)
26724 ? CurrentSVN->getOperand(0)
26725 : CurrentSVN->getOperand(1);
26726 if (InnerVec.isUndef()) {
26727 Mask.push_back(-1);
26728 continue;
26729 }
26730 InnerIdx %= NumElts;
26731 if (InnerVec == SV0) {
26732 Mask.push_back(InnerIdx);
26733 continue;
26734 }
26735 if (InnerVec == SV1) {
26736 Mask.push_back(InnerIdx + NumElts);
26737 continue;
26738 }
26739 }
26740
26741 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26742 return false;
26743 }
26744
26745 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26746 return true;
26747
26748 // Avoid introducing shuffles with illegal mask.
26749 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26750 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26751 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26752 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26753 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26754 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26755 if (TLI.isShuffleMaskLegal(Mask, VT))
26756 return true;
26757
26758 std::swap(SV0, SV1);
26759 ShuffleVectorSDNode::commuteMask(Mask);
26760 return TLI.isShuffleMaskLegal(Mask, VT);
26761 };
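  // Worked example of the merge performed by MergeInnerShuffle (hypothetical
  // v4i32 values; the masks are made up for illustration):
  //   t1 = vector_shuffle A, B, <0, 4, 1, 5>   // A[0], B[0], A[1], B[1]
  //   t2 = vector_shuffle t1, C, <0, 2, 4, 6>  // A[0], A[1], C[0], C[2]
  // collapses to a single node when the combined mask is legal:
  //   t2 = vector_shuffle A, C, <0, 1, 4, 6>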
26762
26763 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26764 // Canonicalize shuffles according to rules:
26765 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26766 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26767 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26768 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26769 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26770 // The incoming shuffle must be of the same type as the result of the
26771 // current shuffle.
26772 assert(N1->getOperand(0).getValueType() == VT &&
26773 "Shuffle types don't match");
26774
26775 SDValue SV0 = N1->getOperand(0);
26776 SDValue SV1 = N1->getOperand(1);
26777 bool HasSameOp0 = N0 == SV0;
26778 bool IsSV1Undef = SV1.isUndef();
26779 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26780 // Commute the operands of this shuffle so merging below will trigger.
26781 return DAG.getCommutedVectorShuffle(*SVN);
26782 }
26783
26784 // Canonicalize splat shuffles to the RHS to improve merging below.
26785 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26786 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26787 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26788 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26789 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26790 return DAG.getCommutedVectorShuffle(*SVN);
26791 }
26792
26793 // Try to fold according to rules:
26794 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26795 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26796 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26797 // Don't try to fold shuffles with illegal type.
26798 // Only fold if this shuffle is the only user of the other shuffle.
26799 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26800 for (int i = 0; i != 2; ++i) {
26801 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26802 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26803 // The incoming shuffle must be of the same type as the result of the
26804 // current shuffle.
26805 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26806 assert(OtherSV->getOperand(0).getValueType() == VT &&
26807 "Shuffle types don't match");
26808
26809 SDValue SV0, SV1;
26810 SmallVector<int, 4> Mask;
26811 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26812 SV0, SV1, Mask)) {
26813 // Check if all indices in Mask are Undef. If so, propagate Undef.
26814 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26815 return DAG.getUNDEF(VT);
26816
26817 return DAG.getVectorShuffle(VT, SDLoc(N),
26818 SV0 ? SV0 : DAG.getUNDEF(VT),
26819 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26820 }
26821 }
26822 }
26823
26824 // Merge shuffles through binops if we are able to merge them with at least
26825 // one other shuffle.
26826 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26827 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26828 unsigned SrcOpcode = N0.getOpcode();
26829 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26830 (N1.isUndef() ||
26831 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26832 // Get binop source ops, or just pass on the undef.
26833 SDValue Op00 = N0.getOperand(0);
26834 SDValue Op01 = N0.getOperand(1);
26835 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26836 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26837 // TODO: We might be able to relax the VT check but we don't currently
26838 // have any isBinOp() that has different result/ops VTs so play safe until
26839 // we have test coverage.
26840 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26841 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26842 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26843 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26844 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26845 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26846 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26847 SmallVectorImpl<int> &Mask, bool LeftOp,
26848 bool Commute) {
26849 SDValue InnerN = Commute ? N1 : N0;
26850 SDValue Op0 = LeftOp ? Op00 : Op01;
26851 SDValue Op1 = LeftOp ? Op10 : Op11;
26852 if (Commute)
26853 std::swap(Op0, Op1);
26854 // Only accept the merged shuffle if we don't introduce undef elements,
26855 // or the inner shuffle already contained undef elements.
26856 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26857 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26858 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26859 Mask) &&
26860 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26861 llvm::none_of(Mask, [](int M) { return M < 0; }));
26862 };
26863
26864 // Ensure we don't increase the number of shuffles - we must merge a
26865 // shuffle from at least one of the LHS and RHS ops.
26866 bool MergedLeft = false;
26867 SDValue LeftSV0, LeftSV1;
26868 SmallVector<int, 4> LeftMask;
26869 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26870 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26871 MergedLeft = true;
26872 } else {
26873 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26874 LeftSV0 = Op00, LeftSV1 = Op10;
26875 }
26876
26877 bool MergedRight = false;
26878 SDValue RightSV0, RightSV1;
26879 SmallVector<int, 4> RightMask;
26880 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26881 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26882 MergedRight = true;
26883 } else {
26884 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26885 RightSV0 = Op01, RightSV1 = Op11;
26886 }
26887
26888 if (MergedLeft || MergedRight) {
26889 SDLoc DL(N);
26890 SDValue LHS = DAG.getVectorShuffle(
26891 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26892 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26893 SDValue RHS = DAG.getVectorShuffle(
26894 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26895 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26896 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26897 }
26898 }
26899 }
26900 }
26901
26902 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26903 return V;
26904
26905 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26906 // Perform this really late, because it could eliminate knowledge
26907 // of undef elements created by this shuffle.
26908 if (Level < AfterLegalizeTypes)
26909 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26910 LegalOperations))
26911 return V;
26912
26913 return SDValue();
26914}
26915
26916SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26917 EVT VT = N->getValueType(0);
26918 if (!VT.isFixedLengthVector())
26919 return SDValue();
26920
26921 // Try to convert a scalar binop with an extracted vector element to a vector
26922 // binop. This is intended to reduce potentially expensive register moves.
26923 // TODO: Check if both operands are extracted.
26924 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26925 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26926 SDValue Scalar = N->getOperand(0);
26927 unsigned Opcode = Scalar.getOpcode();
26928 EVT VecEltVT = VT.getScalarType();
26929 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26930 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26931 Scalar.getOperand(0).getValueType() == VecEltVT &&
26932 Scalar.getOperand(1).getValueType() == VecEltVT &&
26933 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26934 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26935 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26936 // Match an extract element and get a shuffle mask equivalent.
26937 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26938
26939 for (int i : {0, 1}) {
26940 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26941 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26942 SDValue EE = Scalar.getOperand(i);
26943 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26944 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26945 EE.getOperand(0).getValueType() == VT &&
26946 isa<ConstantSDNode>(EE.getOperand(1))) {
26947 // Mask = {ExtractIndex, undef, undef....}
26948 ShufMask[0] = EE.getConstantOperandVal(1);
26949 // Make sure the shuffle is legal if we are crossing lanes.
26950 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26951 SDLoc DL(N);
26952 SDValue V[] = {EE.getOperand(0),
26953 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26954 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26955 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26956 ShufMask);
26957 }
26958 }
26959 }
26960 }
26961
26962 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26963 // with a VECTOR_SHUFFLE and possible truncate.
26964 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26965 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26966 return SDValue();
26967
26968 // If we have an implicit truncate, truncate here if it is legal.
26969 if (VecEltVT != Scalar.getValueType() &&
26970 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26971 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26972 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26973 }
26974
26975 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26976 if (!ExtIndexC)
26977 return SDValue();
26978
26979 SDValue SrcVec = Scalar.getOperand(0);
26980 EVT SrcVT = SrcVec.getValueType();
26981 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26982 unsigned VTNumElts = VT.getVectorNumElements();
26983 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26984 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26985 SmallVector<int, 8> Mask(SrcNumElts, -1);
26986 Mask[0] = ExtIndexC->getZExtValue();
26987 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26988 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26989 if (!LegalShuffle)
26990 return SDValue();
26991
26992 // If the initial vector is the same size, the shuffle is the result.
26993 if (VT == SrcVT)
26994 return LegalShuffle;
26995
26996 // If not, shorten the shuffled vector.
26997 if (VTNumElts != SrcNumElts) {
26998 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26999 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27000 SrcVT.getVectorElementType(), VTNumElts);
27001 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27002 ZeroIdx);
27003 }
27004 }
27005
27006 return SDValue();
27007}
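// Worked example for the scalar binop fold in visitSCALAR_TO_VECTOR above
// (hypothetical v4i32 types and values):
//   e = extract_vector_elt V, 2
//   s = add e, 42
//   r = scalar_to_vector s
// becomes a vector add followed by a lane-0 shuffle, avoiding a vector->GPR
// round trip:
//   r = vector_shuffle (add V, <42, 42, 42, 42>), undef, <2, -1, -1, -1>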
27008
27009SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27010 EVT VT = N->getValueType(0);
27011 SDValue N0 = N->getOperand(0);
27012 SDValue N1 = N->getOperand(1);
27013 SDValue N2 = N->getOperand(2);
27014 uint64_t InsIdx = N->getConstantOperandVal(2);
27015
27016 // If inserting an UNDEF, just return the original vector.
27017 if (N1.isUndef())
27018 return N0;
27019
27020 // If this is an insert of an extracted vector into an undef vector, we can
27021 // just use the input to the extract if the types match, and can simplify
27022 // in some cases even if they don't.
27023 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27024 N1.getOperand(1) == N2) {
27025 EVT SrcVT = N1.getOperand(0).getValueType();
27026 if (SrcVT == VT)
27027 return N1.getOperand(0);
27028 // TODO: To remove the zero check, we need to adjust the offset to
27029 // a multiple of the new src type.
27030 if (isNullConstant(N2)) {
27031 if (VT.knownBitsGE(SrcVT) &&
27032 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27033 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27034 VT, N0, N1.getOperand(0), N2);
27035 else if (VT.knownBitsLE(SrcVT) &&
27036 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27037 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27038 VT, N1.getOperand(0), N2);
27039 }
27040 }
27041
27042 // Handle case where we've ended up inserting back into the source vector
27043 // we extracted the subvector from.
27044 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27045 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27046 N1.getOperand(1) == N2)
27047 return N0;
27048
27049 // Simplify scalar inserts into an undef vector:
27050 // insert_subvector undef, (splat X), N2 -> splat X
27051 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27052 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27053 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27054
27055 // If we are inserting a bitcast value into an undef, with the same
27056 // number of elements, just use the bitcast input of the extract.
27057 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27058 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27059 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27060 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27061 N1.getOperand(0).getOperand(1) == N2 &&
27062 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27063 VT.getVectorElementCount() &&
27064 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27065 VT.getSizeInBits()) {
27066 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27067 }
27068
27069 // If both N0 and N1 are bitcast values on which insert_subvector
27070 // would make sense, pull the bitcast through.
27071 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27072 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27073 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27074 SDValue CN0 = N0.getOperand(0);
27075 SDValue CN1 = N1.getOperand(0);
27076 EVT CN0VT = CN0.getValueType();
27077 EVT CN1VT = CN1.getValueType();
27078 if (CN0VT.isVector() && CN1VT.isVector() &&
27079 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27080 CN0VT.getSizeInBits() == CN1VT.getSizeInBits()) {
27081 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27082 CN0.getValueType(), CN0, CN1, N2);
27083 return DAG.getBitcast(VT, NewINSERT);
27084 }
27085 }
27086
27087 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27088 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27089 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27090 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27091 N0.getOperand(1).getValueType() == N1.getValueType() &&
27092 N0.getOperand(2) == N2)
27093 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27094 N1, N2);
27095
27096 // Eliminate an intermediate insert into an undef vector:
27097 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27098 // insert_subvector undef, X, 0
27099 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27100 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27101 isNullConstant(N2))
27102 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27103 N1.getOperand(1), N2);
27104
27105 // Push subvector bitcasts to the output, adjusting the index as we go.
27106 // insert_subvector(bitcast(v), bitcast(s), c1)
27107 // -> bitcast(insert_subvector(v, s, c2))
27108 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27109 N1.getOpcode() == ISD::BITCAST) {
27110 SDValue N0Src = peekThroughBitcasts(N0);
27111 SDValue N1Src = peekThroughBitcasts(N1);
27112 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27113 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27114 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27115 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27116 EVT NewVT;
27117 SDLoc DL(N);
27118 SDValue NewIdx;
27119 LLVMContext &Ctx = *DAG.getContext();
27120 ElementCount NumElts = VT.getVectorElementCount();
27121 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27122 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27123 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27124 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27125 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27126 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27127 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27128 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27129 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27130 NumElts.divideCoefficientBy(Scale));
27131 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27132 }
27133 }
27134 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27135 SDValue Res = DAG.getBitcast(NewVT, N0Src);
27136 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27137 return DAG.getBitcast(VT, Res);
27138 }
27139 }
27140 }
27141
27142 // Canonicalize insert_subvector dag nodes.
27143 // Example:
27144 // (insert_subvector (insert_subvector A, Idx0), Idx1)
27145 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
27146 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27147 N1.getValueType() == N0.getOperand(1).getValueType()) {
27148 unsigned OtherIdx = N0.getConstantOperandVal(2);
27149 if (InsIdx < OtherIdx) {
27150 // Swap nodes.
27151 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27152 N0.getOperand(0), N1, N2);
27153 AddToWorklist(NewOp.getNode());
27154 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27155 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
27156 }
27157 }
27158
27159 // If the input vector is a concatenation, and the insert replaces
27160 // one of the pieces, we can optimize into a single concat_vectors.
27161 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
27162 N0.getOperand(0).getValueType() == N1.getValueType() &&
27165 unsigned Factor = N1.getValueType().getVectorMinNumElements();
27166 SmallVector<SDValue, 8> Ops(N0->ops());
27167 Ops[InsIdx / Factor] = N1;
27168 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27169 }
27170
27171 // Simplify source operands based on insertion.
27172 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27173 return SDValue(N, 0);
27174
27175 return SDValue();
27176}
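// Worked example for the concat_vectors case in visitINSERT_SUBVECTOR above
// (hypothetical v8i32 result built from two v4i32 pieces):
//   t = concat_vectors A, B
//   insert_subvector t, S, 4   -->   concat_vectors A, S
// because the insert exactly overwrites the second piece.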
27177
27178SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
27179 SDValue N0 = N->getOperand(0);
27180
27181 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27182 if (N0->getOpcode() == ISD::FP16_TO_FP)
27183 return N0->getOperand(0);
27184
27185 return SDValue();
27186}
27187
27188SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
27189 auto Op = N->getOpcode();
27190 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
27191 "opcode should be FP16_TO_FP or BF16_TO_FP.");
27192 SDValue N0 = N->getOperand(0);
27193
27194 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27195 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27196 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27197 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
27198 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27199 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27200 }
27201 }
27202
27203 // Sometimes constants manage to survive very late in the pipeline, e.g.,
27204 // because they are wrapped inside the <1 x f16> type. Try one last time to
27205 // get rid of them.
27206 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27207 N->getValueType(0), {N0});
27208 return Folded;
27209}
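// Note on the AND fold in visitFP16_TO_FP above: FP16_TO_FP and BF16_TO_FP
// only consume the low 16 bits of their integer operand, so an explicit
// (and x, 0xffff) mask adds nothing unless the target requests that the
// zero-extension be kept (shouldKeepZExtForFP16Conv).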
27210
27211SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
27212 SDValue N0 = N->getOperand(0);
27213
27214 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
27215 if (N0->getOpcode() == ISD::BF16_TO_FP)
27216 return N0->getOperand(0);
27217
27218 return SDValue();
27219}
27220
27221SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
27222 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27223 return visitFP16_TO_FP(N);
27224}
27225
27226SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
27227 SDValue N0 = N->getOperand(0);
27228 EVT VT = N0.getValueType();
27229 unsigned Opcode = N->getOpcode();
27230
27231 // VECREDUCE over 1-element vector is just an extract.
27232 if (VT.getVectorElementCount().isScalar()) {
27233 SDLoc dl(N);
27234 SDValue Res =
27235 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
27236 DAG.getVectorIdxConstant(0, dl));
27237 if (Res.getValueType() != N->getValueType(0))
27238 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
27239 return Res;
27240 }
27241
27242 // On a boolean vector an and/or reduction is the same as a umin/umax
27243 // reduction. Convert them if the latter is legal while the former isn't.
27244 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
27245 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
27246 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
27247 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
27248 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
27249 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
27250 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
27251 }
27252
27253 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
27254 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
27255 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27256 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
27257 SDValue Vec = N0.getOperand(0);
27258 SDValue Subvec = N0.getOperand(1);
27259 if ((Opcode == ISD::VECREDUCE_OR &&
27260 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
27261 (Opcode == ISD::VECREDUCE_AND &&
27262 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
27263 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
27264 }
27265
27266 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
27267 // Same for zext and anyext, and for and/or/xor reductions.
27268 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
27269 Opcode == ISD::VECREDUCE_XOR) &&
27270 (N0.getOpcode() == ISD::SIGN_EXTEND ||
27271 N0.getOpcode() == ISD::ZERO_EXTEND ||
27272 N0.getOpcode() == ISD::ANY_EXTEND) &&
27273 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
27274 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
27275 N0.getOperand(0).getValueType().getVectorElementType(),
27276 N0.getOperand(0));
27277 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
27278 }
27279 return SDValue();
27280}
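// Worked example for the boolean reduction rewrite in visitVECREDUCE above:
// when every lane of x is known to be all-zeros or all-ones (all bits are
// sign bits),
//   vecreduce_and x  ==  vecreduce_umin x
//   vecreduce_or  x  ==  vecreduce_umax x
// so the umin/umax form is used when it is legal and the and/or form is not.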
27281
27282SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
27283 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
27284
27285 // FSUB -> FMA combines:
27286 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
27287 AddToWorklist(Fused.getNode());
27288 return Fused;
27289 }
27290 return SDValue();
27291}
27292
27293SDValue DAGCombiner::visitVPOp(SDNode *N) {
27294
27295 if (N->getOpcode() == ISD::VP_GATHER)
27296 if (SDValue SD = visitVPGATHER(N))
27297 return SD;
27298
27299 if (N->getOpcode() == ISD::VP_SCATTER)
27300 if (SDValue SD = visitVPSCATTER(N))
27301 return SD;
27302
27303 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
27304 if (SDValue SD = visitVP_STRIDED_LOAD(N))
27305 return SD;
27306
27307 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
27308 if (SDValue SD = visitVP_STRIDED_STORE(N))
27309 return SD;
27310
27311 // VP operations in which all vector elements are disabled - either by
27312 // determining that the mask is all false or that the EVL is 0 - can be
27313 // eliminated.
27314 bool AreAllEltsDisabled = false;
27315 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
27316 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
27317 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
27318 AreAllEltsDisabled |=
27319 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
27320
27321 // This is the only generic VP combine we support for now.
27322 if (!AreAllEltsDisabled) {
27323 switch (N->getOpcode()) {
27324 case ISD::VP_FADD:
27325 return visitVP_FADD(N);
27326 case ISD::VP_FSUB:
27327 return visitVP_FSUB(N);
27328 case ISD::VP_FMA:
27329 return visitFMA<VPMatchContext>(N);
27330 case ISD::VP_SELECT:
27331 return visitVP_SELECT(N);
27332 case ISD::VP_MUL:
27333 return visitMUL<VPMatchContext>(N);
27334 case ISD::VP_SUB:
27335 return foldSubCtlzNot<VPMatchContext>(N, DAG);
27336 default:
27337 break;
27338 }
27339 return SDValue();
27340 }
27341
27342 // Binary operations can be replaced by UNDEF.
27343 if (ISD::isVPBinaryOp(N->getOpcode()))
27344 return DAG.getUNDEF(N->getValueType(0));
27345
27346 // VP Memory operations can be replaced by either the chain (stores) or the
27347 // chain + undef (loads).
27348 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
27349 if (MemSD->writeMem())
27350 return MemSD->getChain();
27351 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
27352 }
27353
27354 // Reduction operations return the start operand when no elements are active.
27355 if (ISD::isVPReduction(N->getOpcode()))
27356 return N->getOperand(0);
27357
27358 return SDValue();
27359}
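// Worked example for the disabled-elements path in visitVPOp above: a VP
// operation whose EVL is the constant 0 (or whose mask is all-false), e.g.
//   t = vp.add x, y, mask, evl=0
// computes no lanes, so a binary VP op folds to UNDEF, a VP store folds to
// its chain, a VP load to (undef, chain), and a VP reduction to its start
// value.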
27360
27361SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
27362 SDValue Chain = N->getOperand(0);
27363 SDValue Ptr = N->getOperand(1);
27364 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27365
27366 // Check if the memory where the FP state is written to is used only in a
27367 // single load operation.
27368 LoadSDNode *LdNode = nullptr;
27369 for (auto *U : Ptr->users()) {
27370 if (U == N)
27371 continue;
27372 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
27373 if (LdNode && LdNode != Ld)
27374 return SDValue();
27375 LdNode = Ld;
27376 continue;
27377 }
27378 return SDValue();
27379 }
27380 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27381 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27382 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
27383 return SDValue();
27384
27385 // Check if the loaded value is used only in a store operation.
27386 StoreSDNode *StNode = nullptr;
27387 for (SDUse &U : LdNode->uses()) {
27388 if (U.getResNo() == 0) {
27389 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
27390 if (StNode)
27391 return SDValue();
27392 StNode = St;
27393 } else {
27394 return SDValue();
27395 }
27396 }
27397 }
27398 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27399 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27400 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27401 return SDValue();
27402
27403 // Create new node GET_FPENV_MEM, which uses the store address to write FP
27404 // environment.
27405 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
27406 StNode->getMemOperand());
27407 CombineTo(StNode, Res, false);
27408 return Res;
27409}
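// Note on the combine above: when GET_FPENV_MEM writes the FP environment to
// a temporary slot that is only reloaded once and then stored to its final
// destination, the temporary can be bypassed by emitting a GET_FPENV_MEM that
// writes directly to the final store's address.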
27410
27411SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
27412 SDValue Chain = N->getOperand(0);
27413 SDValue Ptr = N->getOperand(1);
27414 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27415
27416 // Check if the address of the FP state is also used only in a store operation.
27417 StoreSDNode *StNode = nullptr;
27418 for (auto *U : Ptr->users()) {
27419 if (U == N)
27420 continue;
27421 if (auto *St = dyn_cast<StoreSDNode>(U)) {
27422 if (StNode && StNode != St)
27423 return SDValue();
27424 StNode = St;
27425 continue;
27426 }
27427 return SDValue();
27428 }
27429 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27430 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27431 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
27432 return SDValue();
27433
27434 // Check if the stored value is loaded from some location and the loaded
27435 // value is used only in the store operation.
27436 SDValue StValue = StNode->getValue();
27437 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
27438 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27439 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27440 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27441 return SDValue();
27442
27443 // Create new node SET_FPENV_MEM, which uses the load address to read FP
27444 // environment.
27445 SDValue Res =
27446 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
27447 LdNode->getMemOperand());
27448 return Res;
27449}
27450
27451/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
27452/// with the destination vector and a zero vector.
27453/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
27454/// vector_shuffle V, Zero, <0, 4, 2, 4>
27455SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
27456 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
27457
27458 EVT VT = N->getValueType(0);
27459 SDValue LHS = N->getOperand(0);
27460 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
27461 SDLoc DL(N);
27462
27463 // Make sure we're not running after operation legalization where it
27464 // may have custom lowered the vector shuffles.
27465 if (LegalOperations)
27466 return SDValue();
27467
27468 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
27469 return SDValue();
27470
27471 EVT RVT = RHS.getValueType();
27472 unsigned NumElts = RHS.getNumOperands();
27473
27474 // Attempt to create a valid clear mask, splitting the mask into
27475 // sub elements and checking to see if each is
27476 // all zeros or all ones - suitable for shuffle masking.
27477 auto BuildClearMask = [&](int Split) {
27478 int NumSubElts = NumElts * Split;
27479 int NumSubBits = RVT.getScalarSizeInBits() / Split;
27480
27481 SmallVector<int, 8> Indices;
27482 for (int i = 0; i != NumSubElts; ++i) {
27483 int EltIdx = i / Split;
27484 int SubIdx = i % Split;
27485 SDValue Elt = RHS.getOperand(EltIdx);
27486 // X & undef --> 0 (not undef). So this lane must be converted to choose
27487 // from the zero constant vector (same as if the element had all 0-bits).
27488 if (Elt.isUndef()) {
27489 Indices.push_back(i + NumSubElts);
27490 continue;
27491 }
27492
27493 APInt Bits;
27494 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
27495 Bits = Cst->getAPIntValue();
27496 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
27497 Bits = CstFP->getValueAPF().bitcastToAPInt();
27498 else
27499 return SDValue();
27500
27501 // Extract the sub element from the constant bit mask.
27502 if (DAG.getDataLayout().isBigEndian())
27503 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
27504 else
27505 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
27506
27507 if (Bits.isAllOnes())
27508 Indices.push_back(i);
27509 else if (Bits == 0)
27510 Indices.push_back(i + NumSubElts);
27511 else
27512 return SDValue();
27513 }
27514
27515 // Let's see if the target supports this vector_shuffle.
27516 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
27517 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
27518 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
27519 return SDValue();
27520
27521 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
27522 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
27523 DAG.getBitcast(ClearVT, LHS),
27524 Zero, Indices));
27525 };
27526
27527 // Determine maximum split level (byte level masking).
27528 int MaxSplit = 1;
27529 if (RVT.getScalarSizeInBits() % 8 == 0)
27530 MaxSplit = RVT.getScalarSizeInBits() / 8;
27531
27532 for (int Split = 1; Split <= MaxSplit; ++Split)
27533 if (RVT.getScalarSizeInBits() % Split == 0)
27534 if (SDValue S = BuildClearMask(Split))
27535 return S;
27536
27537 return SDValue();
27538}
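// Worked example for the sub-element split in XformToShuffleWithZero above
// (hypothetical v2i64 AND, assuming a little-endian layout):
//   and V, <0x00000000FFFFFFFF, 0xFFFFFFFF00000000>
// has no all-ones/all-zeros pattern at i64 granularity, but at i32
// granularity every piece is all-ones or all-zeros, so it can become
//   bitcast (vector_shuffle (bitcast V to v4i32), zero, <0, 5, 6, 3>)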
27539
27540/// If a vector binop is performed on splat values, it may be profitable to
27541/// extract, scalarize, and insert/splat.
27542static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
27543 const SDLoc &DL, bool LegalTypes) {
27544 SDValue N0 = N->getOperand(0);
27545 SDValue N1 = N->getOperand(1);
27546 unsigned Opcode = N->getOpcode();
27547 EVT VT = N->getValueType(0);
27548 EVT EltVT = VT.getVectorElementType();
27549 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27550
27551 // TODO: Remove/replace the extract cost check? If the elements are available
27552 // as scalars, then there may be no extract cost. Should we ask if
27553 // inserting a scalar back into a vector is cheap instead?
27554 int Index0, Index1;
27555 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27556 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
27557 // Extracting an element from a splat_vector should be free.
27558 // TODO: use DAG.isSplatValue instead?
27559 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
27560 N1.getOpcode() == ISD::SPLAT_VECTOR;
27561 if (!Src0 || !Src1 || Index0 != Index1 ||
27562 Src0.getValueType().getVectorElementType() != EltVT ||
27563 Src1.getValueType().getVectorElementType() != EltVT ||
27564 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27565 // If before type legalization, allow scalar types that will eventually be
27566 // made legal.
27567 !TLI.isOperationLegalOrCustom(
27568 Opcode, LegalTypes
27569 ? EltVT
27570 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
27571 return SDValue();
27572
27573 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
27574 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
27575 return SDValue();
27576
27577 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
27578 // All but one element should have an undef input, which will fold to a
27579 // constant or undef. Avoid splatting which would over-define potentially
27580 // undefined elements.
27581
27582 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27583 // build_vec ..undef, (bo X, Y), undef...
27584 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
27585 DAG.ExtractVectorElements(Src0, EltsX);
27586 DAG.ExtractVectorElements(Src1, EltsY);
27587
27588 for (auto [X, Y] : zip(EltsX, EltsY))
27589 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
27590 return DAG.getBuildVector(VT, DL, EltsResult);
27591 }
27592
27593 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27594 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27595 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27596 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27597
27598 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27599 return DAG.getSplat(VT, DL, ScalarBO);
27600}
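// Worked example for scalarizeBinOpOfSplats above (hypothetical scalable
// nxv4i32 operands with scalar values X and Y):
//   add (splat_vector X), (splat_vector Y)
// becomes
//   splat_vector (add X, Y)
// trading a vector add for one scalar add plus a single splat.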
27601
27602/// Visit a vector cast operation, like FP_EXTEND.
27603SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27604 EVT VT = N->getValueType(0);
27605 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27606 EVT EltVT = VT.getVectorElementType();
27607 unsigned Opcode = N->getOpcode();
27608
27609 SDValue N0 = N->getOperand(0);
27610 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27611
27612 // TODO: promoting the operation might also be good here?
27613 int Index0;
27614 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27615 if (Src0 &&
27616 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27617 TLI.isExtractVecEltCheap(VT, Index0)) &&
27618 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27619 TLI.preferScalarizeSplat(N)) {
27620 EVT SrcVT = N0.getValueType();
27621 EVT SrcEltVT = SrcVT.getVectorElementType();
27622 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27623 SDValue Elt =
27624 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27625 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27626 if (VT.isScalableVector())
27627 return DAG.getSplatVector(VT, DL, ScalarBO);
27628 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27629 return DAG.getBuildVector(VT, DL, Ops);
27630 }
27631
27632 return SDValue();
27633}
27634
27635/// Visit a binary vector operation, like ADD.
27636SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27637 EVT VT = N->getValueType(0);
27638 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27639
27640 SDValue LHS = N->getOperand(0);
27641 SDValue RHS = N->getOperand(1);
27642 unsigned Opcode = N->getOpcode();
27643 SDNodeFlags Flags = N->getFlags();
27644
27645 // Move unary shuffles with identical masks after a vector binop:
27646 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27647 // --> shuffle (VBinOp A, B), Undef, Mask
27648 // This does not require type legality checks because we are creating the
27649 // same types of operations that are in the original sequence. We do have to
27650 // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
27651 // though. This code is adapted from the identical transform in instcombine.
27652 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27653 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27654 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27655 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27656 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27657 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27658 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27659 RHS.getOperand(0), Flags);
27660 SDValue UndefV = LHS.getOperand(1);
27661 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27662 }
27663
27664 // Try to sink a splat shuffle after a binop with a uniform constant.
27665 // This is limited to cases where neither the shuffle nor the constant have
27666 // undefined elements because that could be poison-unsafe or inhibit
27667 // demanded elements analysis. It is further limited to not change a splat
27668 // of an inserted scalar because that may be optimized better by
27669 // load-folding or other target-specific behaviors.
27670 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27671 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27672 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27673 // binop (splat X), (splat C) --> splat (binop X, C)
27674 SDValue X = Shuf0->getOperand(0);
27675 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27676 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27677 Shuf0->getMask());
27678 }
27679 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27680 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27681 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27682 // binop (splat C), (splat X) --> splat (binop C, X)
27683 SDValue X = Shuf1->getOperand(0);
27684 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27685 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27686 Shuf1->getMask());
27687 }
27688 }
27689
27690 // The following pattern is likely to emerge with vector reduction ops. Moving
27691 // the binary operation ahead of insertion may allow using a narrower vector
27692 // instruction that has better performance than the wide version of the op:
27693 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27694 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27695 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27696 LHS.getOperand(2) == RHS.getOperand(2) &&
27697 (LHS.hasOneUse() || RHS.hasOneUse())) {
27698 SDValue X = LHS.getOperand(1);
27699 SDValue Y = RHS.getOperand(1);
27700 SDValue Z = LHS.getOperand(2);
27701 EVT NarrowVT = X.getValueType();
27702 if (NarrowVT == Y.getValueType() &&
27703 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27704 LegalOperations)) {
27705 // (binop undef, undef) may not return undef, so compute that result.
27706 SDValue VecC =
27707 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27708 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27709 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27710 }
27711 }
27712
27713 // Make sure all but the first op are undef or constant.
27714 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27715 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27716 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27717 return Op.isUndef() ||
27718 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27719 });
27720 };
27721
27722 // The following pattern is likely to emerge with vector reduction ops. Moving
27723 // the binary operation ahead of the concat may allow using a narrower vector
27724 // instruction that has better performance than the wide version of the op:
27725 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27726 // concat (VBinOp X, Y), VecC
27727 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27728 (LHS.hasOneUse() || RHS.hasOneUse())) {
27729 EVT NarrowVT = LHS.getOperand(0).getValueType();
27730 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27731 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27732 unsigned NumOperands = LHS.getNumOperands();
27733 SmallVector<SDValue, 4> ConcatOps;
27734 for (unsigned i = 0; i != NumOperands; ++i) {
27735 // This constant folds for operands 1 and up.
27736 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27737 RHS.getOperand(i)));
27738 }
27739
27740 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27741 }
27742 }
27743
27744 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
27745 return V;
27746
27747 return SDValue();
27748}
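// Worked example for the identical-mask shuffle sinking in SimplifyVBinOp
// above (hypothetical v4i32 values):
//   add (vector_shuffle A, undef, <3, 2, 1, 0>),
//       (vector_shuffle B, undef, <3, 2, 1, 0>)
// becomes
//   vector_shuffle (add A, B), undef, <3, 2, 1, 0>
// so only one shuffle survives after the binop.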
27749
27750SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27751 SDValue N2) {
27752 assert(N0.getOpcode() == ISD::SETCC &&
27753 "First argument must be a SetCC node!");
27754
27755 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27756 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27757
27758 // If we got a simplified select_cc node back from SimplifySelectCC, then
27759 // break it down into a new SETCC node, and a new SELECT node, and then return
27760 // the SELECT node, since we were called with a SELECT node.
27761 if (SCC.getNode()) {
27762 // Check to see if we got a select_cc back (to turn into setcc/select).
27763 // Otherwise, just return whatever node we got back, like fabs.
27764 if (SCC.getOpcode() == ISD::SELECT_CC) {
27765 const SDNodeFlags Flags = N0->getFlags();
27766 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27767 N0.getValueType(),
27768 SCC.getOperand(0), SCC.getOperand(1),
27769 SCC.getOperand(4), Flags);
27770 AddToWorklist(SETCC.getNode());
27771 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27772 SCC.getOperand(2), SCC.getOperand(3));
27773 SelectNode->setFlags(Flags);
27774 return SelectNode;
27775 }
27776
27777 return SCC;
27778 }
27779 return SDValue();
27780}
27781
27782/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27783/// being selected between, see if we can simplify the select. Callers of this
27784/// should assume that TheSelect is deleted if this returns true. As such, they
27785/// should return the appropriate thing (e.g. the node) back to the top-level of
27786/// the DAG combiner loop to avoid it being looked at.
27787bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27788 SDValue RHS) {
27789 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27790 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27791 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27792 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27793 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27794 SDValue Sqrt = RHS;
27795 ISD::CondCode CC;
27796 SDValue CmpLHS;
27797 const ConstantFPSDNode *Zero = nullptr;
27798
27799 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27800 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27801 CmpLHS = TheSelect->getOperand(0);
27802 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27803 } else {
27804 // SELECT or VSELECT
27805 SDValue Cmp = TheSelect->getOperand(0);
27806 if (Cmp.getOpcode() == ISD::SETCC) {
27807 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27808 CmpLHS = Cmp.getOperand(0);
27809 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27810 }
27811 }
27812 if (Zero && Zero->isZero() &&
27813 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27814 CC == ISD::SETULT || CC == ISD::SETLT)) {
27815 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27816 CombineTo(TheSelect, Sqrt);
27817 return true;
27818 }
27819 }
27820 }
27821 // Cannot simplify select with vector condition
27822 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27823
27824 // If this is a select from two identical things, try to pull the operation
27825 // through the select.
27826 if (LHS.getOpcode() != RHS.getOpcode() ||
27827 !LHS.hasOneUse() || !RHS.hasOneUse())
27828 return false;
27829
27830 // If this is a load and the token chain is identical, replace the select
27831 // of two loads with a load through a select of the address to load from.
27832 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27833 // constants have been dropped into the constant pool.
27834 if (LHS.getOpcode() == ISD::LOAD) {
27835 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27836 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27837
27838 // Token chains must be identical.
27839 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27840 // Do not let this transformation reduce the number of volatile loads.
27841 // Be conservative for atomics for the moment
27842 // TODO: This does appear to be legal for unordered atomics (see D66309)
27843 !LLD->isSimple() || !RLD->isSimple() ||
27844 // FIXME: If either is a pre/post inc/dec load,
27845 // we'd need to split out the address adjustment.
27846 LLD->isIndexed() || RLD->isIndexed() ||
27847 // If this is an EXTLOAD, the VT's must match.
27848 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27849 // If this is an EXTLOAD, the kind of extension must match.
27850 (LLD->getExtensionType() != RLD->getExtensionType() &&
27851 // The only exception is if one of the extensions is anyext.
27852 LLD->getExtensionType() != ISD::EXTLOAD &&
27853 RLD->getExtensionType() != ISD::EXTLOAD) ||
27854 // FIXME: this discards src value information. This is
27855 // over-conservative. It would be beneficial to be able to remember
27856 // both potential memory locations. Since we are discarding
27857 // src value info, don't do the transformation if the memory
27858 // locations are not in the default address space.
27859 LLD->getPointerInfo().getAddrSpace() != 0 ||
27860 RLD->getPointerInfo().getAddrSpace() != 0 ||
27861 // We can't produce a CMOV of a TargetFrameIndex since we won't
27862 // generate the address generation required.
27863 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27864 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27865 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27866 LLD->getBasePtr().getValueType()))
27867 return false;
27868
27869 // The loads must not depend on one another.
27870 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27871 return false;
27872
27873 // Check that the select condition doesn't reach either load. If so,
27874 // folding this will induce a cycle into the DAG. If not, this is safe to
27875 // xform, so create a select of the addresses.
27876
27877 SmallPtrSet<const SDNode *, 32> Visited;
27878 SmallVector<const SDNode *, 16> Worklist;
27879
27880 // Always fail if LLD and RLD are not independent. TheSelect is a
27881 // predecessor to all Nodes in question so we need not search past it.
27882
27883 Visited.insert(TheSelect);
27884 Worklist.push_back(LLD);
27885 Worklist.push_back(RLD);
27886
27887 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27888 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27889 return false;
27890
27891 SDValue Addr;
27892 if (TheSelect->getOpcode() == ISD::SELECT) {
27893 // We cannot do this optimization if any pair of {RLD, LLD} is a
27894 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27895 // Loads, we only need to check if CondNode is a successor to one of the
27896 // loads. We can further avoid this if there's no use of their chain
27897 // value.
27898 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27899 Worklist.push_back(CondNode);
27900
27901 if ((LLD->hasAnyUseOfValue(1) &&
27902 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27903 (RLD->hasAnyUseOfValue(1) &&
27904 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27905 return false;
27906
27907 Addr = DAG.getSelect(SDLoc(TheSelect),
27908 LLD->getBasePtr().getValueType(),
27909 TheSelect->getOperand(0), LLD->getBasePtr(),
27910 RLD->getBasePtr());
27911 } else { // Otherwise SELECT_CC
27912 // We cannot do this optimization if any pair of {RLD, LLD} is a
27913 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27914 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27915 // one of the loads. We can further avoid this if there's no use of their
27916 // chain value.
27917
27918 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27919 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27920 Worklist.push_back(CondLHS);
27921 Worklist.push_back(CondRHS);
27922
27923 if ((LLD->hasAnyUseOfValue(1) &&
27924 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27925 (RLD->hasAnyUseOfValue(1) &&
27926 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27927 return false;
27928
27929 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27930 LLD->getBasePtr().getValueType(),
27931 TheSelect->getOperand(0),
27932 TheSelect->getOperand(1),
27933 LLD->getBasePtr(), RLD->getBasePtr(),
27934 TheSelect->getOperand(4));
27935 }
27936
27937 SDValue Load;
27938 // It is safe to replace the two loads if they have different alignments,
27939 // but the new load must use the minimum (most restrictive) alignment of the
27940 // inputs.
27941 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27942 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27943 if (!RLD->isInvariant())
27944 MMOFlags &= ~MachineMemOperand::MOInvariant;
27945 if (!RLD->isDereferenceable())
27946 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27947 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27948 // FIXME: Discards pointer and AA info.
27949 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27950 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27951 MMOFlags);
27952 } else {
27953 // FIXME: Discards pointer and AA info.
27954 Load = DAG.getExtLoad(
27955 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27956 : LLD->getExtensionType(),
27957 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27958 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27959 }
27960
27961 // Users of the select now use the result of the load.
27962 CombineTo(TheSelect, Load);
27963
27964 // Users of the old loads now use the new load's chain. We know the
27965 // old-load value is dead now.
27966 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27967 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27968 return true;
27969 }
27970
27971 return false;
27972}
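// Worked example for the load case in SimplifySelectOps above: when both
// loads are simple, non-indexed, share the same chain and live in address
// space 0,
//   select C, (load p), (load q)
// can become a single load through a select of the addresses,
//   load (select C, p, q)
// provided the condition does not depend on either load.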
27973
27974/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27975/// bitwise 'and'.
27976SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27977 SDValue N1, SDValue N2, SDValue N3,
27978 ISD::CondCode CC) {
27979 // If this is a select where the false operand is zero and the compare is a
27980 // check of the sign bit, see if we can perform the "gzip trick":
27981 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27982 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
27983 EVT XType = N0.getValueType();
27984 EVT AType = N2.getValueType();
27985 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27986 return SDValue();
27987
27988 // If the comparison is testing for a positive value, we have to invert
27989 // the sign bit mask, so only do that transform if the target has a bitwise
27990 // 'and not' instruction (the invert is free).
27991 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27992 // (X > -1) ? A : 0
27993 // (X > 0) ? X : 0 <-- This is canonical signed max.
27994 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27995 return SDValue();
27996 } else if (CC == ISD::SETLT) {
27997 // (X < 0) ? A : 0
27998 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27999 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28000 return SDValue();
28001 } else {
28002 return SDValue();
28003 }
28004
28005 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28006 // constant.
28007 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28008 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28009 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28010 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28011 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28012 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28013 AddToWorklist(Shift.getNode());
28014
28015 if (XType.bitsGT(AType)) {
28016 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28017 AddToWorklist(Shift.getNode());
28018 }
28019
28020 if (CC == ISD::SETGT)
28021 Shift = DAG.getNOT(DL, Shift, AType);
28022
28023 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28024 }
28025 }
28026
28027 unsigned ShCt = XType.getSizeInBits() - 1;
28028 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28029 return SDValue();
28030
28031 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28032 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28033 AddToWorklist(Shift.getNode());
28034
28035 if (XType.bitsGT(AType)) {
28036 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28037 AddToWorklist(Shift.getNode());
28038 }
28039
28040 if (CC == ISD::SETGT)
28041 Shift = DAG.getNOT(DL, Shift, AType);
28042
28043 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28044}
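// Worked example for the "gzip trick" above (hypothetical i32 values):
//   select_cc setlt X, 0, A, 0
// yields A when X is negative and 0 otherwise, so it can be computed as
//   and (sra X, 31), A
// because the arithmetic shift produces all-ones exactly when X < 0.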
28045
28046// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28047SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28048 SDValue N0 = N->getOperand(0);
28049 SDValue N1 = N->getOperand(1);
28050 SDValue N2 = N->getOperand(2);
28051 SDLoc DL(N);
28052
28053 unsigned BinOpc = N1.getOpcode();
28054 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28055 (N1.getResNo() != N2.getResNo()))
28056 return SDValue();
28057
28058 // The use checks are intentionally on SDNode because we may be dealing
28059 // with opcodes that produce more than one SDValue.
28060 // TODO: Do we really need to check N0 (the condition operand of the select)?
28061 // But removing that clause could cause an infinite loop...
28062 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28063 return SDValue();
28064
28065 // Binops may include opcodes that return multiple values, so all values
28066 // must be created/propagated from the newly created binops below.
28067 SDVTList OpVTs = N1->getVTList();
28068
28069 // Fold select(cond, binop(x, y), binop(z, y))
28070 // --> binop(select(cond, x, z), y)
28071 if (N1.getOperand(1) == N2.getOperand(1)) {
28072 SDValue N10 = N1.getOperand(0);
28073 SDValue N20 = N2.getOperand(0);
28074 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28075 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
28076 NewBinOp->setFlags(N1->getFlags());
28077 NewBinOp->intersectFlagsWith(N2->getFlags());
28078 return SDValue(NewBinOp.getNode(), N1.getResNo());
28079 }
28080
28081 // Fold select(cond, binop(x, y), binop(x, z))
28082 // --> binop(x, select(cond, y, z))
28083 if (N1.getOperand(0) == N2.getOperand(0)) {
28084 SDValue N11 = N1.getOperand(1);
28085 SDValue N21 = N2.getOperand(1);
28086 // Second op VT might be different (e.g. shift amount type)
28087 if (N11.getValueType() == N21.getValueType()) {
28088 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28089 SDValue NewBinOp =
28090 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
28091 NewBinOp->setFlags(N1->getFlags());
28092 NewBinOp->intersectFlagsWith(N2->getFlags());
28093 return SDValue(NewBinOp.getNode(), N1.getResNo());
28094 }
28095 }
28096
28097 // TODO: Handle isCommutativeBinOp patterns as well?
28098 return SDValue();
28099}
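// Worked example for foldSelectOfBinops above (hypothetical i32 values):
//   select C, (add X, Y), (add Z, Y)
// shares the right-hand operand, so the select can be pushed inside:
//   add (select C, X, Z), Y
// leaving one add instead of two.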
28100
28101// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28102SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28103 SDValue N0 = N->getOperand(0);
28104 EVT VT = N->getValueType(0);
28105 bool IsFabs = N->getOpcode() == ISD::FABS;
28106 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28107
28108 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28109 return SDValue();
28110
28111 SDValue Int = N0.getOperand(0);
28112 EVT IntVT = Int.getValueType();
28113
28114 // The operand of the cast should be a scalar integer.
28115 if (!IntVT.isInteger() || IntVT.isVector())
28116 return SDValue();
28117
28118 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28119 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28120 APInt SignMask;
28121 if (N0.getValueType().isVector()) {
28122 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28123 // 0x7f...) per element and splat it.
28124 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28125 if (IsFabs)
28126 SignMask = ~SignMask;
28127 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28128 } else {
28129 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28130 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28131 if (IsFabs)
28132 SignMask = ~SignMask;
28133 }
28134 SDLoc DL(N0);
28135 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28136 DAG.getConstant(SignMask, DL, IntVT));
28137 AddToWorklist(Int.getNode());
28138 return DAG.getBitcast(VT, Int);
28139}
28140
28141 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28142/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28143/// in it. This may be a win when the constant is not otherwise available
28144/// because it replaces two constant pool loads with one.
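/// For instance (illustrative, assuming 4-byte floats): the pool entry holds
/// the array {2.0f, 1.0f}, so the false value sits at offset 0 and the true
/// value at offset 4, and the select becomes
///   load (tmp + ((a cond b) ? 4 : 0))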
28145SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28146 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28147 ISD::CondCode CC) {
28148 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28149 return SDValue();
28150
28151 // If we are before legalize types, we want the other legalization to happen
28152 // first (for example, to avoid messing with soft float).
28153 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28154 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28155 EVT VT = N2.getValueType();
28156 if (!TV || !FV || !TLI.isTypeLegal(VT))
28157 return SDValue();
28158
28159 // If a constant can be materialized without loads, this does not make sense.
28160 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
28161 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28162 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28163 return SDValue();
28164
28165 // If both constants have multiple uses, then we won't need to do an extra
28166 // load. The values are likely around in registers for other users.
28167 if (!TV->hasOneUse() && !FV->hasOneUse())
28168 return SDValue();
28169
28170 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28171 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28172 Type *FPTy = Elts[0]->getType();
28173 const DataLayout &TD = DAG.getDataLayout();
28174
28175 // Create a ConstantArray of the two constants.
28176 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
28177 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
28178 TD.getPrefTypeAlign(FPTy));
28179 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28180
28181 // Get offsets to the 0 and 1 elements of the array, so we can select between
28182 // them.
28183 SDValue Zero = DAG.getIntPtrConstant(0, DL);
28184 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28185 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
28186 SDValue Cond =
28187 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
28188 AddToWorklist(Cond.getNode());
28189 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
28190 AddToWorklist(CstOffset.getNode());
28191 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
28192 AddToWorklist(CPIdx.getNode());
28193 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
28194 MachinePointerInfo::getConstantPool(
28195 DAG.getMachineFunction()), Alignment);
28196}
28197
28198/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
28199/// where 'cond' is the comparison specified by CC.
28200SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
28201 SDValue N2, SDValue N3, ISD::CondCode CC,
28202 bool NotExtCompare) {
28203 // (x ? y : y) -> y.
28204 if (N2 == N3) return N2;
28205
28206 EVT CmpOpVT = N0.getValueType();
28207 EVT CmpResVT = getSetCCResultType(CmpOpVT);
28208 EVT VT = N2.getValueType();
28209 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
28210 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28211 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
28212
28213 // Determine if the condition we're dealing with is constant.
28214 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
28215 AddToWorklist(SCC.getNode());
28216 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
28217 // fold select_cc true, x, y -> x
28218 // fold select_cc false, x, y -> y
28219 return !(SCCC->isZero()) ? N2 : N3;
28220 }
28221 }
28222
28223 if (SDValue V =
28224 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
28225 return V;
28226
28227 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
28228 return V;
28229
28230 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
28231 // where y has a single bit set.
28232 // In plain terms: we can turn the SELECT_CC into an AND when the condition
28233 // can be materialized as an all-ones register. Any single bit-test can be
28234 // materialized as an all-ones register with shift-left and
28235 // shift-right-arith.
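// A concrete (illustrative) i32 instance: for
//   select_cc seteq (and x, 8), 0, 0, A
// the tested bit is bit 3, so the sequence below would emit
//   (and (sra (shl x, 28), 31), A)
// which yields A when bit 3 of x is set and 0 otherwise.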
28236 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
28237 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
28238 SDValue AndLHS = N0->getOperand(0);
28239 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
28240 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
28241 // Shift the tested bit over the sign bit.
28242 const APInt &AndMask = ConstAndRHS->getAPIntValue();
28243 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
28244 unsigned ShCt = AndMask.getBitWidth() - 1;
28245 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
28246 SDLoc(AndLHS));
28247 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
28248
28249 // Now arithmetic right shift it all the way over, so the result is
28250 // either all-ones, or zero.
28251 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
28252 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
28253
28254 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
28255 }
28256 }
28257 }
28258
28259 // fold select C, 16, 0 -> shl C, 4
28260 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
28261 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
28262
28263 if ((Fold || Swap) &&
28264 TLI.getBooleanContents(CmpOpVT) ==
28265 TargetLowering::ZeroOrOneBooleanContent &&
28266 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
28267
28268 if (Swap) {
28269 CC = ISD::getSetCCInverse(CC, CmpOpVT);
28270 std::swap(N2C, N3C);
28271 }
28272
28273 // If the caller doesn't want us to simplify this into a zext of a compare,
28274 // don't do it.
28275 if (NotExtCompare && N2C->isOne())
28276 return SDValue();
28277
28278 SDValue Temp, SCC;
28279 // zext (setcc n0, n1)
28280 if (LegalTypes) {
28281 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
28282 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
28283 } else {
28284 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
28285 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
28286 }
28287
28288 AddToWorklist(SCC.getNode());
28289 AddToWorklist(Temp.getNode());
28290
28291 if (N2C->isOne())
28292 return Temp;
28293
28294 unsigned ShCt = N2C->getAPIntValue().logBase2();
28295 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
28296 return SDValue();
28297
28298 // shl setcc result by log2 n2c
28299 return DAG.getNode(
28300 ISD::SHL, DL, N2.getValueType(), Temp,
28301 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
28302 }
28303
28304 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
28305 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
28306 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
28307 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
28308 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
28309 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
28310 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
28311 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
28312 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
28313 SDValue ValueOnZero = N2;
28314 SDValue Count = N3;
28316 // If the condition is NE instead of EQ, swap the operands.
28316 if (CC == ISD::SETNE)
28317 std::swap(ValueOnZero, Count);
28318 // Check if the value on zero is a constant equal to the bits in the type.
28319 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
28320 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
28321 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
28322 // legal, combine to just cttz.
28323 if ((Count.getOpcode() == ISD::CTTZ ||
28324 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
28325 N0 == Count.getOperand(0) &&
28326 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
28327 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
28328 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
28329 // legal, combine to just ctlz.
28330 if ((Count.getOpcode() == ISD::CTLZ ||
28331 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
28332 N0 == Count.getOperand(0) &&
28333 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
28334 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
28335 }
28336 }
28337 }
28338
28339 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
28340 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
28341 if (!NotExtCompare && N1C && N2C && N3C &&
28342 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
28343 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
28344 (N1C->isZero() && CC == ISD::SETLT)) &&
28345 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
28346 SDValue ASR = DAG.getNode(
28347 ISD::SRA, DL, CmpOpVT, N0,
28348 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
28349 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
28350 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
28351 }
28352
28353 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28354 return S;
28355 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28356 return S;
28357 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
28358 return ABD;
28359
28360 return SDValue();
28361}
28362
28363/// This is a stub for TargetLowering::SimplifySetCC.
28364SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
28365 ISD::CondCode Cond, const SDLoc &DL,
28366 bool foldBooleans) {
28367 TargetLowering::DAGCombinerInfo
28368 DagCombineInfo(DAG, Level, false, this);
28369 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
28370}
28371
28372/// Given an ISD::SDIV node expressing a divide by constant, return
28373 /// a DAG expression that will generate the same value by multiplying
28374/// by a magic number.
28375/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
28376SDValue DAGCombiner::BuildSDIV(SDNode *N) {
28377 // When optimizing for minimum size, we don't want to expand a div to a mul
28378 // and a shift.
28379 if (DAG.getMachineFunction().getFunction().hasMinSize())
28380 return SDValue();
28381
28382 SmallVector<SDNode *, 8> Built;
28383 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28384 for (SDNode *N : Built)
28385 AddToWorklist(N);
28386 return S;
28387 }
28388
28389 return SDValue();
28390}
28391
28392 /// Given an ISD::SDIV node expressing a divide by a constant power of 2, return a
28393/// DAG expression that will generate the same value by right shifting.
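/// For example (one common i32 lowering, shown for illustration): X sdiv 8 can
/// be emitted as
///   (sra (add X, (srl (sra X, 31), 29)), 3)
/// i.e. add 7 to negative values first so that the final shift rounds toward
/// zero as sdiv requires.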
28394SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
28395 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28396 if (!C)
28397 return SDValue();
28398
28399 // Avoid division by zero.
28400 if (C->isZero())
28401 return SDValue();
28402
28403 SmallVector<SDNode *, 8> Built;
28404 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
28405 for (SDNode *N : Built)
28406 AddToWorklist(N);
28407 return S;
28408 }
28409
28410 return SDValue();
28411}
28412
28413/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
28414/// expression that will generate the same value by multiplying by a magic
28415/// number.
28416/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
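/// For example (illustrative i32 case): X udiv 3 can be computed as
///   (srl (mulhu X, 0xAAAAAAAB), 1)
/// since 0xAAAAAAAB = ceil(2^33 / 3), so the upper 32 bits of the product,
/// shifted right by one, give exactly X / 3 for every 32-bit X.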
28417SDValue DAGCombiner::BuildUDIV(SDNode *N) {
28418 // When optimizing for minimum size, we don't want to expand a div to a mul
28419 // and a shift.
28420 if (DAG.getMachineFunction().getFunction().hasMinSize())
28421 return SDValue();
28422
28423 SmallVector<SDNode *, 8> Built;
28424 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28425 for (SDNode *N : Built)
28426 AddToWorklist(N);
28427 return S;
28428 }
28429
28430 return SDValue();
28431}
28432
28433 /// Given an ISD::SREM node expressing a remainder by a constant power of 2,
28434/// return a DAG expression that will generate the same value.
28435SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
28436 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28437 if (!C)
28438 return SDValue();
28439
28440 // Avoid division by zero.
28441 if (C->isZero())
28442 return SDValue();
28443
28444 SmallVector<SDNode *, 8> Built;
28445 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
28446 for (SDNode *N : Built)
28447 AddToWorklist(N);
28448 return S;
28449 }
28450
28451 return SDValue();
28452}
28453
28454// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
28455//
28456// Returns the node that represents `Log2(Op)`. This may create a new node. If
28457 // we are unable to compute `Log2(Op)` this returns `SDValue()`.
28458//
28459// All nodes will be created at `DL` and the output will be of type `VT`.
28460//
28461// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
28462 // `AssumeNonZero` if this function should simply assume (rather than prove)
28463 // that `Op` is non-zero.
28464 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
28465 SDValue Op, unsigned Depth,
28466 bool AssumeNonZero) {
28467 assert(VT.isInteger() && "Only integer types are supported!");
28468
28469 auto PeekThroughCastsAndTrunc = [](SDValue V) {
28470 while (true) {
28471 switch (V.getOpcode()) {
28472 case ISD::TRUNCATE:
28473 case ISD::ZERO_EXTEND:
28474 V = V.getOperand(0);
28475 break;
28476 default:
28477 return V;
28478 }
28479 }
28480 };
28481
28482 if (VT.isScalableVector())
28483 return SDValue();
28484
28485 Op = PeekThroughCastsAndTrunc(Op);
28486
28487 // Helper for determining whether a value is a power-2 constant scalar or a
28488 // vector of such elements.
28489 SmallVector<APInt> Pow2Constants;
28490 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
28491 if (C->isZero() || C->isOpaque())
28492 return false;
28493 // TODO: We may also be able to support negative powers of 2 here.
28494 if (C->getAPIntValue().isPowerOf2()) {
28495 Pow2Constants.emplace_back(C->getAPIntValue());
28496 return true;
28497 }
28498 return false;
28499 };
28500
28501 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
28502 if (!VT.isVector())
28503 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
28504 // We need to create a build vector
28505 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
28506 return DAG.getSplat(VT, DL,
28507 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
28508 VT.getScalarType()));
28509 SmallVector<SDValue> Log2Ops;
28510 for (const APInt &Pow2 : Pow2Constants)
28511 Log2Ops.emplace_back(
28512 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
28513 return DAG.getBuildVector(VT, DL, Log2Ops);
28514 }
28515
28516 if (Depth >= DAG.MaxRecursionDepth)
28517 return SDValue();
28518
28519 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
28520 ToCast = PeekThroughCastsAndTrunc(ToCast);
28521 EVT CurVT = ToCast.getValueType();
28522 if (NewVT == CurVT)
28523 return ToCast;
28524
28525 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
28526 return DAG.getBitcast(NewVT, ToCast);
28527
28528 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
28529 };
28530
28531 // log2(X << Y) -> log2(X) + Y
28532 if (Op.getOpcode() == ISD::SHL) {
28533 // 1 << Y and X nuw/nsw << Y are all non-zero.
28534 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
28535 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
28536 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
28537 Depth + 1, AssumeNonZero))
28538 return DAG.getNode(ISD::ADD, DL, VT, LogX,
28539 CastToVT(VT, Op.getOperand(1)));
28540 }
28541
28542 // c ? X : Y -> c ? Log2(X) : Log2(Y)
28543 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
28544 Op.hasOneUse()) {
28545 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
28546 Depth + 1, AssumeNonZero))
28547 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
28548 Depth + 1, AssumeNonZero))
28549 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
28550 }
28551
28552 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
28553 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
28554 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
28555 Op.hasOneUse()) {
28556 // Use AssumeNonZero as false here. Otherwise we can hit a case where
28557 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
28558 if (SDValue LogX =
28559 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
28560 /*AssumeNonZero*/ false))
28561 if (SDValue LogY =
28562 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
28563 /*AssumeNonZero*/ false))
28564 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
28565 }
28566
28567 return SDValue();
28568}
28569
28570/// Determines the LogBase2 value for a non-null input value using the
28571/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
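/// Worked example (i32 input already known to be a power of two): for V = 16,
/// ctlz(V) is 27, so LogBase2(V) = 31 - 27 = 4.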
28572SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
28573 bool KnownNonZero, bool InexpensiveOnly,
28574 std::optional<EVT> OutVT) {
28575 EVT VT = OutVT ? *OutVT : V.getValueType();
28576 SDValue InexpensiveLogBase2 =
28577 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28578 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
28579 return InexpensiveLogBase2;
28580
28581 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28582 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28583 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28584 return LogBase2;
28585}
28586
28587/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28588/// For the reciprocal, we need to find the zero of the function:
28589/// F(X) = 1/X - A [which has a zero at X = 1/A]
28590/// =>
28591/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28592/// does not require additional intermediate precision]
28593/// For the last iteration, put numerator N into it to gain more precision:
28594/// Result = N X_i + X_i (N - N A X_i)
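/// Numerical sketch (values purely illustrative): for A = 3 with an initial
/// estimate X_0 = 0.25, the recurrence gives X_1 = 0.25 * (2 - 0.75) = 0.3125,
/// X_2 = 0.3125 * (2 - 0.9375) ~= 0.3320 and X_3 ~= 0.33333, converging
/// quadratically towards 1/3.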
28595SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28596 SDNodeFlags Flags) {
28597 if (LegalDAG)
28598 return SDValue();
28599
28600 // TODO: Handle extended types?
28601 EVT VT = Op.getValueType();
28602 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28603 VT.getScalarType() != MVT::f64)
28604 return SDValue();
28605
28606 // If estimates are explicitly disabled for this function, we're done.
28607 MachineFunction &MF = DAG.getMachineFunction();
28608 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28609 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28610 return SDValue();
28611
28612 // Estimates may be explicitly enabled for this type with a custom number of
28613 // refinement steps.
28614 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28615 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28616 AddToWorklist(Est.getNode());
28617
28618 SDLoc DL(Op);
28619 if (Iterations) {
28620 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28621
28622 // Newton iterations: Est = Est + Est (N - Arg * Est)
28623 // If this is the last iteration, also multiply by the numerator.
28624 for (int i = 0; i < Iterations; ++i) {
28625 SDValue MulEst = Est;
28626
28627 if (i == Iterations - 1) {
28628 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28629 AddToWorklist(MulEst.getNode());
28630 }
28631
28632 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28633 AddToWorklist(NewEst.getNode());
28634
28635 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28636 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28637 AddToWorklist(NewEst.getNode());
28638
28639 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28640 AddToWorklist(NewEst.getNode());
28641
28642 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28643 AddToWorklist(Est.getNode());
28644 }
28645 } else {
28646 // If no iterations are available, multiply with N.
28647 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28648 AddToWorklist(Est.getNode());
28649 }
28650
28651 return Est;
28652 }
28653
28654 return SDValue();
28655}
28656
28657/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28658/// For the reciprocal sqrt, we need to find the zero of the function:
28659/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28660/// =>
28661/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28662/// As a result, we precompute A/2 prior to the iteration loop.
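/// Numerical sketch (values purely illustrative): for A = 4 with an initial
/// estimate X_0 = 0.6, one step gives X_1 = 0.6 * (1.5 - 2 * 0.36) = 0.468 and
/// the next step X_2 ~= 0.497, converging towards 1/sqrt(4) = 0.5.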
28663SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28664 unsigned Iterations,
28665 SDNodeFlags Flags, bool Reciprocal) {
28666 EVT VT = Arg.getValueType();
28667 SDLoc DL(Arg);
28668 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28669
28670 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28671 // this entire sequence requires only one FP constant.
28672 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28673 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28674
28675 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28676 for (unsigned i = 0; i < Iterations; ++i) {
28677 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28678 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28679 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28680 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28681 }
28682
28683 // If non-reciprocal square root is requested, multiply the result by Arg.
28684 if (!Reciprocal)
28685 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28686
28687 return Est;
28688}
28689
28690/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28691/// For the reciprocal sqrt, we need to find the zero of the function:
28692/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28693/// =>
28694/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
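/// This is algebraically the same update as the one-constant variant, just
/// regrouped: (-0.5 * X) * (A * X * X - 3.0) = X * (1.5 - 0.5 * A * X * X).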
28695SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28696 unsigned Iterations,
28697 SDNodeFlags Flags, bool Reciprocal) {
28698 EVT VT = Arg.getValueType();
28699 SDLoc DL(Arg);
28700 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28701 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28702
28703 // This routine must enter the loop below to work correctly
28704 // when (Reciprocal == false).
28705 assert(Iterations > 0);
28706
28707 // Newton iterations for reciprocal square root:
28708 // E = (E * -0.5) * ((A * E) * E + -3.0)
28709 for (unsigned i = 0; i < Iterations; ++i) {
28710 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28711 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28712 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28713
28714 // When calculating a square root at the last iteration build:
28715 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28716 // (notice a common subexpression)
28717 SDValue LHS;
28718 if (Reciprocal || (i + 1) < Iterations) {
28719 // RSQRT: LHS = (E * -0.5)
28720 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28721 } else {
28722 // SQRT: LHS = (A * E) * -0.5
28723 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28724 }
28725
28726 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28727 }
28728
28729 return Est;
28730}
28731
28732/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28733 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed
28734 /// if Op can be zero (0 * rsqrt(0) is 0 * inf = NaN rather than the expected 0).
28735SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28736 bool Reciprocal) {
28737 if (LegalDAG)
28738 return SDValue();
28739
28740 // TODO: Handle extended types?
28741 EVT VT = Op.getValueType();
28742 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28743 VT.getScalarType() != MVT::f64)
28744 return SDValue();
28745
28746 // If estimates are explicitly disabled for this function, we're done.
28747 MachineFunction &MF = DAG.getMachineFunction();
28748 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28749 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28750 return SDValue();
28751
28752 // Estimates may be explicitly enabled for this type with a custom number of
28753 // refinement steps.
28754 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28755
28756 bool UseOneConstNR = false;
28757 if (SDValue Est =
28758 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28759 Reciprocal)) {
28760 AddToWorklist(Est.getNode());
28761
28762 if (Iterations > 0)
28763 Est = UseOneConstNR
28764 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28765 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28766 if (!Reciprocal) {
28767 SDLoc DL(Op);
28768 // Try the target specific test first.
28769 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28770
28771 // The estimate is now completely wrong if the input was exactly 0.0 or
28772 // possibly a denormal. Force the answer to 0.0 or the value provided by
28773 // the target for those cases.
28774 Est = DAG.getNode(
28775 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28776 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28777 }
28778 return Est;
28779 }
28780
28781 return SDValue();
28782}
28783
28784SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28785 return buildSqrtEstimateImpl(Op, Flags, true);
28786}
28787
28788SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28789 return buildSqrtEstimateImpl(Op, Flags, false);
28790}
28791
28792/// Return true if there is any possibility that the two addresses overlap.
28793bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28794
28795 struct MemUseCharacteristics {
28796 bool IsVolatile;
28797 bool IsAtomic;
28798 SDValue BasePtr;
28799 int64_t Offset;
28800 LocationSize NumBytes;
28801 MachineMemOperand *MMO;
28802 };
28803
28804 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28805 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28806 int64_t Offset = 0;
28807 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28808 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28809 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28810 ? -1 * C->getSExtValue()
28811 : 0;
28812 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28813 return {LSN->isVolatile(), LSN->isAtomic(),
28814 LSN->getBasePtr(), Offset /*base offset*/,
28815 LocationSize::precise(Size), LSN->getMemOperand()};
28816 }
28817 if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
28818 return {false /*isVolatile*/,
28819 /*isAtomic*/ false,
28820 LN->getOperand(1),
28821 (LN->hasOffset()) ? LN->getOffset() : 0,
28822 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28823 : LocationSize::beforeOrAfterPointer(),
28824 (MachineMemOperand *)nullptr};
28825 // Default.
28826 return {false /*isvolatile*/,
28827 /*isAtomic*/ false,
28828 SDValue(),
28829 (int64_t)0 /*offset*/,
28830 LocationSize::beforeOrAfterPointer(),
28831 (MachineMemOperand *)nullptr};
28832 };
28833
28834 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28835 MUC1 = getCharacteristics(Op1);
28836
28837 // If they are to the same address, then they must be aliases.
28838 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28839 MUC0.Offset == MUC1.Offset)
28840 return true;
28841
28842 // If they are both volatile then they cannot be reordered.
28843 if (MUC0.IsVolatile && MUC1.IsVolatile)
28844 return true;
28845
28846 // Be conservative about atomics for the moment
28847 // TODO: This is way overconservative for unordered atomics (see D66309)
28848 if (MUC0.IsAtomic && MUC1.IsAtomic)
28849 return true;
28850
28851 if (MUC0.MMO && MUC1.MMO) {
28852 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28853 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28854 return false;
28855 }
28856
28857 // If NumBytes is scalable and offset is not 0, conservatively return may
28858 // alias
28859 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28860 MUC0.Offset != 0) ||
28861 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28862 MUC1.Offset != 0))
28863 return true;
28864 // Try to prove that there is aliasing, or that there is no aliasing. Either
28865 // way, we can return now. If nothing can be proved, proceed with more tests.
28866 bool IsAlias;
28867 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28868 DAG, IsAlias))
28869 return IsAlias;
28870
28871 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28872 // either is not known.
28873 if (!MUC0.MMO || !MUC1.MMO)
28874 return true;
28875
28876 // If one operation reads from invariant memory and the other may store, they
28877 // cannot alias. These should really be checking the equivalent of mayWrite,
28878 // but it only matters for memory nodes other than load/store.
28879 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28880 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28881 return false;
28882
28883 // If we know the required SrcValue1 and SrcValue2 have relatively large
28884 // alignment compared to the size and offset of the access, we may be able
28885 // to prove they do not alias. This check is conservative for now to catch
28886 // cases created by splitting vector types; it only works when the offsets
28887 // are multiples of the size of the data.
28888 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28889 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28890 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28891 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28892 LocationSize Size0 = MUC0.NumBytes;
28893 LocationSize Size1 = MUC1.NumBytes;
28894
28895 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28896 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28897 !Size1.isScalable() && Size0 == Size1 &&
28898 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28899 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28900 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28901 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28902 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28903
28904 // There is no overlap between these relatively aligned accesses of
28905 // similar size. Return no alias.
28906 if ((OffAlign0 + static_cast<int64_t>(
28907 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28908 (OffAlign1 + static_cast<int64_t>(
28909 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28910 return false;
28911 }
28912
28913 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28914 ? CombinerGlobalAA
28915 : DAG.getSubtarget().useAA();
28916#ifndef NDEBUG
28917 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28918 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28919 UseAA = false;
28920#endif
28921
28922 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28923 Size0.hasValue() && Size1.hasValue() &&
28924 // Can't represent a scalable size + fixed offset in LocationSize
28925 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28926 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28927 // Use alias analysis information.
28928 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28929 int64_t Overlap0 =
28930 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28931 int64_t Overlap1 =
28932 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28933 LocationSize Loc0 =
28934 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28935 LocationSize Loc1 =
28936 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28937 if (BatchAA->isNoAlias(
28938 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28939 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28940 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28941 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28942 return false;
28943 }
28944
28945 // Otherwise we have to assume they alias.
28946 return true;
28947}
28948
28949/// Walk up chain skipping non-aliasing memory nodes,
28950/// looking for aliasing nodes and adding them to the Aliases vector.
28951void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28952 SmallVectorImpl<SDValue> &Aliases) {
28953 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28954 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28955
28956 // Get alias information for node.
28957 // TODO: relax aliasing for unordered atomics (see D66309)
28958 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28959
28960 // Starting off.
28961 Chains.push_back(OriginalChain);
28962 unsigned Depth = 0;
28963
28964 // Attempt to improve chain by a single step
28965 auto ImproveChain = [&](SDValue &C) -> bool {
28966 switch (C.getOpcode()) {
28967 case ISD::EntryToken:
28968 // No need to mark EntryToken.
28969 C = SDValue();
28970 return true;
28971 case ISD::LOAD:
28972 case ISD::STORE: {
28973 // Get alias information for C.
28974 // TODO: Relax aliasing for unordered atomics (see D66309)
28975 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28976 cast<LSBaseSDNode>(C.getNode())->isSimple();
28977 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28978 // Look further up the chain.
28979 C = C.getOperand(0);
28980 return true;
28981 }
28982 // Alias, so stop here.
28983 return false;
28984 }
28985
28986 case ISD::CopyFromReg:
28987 // Always forward past CopyFromReg.
28988 C = C.getOperand(0);
28989 return true;
28990
28991 case ISD::LIFETIME_START:
28992 case ISD::LIFETIME_END: {
28993 // We can forward past any lifetime start/end that can be proven not to
28994 // alias the memory access.
28995 if (!mayAlias(N, C.getNode())) {
28996 // Look further up the chain.
28997 C = C.getOperand(0);
28998 return true;
28999 }
29000 return false;
29001 }
29002 default:
29003 return false;
29004 }
29005 };
29006
29007 // Look at each chain and determine if it is an alias. If so, add it to the
29008 // aliases list. If not, then continue up the chain looking for the next
29009 // candidate.
29010 while (!Chains.empty()) {
29011 SDValue Chain = Chains.pop_back_val();
29012
29013 // Don't bother if we've seen Chain before.
29014 if (!Visited.insert(Chain.getNode()).second)
29015 continue;
29016
29017 // For TokenFactor nodes, look at each operand and only continue up the
29018 // chain until we reach the depth limit.
29019 //
29020 // FIXME: The depth check could be made to return the last non-aliasing
29021 // chain we found before we hit a tokenfactor rather than the original
29022 // chain.
29023 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
29024 Aliases.clear();
29025 Aliases.push_back(OriginalChain);
29026 return;
29027 }
29028
29029 if (Chain.getOpcode() == ISD::TokenFactor) {
29030 // We have to check each of the operands of the token factor for "small"
29031 // token factors, so we queue them up. Adding the operands to the queue
29032 // (stack) in reverse order maintains the original order and increases the
29033 // likelihood that getNode will find a matching token factor (CSE).
29034 if (Chain.getNumOperands() > 16) {
29035 Aliases.push_back(Chain);
29036 continue;
29037 }
29038 for (unsigned n = Chain.getNumOperands(); n;)
29039 Chains.push_back(Chain.getOperand(--n));
29040 ++Depth;
29041 continue;
29042 }
29043 // Everything else
29044 if (ImproveChain(Chain)) {
29045 // Updated Chain Found, Consider new chain if one exists.
29046 if (Chain.getNode())
29047 Chains.push_back(Chain);
29048 ++Depth;
29049 continue;
29050 }
29051 // No Improved Chain Possible, treat as Alias.
29052 Aliases.push_back(Chain);
29053 }
29054}
29055
29056/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29057 /// (aliasing node).
29058SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
29059 if (OptLevel == CodeGenOptLevel::None)
29060 return OldChain;
29061
29062 // Ops for replacing token factor.
29063 SmallVector<SDValue, 8> Aliases;
29064
29065 // Accumulate all the aliases to this node.
29066 GatherAllAliases(N, OldChain, Aliases);
29067
29068 // If no operands then chain to entry token.
29069 if (Aliases.empty())
29070 return DAG.getEntryNode();
29071
29072 // If a single operand then chain to it. We don't need to revisit it.
29073 if (Aliases.size() == 1)
29074 return Aliases[0];
29075
29076 // Construct a custom tailored token factor.
29077 return DAG.getTokenFactor(SDLoc(N), Aliases);
29078}
29079
29080// This function tries to collect a bunch of potentially interesting
29081// nodes to improve the chains of, all at once. This might seem
29082// redundant, as this function gets called when visiting every store
29083// node, so why not let the work be done on each store as it's visited?
29084//
29085// I believe this is mainly important because mergeConsecutiveStores
29086// is unable to deal with merging stores of different sizes, so unless
29087// we improve the chains of all the potential candidates up-front
29088// before running mergeConsecutiveStores, it might only see some of
29089// the nodes that will eventually be candidates, and then not be able
29090// to go from a partially-merged state to the desired final
29091// fully-merged state.
29092
29093bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
29094 SmallVector<StoreSDNode *, 8> ChainedStores;
29095 StoreSDNode *STChain = St;
29096 // Intervals records which offsets from BaseIndex have been covered. In
29097 // the common case, every store writes to the immediately preceding address
29098 // and is thus merged with the previous interval at insertion time.
29099
29100 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
29101 IntervalMapHalfOpenInfo<int64_t>>;
29102 IMap::Allocator A;
29103 IMap Intervals(A);
29104
29105 // This holds the base pointer, index, and the offset in bytes from the base
29106 // pointer.
29107 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29108
29109 // We must have a base and an offset.
29110 if (!BasePtr.getBase().getNode())
29111 return false;
29112
29113 // Do not handle stores to undef base pointers.
29114 if (BasePtr.getBase().isUndef())
29115 return false;
29116
29117 // Do not handle stores to opaque types
29118 if (St->getMemoryVT().isZeroSized())
29119 return false;
29120
29121 // BaseIndexOffset assumes that offsets are fixed-size, which
29122 // is not valid for scalable vectors where the offsets are
29123 // scaled by `vscale`, so bail out early.
29124 if (St->getMemoryVT().isScalableVT())
29125 return false;
29126
29127 // Add ST's interval.
29128 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
29129 std::monostate{});
29130
29131 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
29132 if (Chain->getMemoryVT().isScalableVector())
29133 return false;
29134
29135 // If the chain has more than one use, then we can't reorder the mem ops.
29136 if (!SDValue(Chain, 0)->hasOneUse())
29137 break;
29138 // TODO: Relax for unordered atomics (see D66309)
29139 if (!Chain->isSimple() || Chain->isIndexed())
29140 break;
29141
29142 // Find the base pointer and offset for this memory node.
29143 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
29144 // Check that the base pointer is the same as the original one.
29145 int64_t Offset;
29146 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
29147 break;
29148 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
29149 // Make sure we don't overlap with other intervals by checking the ones to
29150 // the left or right before inserting.
29151 auto I = Intervals.find(Offset);
29152 // If there's a next interval, we should end before it.
29153 if (I != Intervals.end() && I.start() < (Offset + Length))
29154 break;
29155 // If there's a previous interval, we should start after it.
29156 if (I != Intervals.begin() && (--I).stop() <= Offset)
29157 break;
29158 Intervals.insert(Offset, Offset + Length, std::monostate{});
29159
29160 ChainedStores.push_back(Chain);
29161 STChain = Chain;
29162 }
29163
29164 // If we didn't find a chained store, exit.
29165 if (ChainedStores.empty())
29166 return false;
29167
29168 // Improve all chained stores (St and ChainedStores members) starting from
29169 // where the store chain ended and return a single TokenFactor.
29170 SDValue NewChain = STChain->getChain();
29171 SmallVector<SDValue, 8> TFOps;
29172 for (unsigned I = ChainedStores.size(); I;) {
29173 StoreSDNode *S = ChainedStores[--I];
29174 SDValue BetterChain = FindBetterChain(S, NewChain);
29175 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
29176 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
29177 TFOps.push_back(SDValue(S, 0));
29178 ChainedStores[I] = S;
29179 }
29180
29181 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
29182 SDValue BetterChain = FindBetterChain(St, NewChain);
29183 SDValue NewST;
29184 if (St->isTruncatingStore())
29185 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
29186 St->getBasePtr(), St->getMemoryVT(),
29187 St->getMemOperand());
29188 else
29189 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
29190 St->getBasePtr(), St->getMemOperand());
29191
29192 TFOps.push_back(NewST);
29193
29194 // If we improved every element of TFOps, then we've lost the dependence on
29195 // NewChain to successors of St and we need to add it back to TFOps. Do so at
29196 // the beginning to keep relative order consistent with FindBetterChains.
29197 auto hasImprovedChain = [&](SDValue ST) -> bool {
29198 return ST->getOperand(0) != NewChain;
29199 };
29200 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
29201 if (AddNewChain)
29202 TFOps.insert(TFOps.begin(), NewChain);
29203
29204 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
29205 CombineTo(St, TF);
29206
29207 // Add TF and its operands to the worklist.
29208 AddToWorklist(TF.getNode());
29209 for (const SDValue &Op : TF->ops())
29210 AddToWorklist(Op.getNode());
29211 AddToWorklist(STChain);
29212 return true;
29213}
29214
29215bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
29216 if (OptLevel == CodeGenOptLevel::None)
29217 return false;
29218
29219 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29220
29221 // We must have a base and an offset.
29222 if (!BasePtr.getBase().getNode())
29223 return false;
29224
29225 // Do not handle stores to undef base pointers.
29226 if (BasePtr.getBase().isUndef())
29227 return false;
29228
29229 // Directly improve a chain of disjoint stores starting at St.
29230 if (parallelizeChainedStores(St))
29231 return true;
29232
29233 // Improve St's chain.
29234 SDValue BetterChain = FindBetterChain(St, St->getChain());
29235 if (St->getChain() != BetterChain) {
29236 replaceStoreChain(St, BetterChain);
29237 return true;
29238 }
29239 return false;
29240}
29241
29242/// This is the entry point for the file.
29243 void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
29244 CodeGenOptLevel OptLevel) {
29245 /// This is the main entry point to this class.
29246 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
29247}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static const LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
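As a scalar illustration of the pattern this helper looks for (standalone C++, not the combiner code; truncateWithUSat is a hypothetical name): a truncation preceded by an unsigned clamp to the destination type's maximum behaves as an unsigned saturating truncate.
#include <algorithm>
#include <cassert>
#include <cstdint>
// Scalar model of trunc(umin(x, unsigned_max_of_dest_type)): values in range
// pass through unchanged, larger values saturate to the destination maximum.
static uint8_t truncateWithUSat(uint32_t X) {
  return static_cast<uint8_t>(std::min<uint32_t>(X, UINT8_MAX));
}
int main() {
  assert(truncateWithUSat(42) == 42);
  assert(truncateWithUSat(1000) == 255);
  return 0;
}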
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
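The identity behind this fold, shown as a standalone C++ check (an illustration assuming a 32-bit word, not the combiner code): testing whether bit N is clear via shift-and-AND is equivalent to masking with (1 << N) and comparing against zero, which a setcc can express directly.
#include <cassert>
#include <cstdint>
// Shift-then-AND bit test on the left, mask-then-compare (what setcc expresses)
// on the right; both report whether bit N of X is clear.
static bool bitClearViaShift(uint32_t X, unsigned N) { return ((X >> N) & 1u) == 0; }
static bool bitClearViaMask(uint32_t X, unsigned N) { return (X & (1u << N)) == 0; }
int main() {
  for (unsigned N = 0; N < 32; ++N)
    for (uint32_t X : {0u, 1u, 0x12345678u, 0xFFFFFFFFu})
      assert(bitClearViaShift(X, N) == bitClearViaMask(X, N));
  return 0;
}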
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
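For intuition (a standalone C++ model, not the combiner code): a binary operation applied lane-wise to two splat vectors is itself a splat of the scalar result, so the work can be done once on the scalars and re-splatted.
#include <array>
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t A = 7, B = 5;
  std::array<uint32_t, 4> SplatA, SplatB, VectorResult, ScalarThenSplat;
  SplatA.fill(A);
  SplatB.fill(B);
  for (int I = 0; I < 4; ++I)
    VectorResult[I] = SplatA[I] * SplatB[I]; // lane-wise binop on two splats
  ScalarThenSplat.fill(A * B);               // one scalar binop, then splat
  assert(VectorResult == ScalarThenSplat);
  return 0;
}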
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
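A standalone scalar sketch of the "smear" trick (not the combiner code; it assumes arithmetic right shift of signed values, which C++20 guarantees and common targets provide): shifting a 32-bit value right by 31 turns a sign-bit test into an all-ones or all-zeros mask that selects between constants with cheap bit operations.
#include <cassert>
#include <cstdint>
// select(x < 0, C, 0) rewritten with a sign smear: (x >> 31) is all-ones when
// x is negative and zero otherwise, so AND-ing with C yields C or 0.
static int32_t selectCOrZero(int32_t X, int32_t C) {
  return (X >> 31) & C;
}
int main() {
  assert(selectCOrZero(-5, 42) == 42);
  assert(selectCOrZero(7, 42) == 0);
  return 0;
}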
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
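A scalar model of the signed-saturation pattern (standalone C++17, not the combiner code; truncateWithSSat is a hypothetical name): clamp to the destination type's signed range, then truncate.
#include <algorithm>
#include <cassert>
#include <cstdint>
// Scalar model of trunc(smin(smax(x, signed_min_of_dest_type), signed_max_of_dest_type)).
static int8_t truncateWithSSat(int32_t X) {
  return static_cast<int8_t>(std::clamp<int32_t>(X, INT8_MIN, INT8_MAX));
}
int main() {
  assert(truncateWithSSat(100) == 100);
  assert(truncateWithSSat(300) == 127);
  assert(truncateWithSSat(-300) == -128);
  return 0;
}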
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
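For reference, a standalone C++ model of the "packed halfword byteswap" these helpers try to recognise from shift-and-mask trees (an illustration, not the combiner code): each 16-bit half of a 32-bit word has its two bytes swapped.
#include <cassert>
#include <cstdint>
// Swap the bytes within each 16-bit half of the 32-bit word.
static uint32_t bswapHalfwords(uint32_t X) {
  return ((X & 0x00FF00FFu) << 8) | ((X & 0xFF00FF00u) >> 8);
}
int main() {
  assert(bswapHalfwords(0xAABBCCDDu) == 0xBBAADDCCu);
  return 0;
}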
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:500
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:1122
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1210
bool isNegative() const
Definition: APFloat.h:1445
bool isNormal() const
Definition: APFloat.h:1449
bool isDenormal() const
Definition: APFloat.h:1446
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1428
const fltSemantics & getSemantics() const
Definition: APFloat.h:1453
bool isNaN() const
Definition: APFloat.h:1443
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1090
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
bool isLargest() const
Definition: APFloat.h:1461
bool isIEEE() const
Definition: APFloat.h:1463
bool isInfinity() const
Definition: APFloat.h:1442
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1945
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1732
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:617
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:466
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
int32_t exactLogBase2() const
Definition: APInt.h:1761
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1909
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned countLeadingZeros() const
Definition: APInt.h:1585
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1434
unsigned logBase2() const
Definition: APInt.h:1739
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1934
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:207
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents the known origin of an individual byte in a combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describe the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:422
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:415
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Helper to insert SDNodeFlags automatically when transforming nodes.
Definition: SelectionDAG.h:371
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:983
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:577
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:499
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static unsigned getHasPredecessorMaxSteps()
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:857
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:891
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:569
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:504
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:874
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:586
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
std::optional< bool > isBoolConstant(SDValue N, bool AllowTruncation=false) const
Check if a value N is a constant using the target's BooleanContent for its type.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:907
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:937
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
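A minimal sketch, assuming TLI, DAG, DL, VT, and X name the usual combiner state, of how this query typically guards the creation of a new node:
  // Only form ISD::ABS directly when the target can select or custom-lower it;
  // otherwise leave the original pattern for later expansion.
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
    return DAG.getNode(ISD::ABS, DL, VT, X);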
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
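A hedged sketch of how this hook is commonly consulted before changing a memory access; VT, AddrSpace, and Alignment are assumed placeholders for the access being rewritten:
  // Bail out unless the target supports the access and reports it as fast.
  unsigned IsFast = 0;
  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                              AddrSpace, Alignment,
                              MachineMemOperand::MONone, &IsFast) ||
      !IsFast)
    return SDValue();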
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Return true if it is reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
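An illustrative sketch, ignoring the fast-math-flag checks a real combine also needs; X, Y, DL, VT, LegalOperations, and ForCodeSize are assumed to be in scope:
  // Rewrite (fsub X, Y) as (fadd X, (fneg Y)) only when a strictly cheaper
  // negated form of Y exists, so no explicit FNEG has to be emitted.
  if (SDValue NegY = TLI.getCheaperNegatedExpression(Y, DAG, LegalOperations,
                                                     ForCodeSize))
    return DAG.getNode(ISD::FADD, DL, VT, X, NegY);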
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
int getNumOccurrences() const
Definition: CommandLine.h:399
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2217
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2222
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2227
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2232
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:502
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ TRUNCATE_SSAT_U
Definition: ISDOpcodes.h:834
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:788
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1377
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1262
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1078
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Definition: ISDOpcodes.h:1481
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1211
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1083
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1576
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1686
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1661
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1681
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1498
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
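A small sketch of these condition-code helpers; the resulting codes follow from the ISD::CondCode definitions:
  // Swapping operands: (setlt X, Y) corresponds to (setgt Y, X).
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(ISD::SETLT); // ISD::SETGT
  // Inverting the predicate of an i32 comparison: !(X == Y) is (X != Y).
  ISD::CondCode Inverse = ISD::getSetCCInverse(ISD::SETEQ, MVT::i32); // ISD::SETNE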
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1572
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1572
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1643
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1655
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:903
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
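A hedged sketch of how these matchers compose inside a combine; N and DAG are assumed to be the node and SelectionDAG under inspection, m_Value() is the generic any-value matcher, and the pattern itself is illustrative only:
  using namespace llvm::SDPatternMatch;
  // Match a single-use (setcc A, B, seteq), accepting either operand order.
  bool IsOneUseEq =
      sd_match(N, &DAG, m_AllOf(m_OneUse(),
                                m_c_SetCC(m_Value(), m_Value(),
                                          m_SpecificCondCode(ISD::SETEQ))));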
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4771
constexpr double e
Definition: MathExtras.h:48
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:355
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1565
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
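A brief sketch of the common peek-through idiom, assuming N is the node being combined:
  // Look through any number of bitcasts before classifying the source vector.
  SDValue Src = peekThroughBitcasts(N->getOperand(0));
  bool AllOnesSrc = ISD::isBuildVectorAllOnes(Src.getNode());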
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:518
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:298
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:348
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1503
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
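A sketch of the usual splat-constant check, assuming N1 is the right-hand operand of the node being combined:
  // Treat scalar constants and splat vectors uniformly: only consider the fold
  // when the (splatted) value is a power of two.
  bool FoldToShift = false;
  if (ConstantSDNode *C = isConstOrConstSplat(N1))
    FoldToShift = C->getAPIntValue().isPowerOf2();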
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2087
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:384
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:764
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:323
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:319
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:318
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:274
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:187
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:251
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:263
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:320
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
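A short sketch combining a few of the EVT helpers above; VT and TLI are assumed to be in scope:
  // Bitcasting an FP value to an integer of the same width is only worthwhile
  // when the equivalently sized integer type is natively supported.
  EVT IntVT = VT.changeTypeToInteger();
  bool CanUseIntOps = VT.isFloatingPoint() && TLI.isTypeLegal(IntVT);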
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:53
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:240
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:82
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:59
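A sketch of how known-bits information is typically consulted, assuming Op and DAG are in scope:
  // Prove the value fits in the low 16 bits before narrowing the operation.
  KnownBits Known = DAG.computeKnownBits(Op);
  bool FitsIn16 = Known.countMaxActiveBits() <= 16;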
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
void setDisjoint(bool b)
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:317
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...