DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
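// Note: like all cl::opt flags, these hidden options can be set on the llc
// command line (e.g. "llc -combiner-global-alias-analysis") or passed through
// the clang driver with "-mllvm -combiner-global-alias-analysis".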
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG. For fast lookup and
174 /// deduplication, the index of the node in this vector is stored in the
175 /// node in SDNode::CombinerWorklistIndex.
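 /// A node's CombinerWorklistIndex is its position in this vector while the
 /// node is queued; -1 means it is not in the worklist, and -2 means it has
 /// already been combined (see AddToWorklist and getNextWorklistEntry).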
176 SmallVector<SDNode *, 64> Worklist;
177
178 /// This records all nodes attempted to be added to the worklist since we
179 /// considered a new worklist entry. As we do not add duplicate nodes
180 /// to the worklist, this is different from the tail of the worklist.
181 SmallSetVector<SDNode *, 32> PruningList;
182
183 /// Map from candidate StoreNode to the pair of RootNode and count.
184 /// The count is used to track how many times we have seen the StoreNode
185 /// with the same RootNode bail out in dependence check. If we have seen
186 /// the bail out for the same pair many times over a limit, we won't
187 /// consider the StoreNode with the same RootNode as store merging
188 /// candidate again.
189 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
190
191 // AA - Used for DAG load/store alias analysis.
192 AliasAnalysis *AA;
193
194 /// When an instruction is simplified, add all users of the instruction to
195 /// the work lists because they might get more simplified now.
196 void AddUsersToWorklist(SDNode *N) {
197 for (SDNode *Node : N->uses())
198 AddToWorklist(Node);
199 }
200
201 /// Convenient shorthand to add a node and all of its user to the worklist.
202 void AddToWorklistWithUsers(SDNode *N) {
203 AddUsersToWorklist(N);
204 AddToWorklist(N);
205 }
206
207 // Prune potentially dangling nodes. This is called after
208 // any visit to a node, but should also be called during a visit after any
209 // failed combine which may have created a DAG node.
210 void clearAddedDanglingWorklistEntries() {
211 // Check any nodes added to the worklist to see if they are prunable.
212 while (!PruningList.empty()) {
213 auto *N = PruningList.pop_back_val();
214 if (N->use_empty())
215 recursivelyDeleteUnusedNodes(N);
216 }
217 }
218
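 /// Pop the next live node off the worklist, skipping null entries left
 /// behind by deleted nodes, and mark it as combined. Returns nullptr once
 /// the worklist is exhausted.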
219 SDNode *getNextWorklistEntry() {
220 // Before we do any work, remove nodes that are not in use.
221 clearAddedDanglingWorklistEntries();
222 SDNode *N = nullptr;
223 // The Worklist holds the SDNodes in order, but it may contain null
224 // entries.
225 while (!N && !Worklist.empty()) {
226 N = Worklist.pop_back_val();
227 }
228
229 if (N) {
230 assert(N->getCombinerWorklistIndex() >= 0 &&
231 "Found a worklist entry without a corresponding map entry!");
232 // Set to -2 to indicate that we combined the node.
233 N->setCombinerWorklistIndex(-2);
234 }
235 return N;
236 }
237
238 /// Call the node-specific routine that folds each particular type of node.
239 SDValue visit(SDNode *N);
240
241 public:
242 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
243 : DAG(D), TLI(D.getTargetLoweringInfo()),
244 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
245 ForCodeSize = DAG.shouldOptForSize();
246 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
247
248 MaximumLegalStoreInBits = 0;
249 // We use the minimum store size here, since that's all we can guarantee
250 // for the scalable vector types.
251 for (MVT VT : MVT::all_valuetypes())
252 if (EVT(VT).isSimple() && VT != MVT::Other &&
253 TLI.isTypeLegal(EVT(VT)) &&
254 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
255 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
256 }
257
258 void ConsiderForPruning(SDNode *N) {
259 // Mark this for potential pruning.
260 PruningList.insert(N);
261 }
262
263 /// Add to the worklist making sure its instance is at the back (next to be
264 /// processed).
265 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
266 bool SkipIfCombinedBefore = false) {
267 assert(N->getOpcode() != ISD::DELETED_NODE &&
268 "Deleted Node added to Worklist");
269
270 // Skip handle nodes as they can't usefully be combined and confuse the
271 // zero-use deletion strategy.
272 if (N->getOpcode() == ISD::HANDLENODE)
273 return;
274
275 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
276 return;
277
278 if (IsCandidateForPruning)
279 ConsiderForPruning(N);
280
281 if (N->getCombinerWorklistIndex() < 0) {
282 N->setCombinerWorklistIndex(Worklist.size());
283 Worklist.push_back(N);
284 }
285 }
286
287 /// Remove all instances of N from the worklist.
288 void removeFromWorklist(SDNode *N) {
289 PruningList.remove(N);
290 StoreRootCountMap.erase(N);
291
292 int WorklistIndex = N->getCombinerWorklistIndex();
293 // If not in the worklist, the index might be -1 or -2 (was combined
294 // before). As the node gets deleted anyway, there's no need to update
295 // the index.
296 if (WorklistIndex < 0)
297 return; // Not in the worklist.
298
299 // Null out the entry rather than erasing it to avoid a linear operation.
300 Worklist[WorklistIndex] = nullptr;
301 N->setCombinerWorklistIndex(-1);
302 }
303
304 void deleteAndRecombine(SDNode *N);
305 bool recursivelyDeleteUnusedNodes(SDNode *N);
306
307 /// Replaces all uses of the results of one DAG node with new values.
308 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
309 bool AddTo = true);
310
311 /// Replaces all uses of the results of one DAG node with new values.
312 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
313 return CombineTo(N, &Res, 1, AddTo);
314 }
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
318 bool AddTo = true) {
319 SDValue To[] = { Res0, Res1 };
320 return CombineTo(N, To, 2, AddTo);
321 }
322
323 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
324
325 private:
326 unsigned MaximumLegalStoreInBits;
327
328 /// Check the specified integer node value to see if it can be simplified or
329 /// if things it uses can be simplified by bit propagation.
330 /// If so, return true.
331 bool SimplifyDemandedBits(SDValue Op) {
332 unsigned BitWidth = Op.getScalarValueSizeInBits();
333 APInt DemandedBits = APInt::getAllOnes(BitWidth);
334 return SimplifyDemandedBits(Op, DemandedBits);
335 }
336
337 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
338 EVT VT = Op.getValueType();
339 APInt DemandedElts = VT.isFixedLengthVector()
340 ? APInt::getAllOnes(VT.getVectorNumElements())
341 : APInt(1, 1);
342 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
343 }
344
345 /// Check the specified vector node value to see if it can be simplified or
346 /// if things it uses can be simplified as it only uses some of the
347 /// elements. If so, return true.
348 bool SimplifyDemandedVectorElts(SDValue Op) {
349 // TODO: For now just pretend it cannot be simplified.
350 if (Op.getValueType().isScalableVector())
351 return false;
352
353 unsigned NumElts = Op.getValueType().getVectorNumElements();
354 APInt DemandedElts = APInt::getAllOnes(NumElts);
355 return SimplifyDemandedVectorElts(Op, DemandedElts);
356 }
357
358 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
359 const APInt &DemandedElts,
360 bool AssumeSingleUse = false);
361 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
362 bool AssumeSingleUse = false);
363
364 bool CombineToPreIndexedLoadStore(SDNode *N);
365 bool CombineToPostIndexedLoadStore(SDNode *N);
366 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
367 bool SliceUpLoad(SDNode *N);
368
369 // Looks up the chain to find a unique (unaliased) store feeding the passed
370 // load. If no such store is found, returns a nullptr.
371 // Note: This will look past a CALLSEQ_START if the load is chained to it,
372 // so that it can find stack stores for byval params.
373 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
374 // Scalars have size 0 to distinguish from singleton vectors.
375 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
376 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
377 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
378
379 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
380 /// load.
381 ///
382 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
383 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
384 /// \param EltNo index of the vector element to load.
385 /// \param OriginalLoad load that EVE came from to be replaced.
386 /// \returns EVE on success, SDValue() on failure.
387 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
388 SDValue EltNo,
389 LoadSDNode *OriginalLoad);
390 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
391 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
392 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
393 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
394 SDValue PromoteIntBinOp(SDValue Op);
395 SDValue PromoteIntShiftOp(SDValue Op);
396 SDValue PromoteExtend(SDValue Op);
397 bool PromoteLoad(SDValue Op);
398
399 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
400 SDValue RHS, SDValue True, SDValue False,
401 ISD::CondCode CC);
402
403 /// Call the node-specific routine that knows how to fold each
404 /// particular type of node. If that doesn't do anything, try the
405 /// target-specific DAG combines.
406 SDValue combine(SDNode *N);
407
408 // Visitation implementation - Implement dag node combining for different
409 // node types. The semantics are as follows:
410 // Return Value:
411 // SDValue.getNode() == 0 - No change was made
412 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
413 // otherwise - N should be replaced by the returned Operand.
414 //
415 SDValue visitTokenFactor(SDNode *N);
416 SDValue visitMERGE_VALUES(SDNode *N);
417 SDValue visitADD(SDNode *N);
418 SDValue visitADDLike(SDNode *N);
419 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
420 SDValue visitSUB(SDNode *N);
421 SDValue visitADDSAT(SDNode *N);
422 SDValue visitSUBSAT(SDNode *N);
423 SDValue visitADDC(SDNode *N);
424 SDValue visitADDO(SDNode *N);
425 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
426 SDValue visitSUBC(SDNode *N);
427 SDValue visitSUBO(SDNode *N);
428 SDValue visitADDE(SDNode *N);
429 SDValue visitUADDO_CARRY(SDNode *N);
430 SDValue visitSADDO_CARRY(SDNode *N);
431 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
432 SDNode *N);
433 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
434 SDNode *N);
435 SDValue visitSUBE(SDNode *N);
436 SDValue visitUSUBO_CARRY(SDNode *N);
437 SDValue visitSSUBO_CARRY(SDNode *N);
438 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
439 SDValue visitMULFIX(SDNode *N);
440 SDValue useDivRem(SDNode *N);
441 SDValue visitSDIV(SDNode *N);
442 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
443 SDValue visitUDIV(SDNode *N);
444 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
445 SDValue visitREM(SDNode *N);
446 SDValue visitMULHU(SDNode *N);
447 SDValue visitMULHS(SDNode *N);
448 SDValue visitAVG(SDNode *N);
449 SDValue visitABD(SDNode *N);
450 SDValue visitSMUL_LOHI(SDNode *N);
451 SDValue visitUMUL_LOHI(SDNode *N);
452 SDValue visitMULO(SDNode *N);
453 SDValue visitIMINMAX(SDNode *N);
454 SDValue visitAND(SDNode *N);
455 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitOR(SDNode *N);
457 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
458 SDValue visitXOR(SDNode *N);
459 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
460 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
461 SDValue visitSHL(SDNode *N);
462 SDValue visitSRA(SDNode *N);
463 SDValue visitSRL(SDNode *N);
464 SDValue visitFunnelShift(SDNode *N);
465 SDValue visitSHLSAT(SDNode *N);
466 SDValue visitRotate(SDNode *N);
467 SDValue visitABS(SDNode *N);
468 SDValue visitBSWAP(SDNode *N);
469 SDValue visitBITREVERSE(SDNode *N);
470 SDValue visitCTLZ(SDNode *N);
471 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
472 SDValue visitCTTZ(SDNode *N);
473 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
474 SDValue visitCTPOP(SDNode *N);
475 SDValue visitSELECT(SDNode *N);
476 SDValue visitVSELECT(SDNode *N);
477 SDValue visitVP_SELECT(SDNode *N);
478 SDValue visitSELECT_CC(SDNode *N);
479 SDValue visitSETCC(SDNode *N);
480 SDValue visitSETCCCARRY(SDNode *N);
481 SDValue visitSIGN_EXTEND(SDNode *N);
482 SDValue visitZERO_EXTEND(SDNode *N);
483 SDValue visitANY_EXTEND(SDNode *N);
484 SDValue visitAssertExt(SDNode *N);
485 SDValue visitAssertAlign(SDNode *N);
486 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
487 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
488 SDValue visitTRUNCATE(SDNode *N);
489 SDValue visitBITCAST(SDNode *N);
490 SDValue visitFREEZE(SDNode *N);
491 SDValue visitBUILD_PAIR(SDNode *N);
492 SDValue visitFADD(SDNode *N);
493 SDValue visitVP_FADD(SDNode *N);
494 SDValue visitVP_FSUB(SDNode *N);
495 SDValue visitSTRICT_FADD(SDNode *N);
496 SDValue visitFSUB(SDNode *N);
497 SDValue visitFMUL(SDNode *N);
498 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
499 SDValue visitFMAD(SDNode *N);
500 SDValue visitFDIV(SDNode *N);
501 SDValue visitFREM(SDNode *N);
502 SDValue visitFSQRT(SDNode *N);
503 SDValue visitFCOPYSIGN(SDNode *N);
504 SDValue visitFPOW(SDNode *N);
505 SDValue visitSINT_TO_FP(SDNode *N);
506 SDValue visitUINT_TO_FP(SDNode *N);
507 SDValue visitFP_TO_SINT(SDNode *N);
508 SDValue visitFP_TO_UINT(SDNode *N);
509 SDValue visitXRINT(SDNode *N);
510 SDValue visitFP_ROUND(SDNode *N);
511 SDValue visitFP_EXTEND(SDNode *N);
512 SDValue visitFNEG(SDNode *N);
513 SDValue visitFABS(SDNode *N);
514 SDValue visitFCEIL(SDNode *N);
515 SDValue visitFTRUNC(SDNode *N);
516 SDValue visitFFREXP(SDNode *N);
517 SDValue visitFFLOOR(SDNode *N);
518 SDValue visitFMinMax(SDNode *N);
519 SDValue visitBRCOND(SDNode *N);
520 SDValue visitBR_CC(SDNode *N);
521 SDValue visitLOAD(SDNode *N);
522
523 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
524 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
525 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
526
527 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
528
529 SDValue visitSTORE(SDNode *N);
530 SDValue visitATOMIC_STORE(SDNode *N);
531 SDValue visitLIFETIME_END(SDNode *N);
532 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
533 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
534 SDValue visitBUILD_VECTOR(SDNode *N);
535 SDValue visitCONCAT_VECTORS(SDNode *N);
536 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
537 SDValue visitVECTOR_SHUFFLE(SDNode *N);
538 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
539 SDValue visitINSERT_SUBVECTOR(SDNode *N);
540 SDValue visitMLOAD(SDNode *N);
541 SDValue visitMSTORE(SDNode *N);
542 SDValue visitMGATHER(SDNode *N);
543 SDValue visitMSCATTER(SDNode *N);
544 SDValue visitVPGATHER(SDNode *N);
545 SDValue visitVPSCATTER(SDNode *N);
546 SDValue visitVP_STRIDED_LOAD(SDNode *N);
547 SDValue visitVP_STRIDED_STORE(SDNode *N);
548 SDValue visitFP_TO_FP16(SDNode *N);
549 SDValue visitFP16_TO_FP(SDNode *N);
550 SDValue visitFP_TO_BF16(SDNode *N);
551 SDValue visitBF16_TO_FP(SDNode *N);
552 SDValue visitVECREDUCE(SDNode *N);
553 SDValue visitVPOp(SDNode *N);
554 SDValue visitGET_FPENV_MEM(SDNode *N);
555 SDValue visitSET_FPENV_MEM(SDNode *N);
556
557 template <class MatchContextClass>
558 SDValue visitFADDForFMACombine(SDNode *N);
559 template <class MatchContextClass>
560 SDValue visitFSUBForFMACombine(SDNode *N);
561 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
562
563 SDValue XformToShuffleWithZero(SDNode *N);
564 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
565 const SDLoc &DL,
566 SDNode *N,
567 SDValue N0,
568 SDValue N1);
569 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
570 SDValue N1, SDNodeFlags Flags);
571 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
572 SDValue N1, SDNodeFlags Flags);
573 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
574 EVT VT, SDValue N0, SDValue N1,
575 SDNodeFlags Flags = SDNodeFlags());
576
577 SDValue visitShiftByConstant(SDNode *N);
578
579 SDValue foldSelectOfConstants(SDNode *N);
580 SDValue foldVSelectOfConstants(SDNode *N);
581 SDValue foldBinOpIntoSelect(SDNode *BO);
582 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
583 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
584 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
585 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
586 SDValue N2, SDValue N3, ISD::CondCode CC,
587 bool NotExtCompare = false);
588 SDValue convertSelectOfFPConstantsToLoadOffset(
589 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
590 ISD::CondCode CC);
591 SDValue foldSignChangeInBitcast(SDNode *N);
592 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
593 SDValue N2, SDValue N3, ISD::CondCode CC);
594 SDValue foldSelectOfBinops(SDNode *N);
595 SDValue foldSextSetcc(SDNode *N);
596 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
597 const SDLoc &DL);
598 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
599 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
600 SDValue unfoldMaskedMerge(SDNode *N);
601 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
602 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
603 const SDLoc &DL, bool foldBooleans);
604 SDValue rebuildSetCC(SDValue N);
605
606 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
607 SDValue &CC, bool MatchStrict = false) const;
608 bool isOneUseSetCC(SDValue N) const;
609
610 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
611 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
612
613 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
614 unsigned HiOp);
615 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
616 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
617 const TargetLowering &TLI);
618
619 SDValue CombineExtLoad(SDNode *N);
620 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
621 SDValue combineRepeatedFPDivisors(SDNode *N);
622 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
623 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
624 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
625 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
626 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
627 SDValue BuildSDIV(SDNode *N);
628 SDValue BuildSDIVPow2(SDNode *N);
629 SDValue BuildUDIV(SDNode *N);
630 SDValue BuildSREMPow2(SDNode *N);
631 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
632 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
633 bool KnownNeverZero = false,
634 bool InexpensiveOnly = false,
635 std::optional<EVT> OutVT = std::nullopt);
636 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
637 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
638 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
639 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
640 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
641 SDNodeFlags Flags, bool Reciprocal);
642 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
643 SDNodeFlags Flags, bool Reciprocal);
644 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
645 bool DemandHighBits = true);
646 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
647 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
648 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
649 unsigned PosOpcode, unsigned NegOpcode,
650 const SDLoc &DL);
651 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
652 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
653 unsigned PosOpcode, unsigned NegOpcode,
654 const SDLoc &DL);
655 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
656 SDValue MatchLoadCombine(SDNode *N);
657 SDValue mergeTruncStores(StoreSDNode *N);
658 SDValue reduceLoadWidth(SDNode *N);
659 SDValue ReduceLoadOpStoreWidth(SDNode *N);
660 SDValue splitMergedValStore(StoreSDNode *ST);
661 SDValue TransformFPLoadStorePair(SDNode *N);
662 SDValue convertBuildVecZextToZext(SDNode *N);
663 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
664 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
665 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
666 SDValue reduceBuildVecToShuffle(SDNode *N);
667 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
668 ArrayRef<int> VectorMask, SDValue VecIn1,
669 SDValue VecIn2, unsigned LeftIdx,
670 bool DidSplitVec);
671 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
672
673 /// Walk up chain skipping non-aliasing memory nodes,
674 /// looking for aliasing nodes and adding them to the Aliases vector.
675 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
676 SmallVectorImpl<SDValue> &Aliases);
677
678 /// Return true if there is any possibility that the two addresses overlap.
679 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
680
681 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
682 /// chain (aliasing node.)
683 SDValue FindBetterChain(SDNode *N, SDValue Chain);
684
685 /// Try to replace a store and any possibly adjacent stores on
686 /// consecutive chains with better chains. Return true only if St is
687 /// replaced.
688 ///
689 /// Notice that other chains may still be replaced even if the function
690 /// returns false.
691 bool findBetterNeighborChains(StoreSDNode *St);
692
693 // Helper for findBetterNeighborChains. Walk up the store chain and add
694 // additional chained stores that do not overlap and can be parallelized.
695 bool parallelizeChainedStores(StoreSDNode *St);
696
697 /// Holds a pointer to an LSBaseSDNode as well as information on where it
698 /// is located in a sequence of memory operations connected by a chain.
699 struct MemOpLink {
700 // Ptr to the mem node.
701 LSBaseSDNode *MemNode;
702
703 // Offset from the base ptr.
704 int64_t OffsetFromBase;
705
706 MemOpLink(LSBaseSDNode *N, int64_t Offset)
707 : MemNode(N), OffsetFromBase(Offset) {}
708 };
709
710 // Classify the origin of a stored value.
711 enum class StoreSource { Unknown, Constant, Extract, Load };
712 StoreSource getStoreSource(SDValue StoreVal) {
713 switch (StoreVal.getOpcode()) {
714 case ISD::Constant:
715 case ISD::ConstantFP:
716 return StoreSource::Constant;
717 case ISD::BUILD_VECTOR:
718 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
719 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
720 return StoreSource::Constant;
721 return StoreSource::Unknown;
722 case ISD::EXTRACT_VECTOR_ELT:
723 case ISD::EXTRACT_SUBVECTOR:
724 return StoreSource::Extract;
725 case ISD::LOAD:
726 return StoreSource::Load;
727 default:
728 return StoreSource::Unknown;
729 }
730 }
731
732 /// This is a helper function for visitMUL to check the profitability
733 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
734 /// MulNode is the original multiply, AddNode is (add x, c1),
735 /// and ConstNode is c2.
736 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
737 SDValue ConstNode);
738
739 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
740 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
741 /// the type of the loaded value to be extended.
742 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
743 EVT LoadResultTy, EVT &ExtVT);
744
745 /// Helper function to calculate whether the given Load/Store can have its
746 /// width reduced to ExtVT.
747 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
748 EVT &MemVT, unsigned ShAmt = 0);
749
750 /// Used by BackwardsPropagateMask to find suitable loads.
751 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
752 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
753 ConstantSDNode *Mask, SDNode *&NodeToMask);
754 /// Attempt to propagate a given AND node back to load leaves so that they
755 /// can be combined into narrow loads.
756 bool BackwardsPropagateMask(SDNode *N);
757
758 /// Helper function for mergeConsecutiveStores which merges the component
759 /// store chains.
760 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
761 unsigned NumStores);
762
763 /// Helper function for mergeConsecutiveStores which checks if all the store
764 /// nodes have the same underlying object. We can still reuse the first
765 /// store's pointer info if all the stores are from the same object.
766 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
767
768 /// This is a helper function for mergeConsecutiveStores. When the source
769 /// elements of the consecutive stores are all constants or all extracted
770 /// vector elements, try to merge them into one larger store introducing
771 /// bitcasts if necessary. \return True if a merged store was created.
772 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
773 EVT MemVT, unsigned NumStores,
774 bool IsConstantSrc, bool UseVector,
775 bool UseTrunc);
776
777 /// This is a helper function for mergeConsecutiveStores. Stores that
778 /// potentially may be merged with St are placed in StoreNodes. RootNode is
779 /// a chain predecessor to all store candidates.
780 void getStoreMergeCandidates(StoreSDNode *St,
781 SmallVectorImpl<MemOpLink> &StoreNodes,
782 SDNode *&Root);
783
784 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
785 /// have indirect dependency through their operands. RootNode is the
786 /// predecessor to all stores calculated by getStoreMergeCandidates and is
787 /// used to prune the dependency check. \return True if safe to merge.
788 bool checkMergeStoreCandidatesForDependencies(
789 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
790 SDNode *RootNode);
791
792 /// This is a helper function for mergeConsecutiveStores. Given a list of
793 /// store candidates, find the first N that are consecutive in memory.
794 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
795 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
796 int64_t ElementSizeBytes) const;
797
798 /// This is a helper function for mergeConsecutiveStores. It is used for
799 /// store chains that are composed entirely of constant values.
800 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
801 unsigned NumConsecutiveStores,
802 EVT MemVT, SDNode *Root, bool AllowVectors);
803
804 /// This is a helper function for mergeConsecutiveStores. It is used for
805 /// store chains that are composed entirely of extracted vector elements.
806 /// When extracting multiple vector elements, try to store them in one
807 /// vector store rather than a sequence of scalar stores.
808 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
809 unsigned NumConsecutiveStores, EVT MemVT,
810 SDNode *Root);
811
812 /// This is a helper function for mergeConsecutiveStores. It is used for
813 /// store chains that are composed entirely of loaded values.
814 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
815 unsigned NumConsecutiveStores, EVT MemVT,
816 SDNode *Root, bool AllowVectors,
817 bool IsNonTemporalStore, bool IsNonTemporalLoad);
818
819 /// Merge consecutive store operations into a wide store.
820 /// This optimization uses wide integers or vectors when possible.
821 /// \return true if stores were merged.
822 bool mergeConsecutiveStores(StoreSDNode *St);
823
824 /// Try to transform a truncation where C is a constant:
825 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
826 ///
827 /// \p N needs to be a truncation and its first operand an AND. Other
828 /// requirements are checked by the function (e.g. that trunc is
829 /// single-use); if they are not met, an empty SDValue is returned.
830 SDValue distributeTruncateThroughAnd(SDNode *N);
831
832 /// Helper function to determine whether the target supports operation
833 /// given by \p Opcode for type \p VT, that is, whether the operation
834 /// is legal or custom before legalizing operations, and whether it is
835 /// legal (but not custom) after legalization.
836 bool hasOperation(unsigned Opcode, EVT VT) {
837 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
838 }
839
840 public:
841 /// Runs the dag combiner on all nodes in the work list
842 void Run(CombineLevel AtLevel);
843
844 SelectionDAG &getDAG() const { return DAG; }
845
846 /// Returns a type large enough to hold any valid shift amount - before type
847 /// legalization these can be huge.
848 EVT getShiftAmountTy(EVT LHSTy) {
849 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
850 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
851 }
852
853 /// This method returns true if we are running before type legalization or
854 /// if the specified VT is legal.
855 bool isTypeLegal(const EVT &VT) {
856 if (!LegalTypes) return true;
857 return TLI.isTypeLegal(VT);
858 }
859
860 /// Convenience wrapper around TargetLowering::getSetCCResultType
861 EVT getSetCCResultType(EVT VT) const {
862 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
863 }
864
865 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
866 SDValue OrigLoad, SDValue ExtLoad,
867 ISD::NodeType ExtType);
868 };
869
870/// This class is a DAGUpdateListener that removes any deleted
871/// nodes from the worklist.
872class WorklistRemover : public SelectionDAG::DAGUpdateListener {
873 DAGCombiner &DC;
874
875public:
876 explicit WorklistRemover(DAGCombiner &dc)
877 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
878
879 void NodeDeleted(SDNode *N, SDNode *E) override {
880 DC.removeFromWorklist(N);
881 }
882};
883
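/// This class is a DAGUpdateListener that registers newly inserted nodes as
/// candidates for pruning rather than adding them straight to the worklist
/// (see the FIXME on NodeInserted below).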
884class WorklistInserter : public SelectionDAG::DAGUpdateListener {
885 DAGCombiner &DC;
886
887public:
888 explicit WorklistInserter(DAGCombiner &dc)
889 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
890
891 // FIXME: Ideally we could add N to the worklist, but this causes exponential
892 // compile time costs in large DAGs, e.g. Halide.
893 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
894};
895
896} // end anonymous namespace
897
898//===----------------------------------------------------------------------===//
899// TargetLowering::DAGCombinerInfo implementation
900//===----------------------------------------------------------------------===//
901
902void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
903 ((DAGCombiner*)DC)->AddToWorklist(N);
904}
905
906SDValue TargetLowering::DAGCombinerInfo::
907 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
908 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
909}
910
911SDValue TargetLowering::DAGCombinerInfo::
912 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
913 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
914}
915
916SDValue TargetLowering::DAGCombinerInfo::
917 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
918 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
919}
920
921bool TargetLowering::DAGCombinerInfo::
922recursivelyDeleteUnusedNodes(SDNode *N) {
923 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
924}
925
926void TargetLowering::DAGCombinerInfo::
927CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
928 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
929}
930
931//===----------------------------------------------------------------------===//
932// Helper Functions
933//===----------------------------------------------------------------------===//
934
935void DAGCombiner::deleteAndRecombine(SDNode *N) {
936 removeFromWorklist(N);
937
938 // If the operands of this node are only used by the node, they will now be
939 // dead. Make sure to re-visit them and recursively delete dead nodes.
940 for (const SDValue &Op : N->ops())
941 // For an operand generating multiple values, one of the values may
942 // become dead allowing further simplification (e.g. split index
943 // arithmetic from an indexed load).
944 if (Op->hasOneUse() || Op->getNumValues() > 1)
945 AddToWorklist(Op.getNode());
946
947 DAG.DeleteNode(N);
948}
949
950// APInts must be the same size for most operations; this helper
951// function zero extends the shorter of the pair so that they match.
952// We provide an Offset so that we can create bitwidths that won't overflow.
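// For example, an i8-wide LHS and an i16-wide RHS with Offset = 1 are both
// widened to 17 bits, leaving one spare bit of headroom.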
953static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
954 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
955 LHS = LHS.zext(Bits);
956 RHS = RHS.zext(Bits);
957}
958
959// Return true if this node is a setcc, or is a select_cc
960// that selects between the target values used for true and false, making it
961// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
962// the appropriate nodes based on the type of node we are checking. This
963// simplifies life a bit for the callers.
964bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
965 SDValue &CC, bool MatchStrict) const {
966 if (N.getOpcode() == ISD::SETCC) {
967 LHS = N.getOperand(0);
968 RHS = N.getOperand(1);
969 CC = N.getOperand(2);
970 return true;
971 }
972
973 if (MatchStrict &&
974 (N.getOpcode() == ISD::STRICT_FSETCC ||
975 N.getOpcode() == ISD::STRICT_FSETCCS)) {
976 LHS = N.getOperand(1);
977 RHS = N.getOperand(2);
978 CC = N.getOperand(3);
979 return true;
980 }
981
982 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
983 !TLI.isConstFalseVal(N.getOperand(3)))
984 return false;
985
986 if (TLI.getBooleanContents(N.getValueType()) ==
987 TargetLowering::UndefinedBooleanContent)
988 return false;
989
990 LHS = N.getOperand(0);
991 RHS = N.getOperand(1);
992 CC = N.getOperand(4);
993 return true;
994}
995
996/// Return true if this is a SetCC-equivalent operation with only one use.
997/// If this is true, it allows the users to invert the operation for free when
998/// it is profitable to do so.
999bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1000 SDValue N0, N1, N2;
1001 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1002 return true;
1003 return false;
1004}
1005
1006static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1007 if (!ScalarTy.isSimple())
1008 return false;
1009
1010 uint64_t MaskForTy = 0ULL;
1011 switch (ScalarTy.getSimpleVT().SimpleTy) {
1012 case MVT::i8:
1013 MaskForTy = 0xFFULL;
1014 break;
1015 case MVT::i16:
1016 MaskForTy = 0xFFFFULL;
1017 break;
1018 case MVT::i32:
1019 MaskForTy = 0xFFFFFFFFULL;
1020 break;
1021 default:
1022 return false;
1023 break;
1024 }
1025
1026 APInt Val;
1027 if (ISD::isConstantSplatVector(N, Val))
1028 return Val.getLimitedValue() == MaskForTy;
1029
1030 return false;
1031}
1032
1033// Determines if it is a constant integer or a splat/build vector of constant
1034// integers (and undefs).
1035// Do not permit build vector implicit truncation.
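// For example, (build_vector 0, 1, undef, 3) qualifies, while a BUILD_VECTOR
// whose constant operands are wider than the vector's element type (implicit
// truncation) does not.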
1036static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1037 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1038 return !(Const->isOpaque() && NoOpaques);
1039 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1040 return false;
1041 unsigned BitWidth = N.getScalarValueSizeInBits();
1042 for (const SDValue &Op : N->op_values()) {
1043 if (Op.isUndef())
1044 continue;
1045 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1046 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1047 (Const->isOpaque() && NoOpaques))
1048 return false;
1049 }
1050 return true;
1051}
1052
1053// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1054 // undefs.
1055static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1056 if (V.getOpcode() != ISD::BUILD_VECTOR)
1057 return false;
1058 return isConstantOrConstantVector(V, NoOpaques) ||
1059 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1060}
1061
1062 // Determine if this is an indexed load with an opaque target constant index.
1063static bool canSplitIdx(LoadSDNode *LD) {
1064 return MaySplitLoadIndex &&
1065 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1066 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1067}
1068
1069bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1070 const SDLoc &DL,
1071 SDNode *N,
1072 SDValue N0,
1073 SDValue N1) {
1074 // Currently this only tries to ensure we don't undo the GEP splits done by
1075 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1076 // we check if the following transformation would be problematic:
1077 // (load/store (add, (add, x, offset1), offset2)) ->
1078 // (load/store (add, x, offset1+offset2)).
1079
1080 // (load/store (add, (add, x, y), offset2)) ->
1081 // (load/store (add, (add, x, offset2), y)).
1082
1083 if (N0.getOpcode() != ISD::ADD)
1084 return false;
1085
1086 // Check for vscale addressing modes.
1087 // (load/store (add/sub (add x, y), vscale))
1088 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1089 // (load/store (add/sub (add x, y), (mul vscale, C)))
1090 if ((N1.getOpcode() == ISD::VSCALE ||
1091 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1092 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1093 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1094 N1.getValueType().getFixedSizeInBits() <= 64) {
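 // ScalableOffset is the compile-time multiplier of vscale: VSCALE(C)
 // contributes C, (shl VSCALE(C), S) contributes C << S, and
 // (mul VSCALE(C), M) contributes C * M.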
1095 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1096 ? N1.getConstantOperandVal(0)
1097 : (N1.getOperand(0).getConstantOperandVal(0) *
1098 (N1.getOpcode() == ISD::SHL
1099 ? (1LL << N1.getConstantOperandVal(1))
1100 : N1.getConstantOperandVal(1)));
1101 if (Opc == ISD::SUB)
1102 ScalableOffset = -ScalableOffset;
1103 if (all_of(N->uses(), [&](SDNode *Node) {
1104 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1105 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1106 TargetLoweringBase::AddrMode AM;
1107 AM.HasBaseReg = true;
1108 AM.ScalableOffset = ScalableOffset;
1109 EVT VT = LoadStore->getMemoryVT();
1110 unsigned AS = LoadStore->getAddressSpace();
1111 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1112 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1113 AS);
1114 }
1115 return false;
1116 }))
1117 return true;
1118 }
1119
1120 if (Opc != ISD::ADD)
1121 return false;
1122
1123 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1124 if (!C2)
1125 return false;
1126
1127 const APInt &C2APIntVal = C2->getAPIntValue();
1128 if (C2APIntVal.getSignificantBits() > 64)
1129 return false;
1130
1131 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1132 if (N0.hasOneUse())
1133 return false;
1134
1135 const APInt &C1APIntVal = C1->getAPIntValue();
1136 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1137 if (CombinedValueIntVal.getSignificantBits() > 64)
1138 return false;
1139 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1140
1141 for (SDNode *Node : N->uses()) {
1142 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1143 // Is x[offset2] already not a legal addressing mode? If so then
1144 // reassociating the constants breaks nothing (we test offset2 because
1145 // that's the one we hope to fold into the load or store).
1146 TargetLoweringBase::AddrMode AM;
1147 AM.HasBaseReg = true;
1148 AM.BaseOffs = C2APIntVal.getSExtValue();
1149 EVT VT = LoadStore->getMemoryVT();
1150 unsigned AS = LoadStore->getAddressSpace();
1151 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1152 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1153 continue;
1154
1155 // Would x[offset1+offset2] still be a legal addressing mode?
1156 AM.BaseOffs = CombinedValue;
1157 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1158 return true;
1159 }
1160 }
1161 } else {
1162 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1163 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1164 return false;
1165
1166 for (SDNode *Node : N->uses()) {
1167 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1168 if (!LoadStore)
1169 return false;
1170
1171 // Is x[offset2] a legal addressing mode? If so then
1172 // reassociating the constants breaks the address pattern.
1173 TargetLoweringBase::AddrMode AM;
1174 AM.HasBaseReg = true;
1175 AM.BaseOffs = C2APIntVal.getSExtValue();
1176 EVT VT = LoadStore->getMemoryVT();
1177 unsigned AS = LoadStore->getAddressSpace();
1178 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1179 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1180 return false;
1181 }
1182 return true;
1183 }
1184
1185 return false;
1186}
1187
1188/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1189/// \p N0 is the same kind of operation as \p Opc.
1190SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1191 SDValue N0, SDValue N1,
1192 SDNodeFlags Flags) {
1193 EVT VT = N0.getValueType();
1194
1195 if (N0.getOpcode() != Opc)
1196 return SDValue();
1197
1198 SDValue N00 = N0.getOperand(0);
1199 SDValue N01 = N0.getOperand(1);
1200
1201 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1202 SDNodeFlags NewFlags;
1203 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1204 Flags.hasNoUnsignedWrap())
1205 NewFlags.setNoUnsignedWrap(true);
1206
1207 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1208 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1209 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1210 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1211 return SDValue();
1212 }
1213 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1214 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1215 // iff (op x, c1) has one use
1216 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1217 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1218 }
1219 }
1220
1221 // Check for repeated operand logic simplifications.
1222 if (Opc == ISD::AND || Opc == ISD::OR) {
1223 // (N00 & N01) & N00 --> N00 & N01
1224 // (N00 & N01) & N01 --> N00 & N01
1225 // (N00 | N01) | N00 --> N00 | N01
1226 // (N00 | N01) | N01 --> N00 | N01
1227 if (N1 == N00 || N1 == N01)
1228 return N0;
1229 }
1230 if (Opc == ISD::XOR) {
1231 // (N00 ^ N01) ^ N00 --> N01
1232 if (N1 == N00)
1233 return N01;
1234 // (N00 ^ N01) ^ N01 --> N00
1235 if (N1 == N01)
1236 return N00;
1237 }
1238
1239 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1240 if (N1 != N01) {
1241 // Reassociate if (op N00, N1) already exist
1242 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1243 // If (Op (Op N00, N1), N01) already exists,
1244 // we need to stop reassociating to avoid an infinite loop.
1245 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1246 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1247 }
1248 }
1249
1250 if (N1 != N00) {
1251 // Reassociate if (op N01, N1) already exist
1252 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1253 // If (Op (Op N01, N1), N00) already exists,
1254 // we need to stop reassociating to avoid an infinite loop.
1255 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1256 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1257 }
1258 }
1259
1260 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1261 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1262 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1263 // comparisons with the same predicate. This enables optimizations as the
1264 // following one:
1265 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1266 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1267 if (Opc == ISD::AND || Opc == ISD::OR) {
1268 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1269 N01->getOpcode() == ISD::SETCC) {
1270 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1271 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1272 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1273 if (CC1 == CC00 && CC1 != CC01) {
1274 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1275 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1276 }
1277 if (CC1 == CC01 && CC1 != CC00) {
1278 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1279 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1280 }
1281 }
1282 }
1283 }
1284
1285 return SDValue();
1286}
1287
1288/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1289/// same kind of operation as \p Opc.
1290SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1291 SDValue N1, SDNodeFlags Flags) {
1292 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1293
1294 // Floating-point reassociation is not allowed without loose FP math.
1295 if (N0.getValueType().isFloatingPoint() ||
1296 N1.getValueType().isFloatingPoint())
1297 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1298 return SDValue();
1299
1300 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1301 return Combined;
1302 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1303 return Combined;
1304 return SDValue();
1305}
1306
1307// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1308// Note that we only expect Flags to be passed from FP operations. For integer
1309// operations they need to be dropped.
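// For example, with RedOpc = ISD::VECREDUCE_FADD and Opc = ISD::FADD:
//   fadd (vecreduce_fadd x), (vecreduce_fadd y) -> vecreduce_fadd (fadd x, y)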
1310SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1311 const SDLoc &DL, EVT VT, SDValue N0,
1312 SDValue N1, SDNodeFlags Flags) {
1313 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1314 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1315 N0->hasOneUse() && N1->hasOneUse() &&
1316 TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
1317 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1318 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1319 return DAG.getNode(RedOpc, DL, VT,
1320 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1321 N0.getOperand(0), N1.getOperand(0)));
1322 }
1323 return SDValue();
1324}
1325
1326SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1327 bool AddTo) {
1328 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1329 ++NodesCombined;
1330 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1331 To[0].dump(&DAG);
1332 dbgs() << " and " << NumTo - 1 << " other values\n");
1333 for (unsigned i = 0, e = NumTo; i != e; ++i)
1334 assert((!To[i].getNode() ||
1335 N->getValueType(i) == To[i].getValueType()) &&
1336 "Cannot combine value to value of different type!");
1337
1338 WorklistRemover DeadNodes(*this);
1339 DAG.ReplaceAllUsesWith(N, To);
1340 if (AddTo) {
1341 // Push the new nodes and any users onto the worklist
1342 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1343 if (To[i].getNode())
1344 AddToWorklistWithUsers(To[i].getNode());
1345 }
1346 }
1347
1348 // Finally, if the node is now dead, remove it from the graph. The node
1349 // may not be dead if the replacement process recursively simplified to
1350 // something else needing this node.
1351 if (N->use_empty())
1352 deleteAndRecombine(N);
1353 return SDValue(N, 0);
1354}
1355
1356void DAGCombiner::
1357CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1358 // Replace the old value with the new one.
1359 ++NodesCombined;
1360 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1361 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1362
1363 // Replace all uses.
1364 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1365
1366 // Push the new node and any (possibly new) users onto the worklist.
1367 AddToWorklistWithUsers(TLO.New.getNode());
1368
1369 // Finally, if the node is now dead, remove it from the graph.
1370 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1371}
1372
1373/// Check the specified integer node value to see if it can be simplified or if
1374/// things it uses can be simplified by bit propagation. If so, return true.
1375bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1376 const APInt &DemandedElts,
1377 bool AssumeSingleUse) {
1378 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1379 KnownBits Known;
1380 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1381 AssumeSingleUse))
1382 return false;
1383
1384 // Revisit the node.
1385 AddToWorklist(Op.getNode());
1386
1387 CommitTargetLoweringOpt(TLO);
1388 return true;
1389}
1390
1391/// Check the specified vector node value to see if it can be simplified or
1392/// if things it uses can be simplified as it only uses some of the elements.
1393/// If so, return true.
1394bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1395 const APInt &DemandedElts,
1396 bool AssumeSingleUse) {
1397 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1398 APInt KnownUndef, KnownZero;
1399 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1400 TLO, 0, AssumeSingleUse))
1401 return false;
1402
1403 // Revisit the node.
1404 AddToWorklist(Op.getNode());
1405
1406 CommitTargetLoweringOpt(TLO);
1407 return true;
1408}
1409
1410void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1411 SDLoc DL(Load);
1412 EVT VT = Load->getValueType(0);
1413 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1414
1415 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1416 Trunc.dump(&DAG); dbgs() << '\n');
1417
1418 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1419 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1420
1421 AddToWorklist(Trunc.getNode());
1422 recursivelyDeleteUnusedNodes(Load);
1423}
1424
1425SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1426 Replace = false;
1427 SDLoc DL(Op);
1428 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1429 LoadSDNode *LD = cast<LoadSDNode>(Op);
1430 EVT MemVT = LD->getMemoryVT();
1431 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1432 : LD->getExtensionType();
1433 Replace = true;
1434 return DAG.getExtLoad(ExtType, DL, PVT,
1435 LD->getChain(), LD->getBasePtr(),
1436 MemVT, LD->getMemOperand());
1437 }
1438
1439 unsigned Opc = Op.getOpcode();
1440 switch (Opc) {
1441 default: break;
1442 case ISD::AssertSext:
1443 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1444 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1445 break;
1446 case ISD::AssertZext:
1447 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1448 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1449 break;
1450 case ISD::Constant: {
1451 unsigned ExtOpc =
1452 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1453 return DAG.getNode(ExtOpc, DL, PVT, Op);
1454 }
1455 }
1456
1457 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1458 return SDValue();
1459 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1460}
1461
1462SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1463 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1464 return SDValue();
1465 EVT OldVT = Op.getValueType();
1466 SDLoc DL(Op);
1467 bool Replace = false;
1468 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1469 if (!NewOp.getNode())
1470 return SDValue();
1471 AddToWorklist(NewOp.getNode());
1472
1473 if (Replace)
1474 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1475 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1476 DAG.getValueType(OldVT));
1477}
1478
1479SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1480 EVT OldVT = Op.getValueType();
1481 SDLoc DL(Op);
1482 bool Replace = false;
1483 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1484 if (!NewOp.getNode())
1485 return SDValue();
1486 AddToWorklist(NewOp.getNode());
1487
1488 if (Replace)
1489 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1490 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1491}
1492
1493/// Promote the specified integer binary operation if the target indicates it is
1494/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1495/// i32 since i16 instructions are longer.
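/// For example, an i16 add may be rewritten as
///   (i16 (trunc (i32 (add (any_extend x), (any_extend y)))))
/// when the target reports i16 as undesirable for the operation.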
1496SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1497 if (!LegalOperations)
1498 return SDValue();
1499
1500 EVT VT = Op.getValueType();
1501 if (VT.isVector() || !VT.isInteger())
1502 return SDValue();
1503
1504 // If operation type is 'undesirable', e.g. i16 on x86, consider
1505 // promoting it.
1506 unsigned Opc = Op.getOpcode();
1507 if (TLI.isTypeDesirableForOp(Opc, VT))
1508 return SDValue();
1509
1510 EVT PVT = VT;
1511 // Consult target whether it is a good idea to promote this operation and
1512 // what's the right type to promote it to.
1513 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1514 assert(PVT != VT && "Don't know what type to promote to!");
1515
1516 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1517
1518 bool Replace0 = false;
1519 SDValue N0 = Op.getOperand(0);
1520 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1521
1522 bool Replace1 = false;
1523 SDValue N1 = Op.getOperand(1);
1524 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1525 SDLoc DL(Op);
1526
1527 SDValue RV =
1528 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1529
1530 // We are always replacing N0/N1's use in N and only need additional
1531 // replacements if there are additional uses.
1532 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1533 // (SDValue) here because the node may reference multiple values
1534 // (for example, the chain value of a load node).
1535 Replace0 &= !N0->hasOneUse();
1536 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1537
1538 // Combine Op here so it is preserved past replacements.
1539 CombineTo(Op.getNode(), RV);
1540
1541 // If operands have a use ordering, make sure we deal with
1542 // predecessor first.
1543 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1544 std::swap(N0, N1);
1545 std::swap(NN0, NN1);
1546 }
1547
1548 if (Replace0) {
1549 AddToWorklist(NN0.getNode());
1550 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1551 }
1552 if (Replace1) {
1553 AddToWorklist(NN1.getNode());
1554 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1555 }
1556 return Op;
1557 }
1558 return SDValue();
1559}
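// Worked example for PromoteIntBinOp (illustrative, assuming a target where
// i16 is undesirable and the target selects PVT = i32):
//   t3: i16 = add t1, t2
// is rebuilt roughly as
//   t4: i32 = any_extend t1          (or a promoted load, see PromoteOperand)
//   t5: i32 = any_extend t2
//   t6: i32 = add t4, t5
//   t7: i16 = truncate t6
// and t7 replaces all uses of t3 via CombineTo.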
1560
1561/// Promote the specified integer shift operation if the target indicates it is
1562/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1563/// i32 since i16 instructions are longer.
1564SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1565 if (!LegalOperations)
1566 return SDValue();
1567
1568 EVT VT = Op.getValueType();
1569 if (VT.isVector() || !VT.isInteger())
1570 return SDValue();
1571
1572 // If operation type is 'undesirable', e.g. i16 on x86, consider
1573 // promoting it.
1574 unsigned Opc = Op.getOpcode();
1575 if (TLI.isTypeDesirableForOp(Opc, VT))
1576 return SDValue();
1577
1578 EVT PVT = VT;
1579 // Consult target whether it is a good idea to promote this operation and
1580 // what's the right type to promote it to.
1581 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1582 assert(PVT != VT && "Don't know what type to promote to!");
1583
1584 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1585
1586 bool Replace = false;
1587 SDValue N0 = Op.getOperand(0);
1588 if (Opc == ISD::SRA)
1589 N0 = SExtPromoteOperand(N0, PVT);
1590 else if (Opc == ISD::SRL)
1591 N0 = ZExtPromoteOperand(N0, PVT);
1592 else
1593 N0 = PromoteOperand(N0, PVT, Replace);
1594
1595 if (!N0.getNode())
1596 return SDValue();
1597
1598 SDLoc DL(Op);
1599 SDValue N1 = Op.getOperand(1);
1600 SDValue RV =
1601 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1602
1603 if (Replace)
1604 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1605
1606 // Deal with Op being deleted.
1607 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1608 return RV;
1609 }
1610 return SDValue();
1611}
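// The promotion chosen for the shifted value above matters for correctness
// (illustrative, assuming an i16 -> i32 promotion):
//   sra: sign-extend the value so copies of the sign bit shift in from the top.
//   srl: zero-extend the value so zeros shift in from the top.
//   shl: any_extend suffices because the truncate discards the high bits.
// For example, t2: i16 = srl t1, t_amt becomes roughly
//   t3: i32 = and <promoted t1>, Constant:i32<0xffff>
//   t4: i32 = srl t3, t_amt          (the shift amount operand is reused as-is)
//   t5: i16 = truncate t4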
1612
1613SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1614 if (!LegalOperations)
1615 return SDValue();
1616
1617 EVT VT = Op.getValueType();
1618 if (VT.isVector() || !VT.isInteger())
1619 return SDValue();
1620
1621 // If operation type is 'undesirable', e.g. i16 on x86, consider
1622 // promoting it.
1623 unsigned Opc = Op.getOpcode();
1624 if (TLI.isTypeDesirableForOp(Opc, VT))
1625 return SDValue();
1626
1627 EVT PVT = VT;
1628 // Consult target whether it is a good idea to promote this operation and
1629 // what's the right type to promote it to.
1630 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1631 assert(PVT != VT && "Don't know what type to promote to!");
1632 // fold (aext (aext x)) -> (aext x)
1633 // fold (aext (zext x)) -> (zext x)
1634 // fold (aext (sext x)) -> (sext x)
1635 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1636 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1637 }
1638 return SDValue();
1639}
1640
1641bool DAGCombiner::PromoteLoad(SDValue Op) {
1642 if (!LegalOperations)
1643 return false;
1644
1645 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1646 return false;
1647
1648 EVT VT = Op.getValueType();
1649 if (VT.isVector() || !VT.isInteger())
1650 return false;
1651
1652 // If operation type is 'undesirable', e.g. i16 on x86, consider
1653 // promoting it.
1654 unsigned Opc = Op.getOpcode();
1655 if (TLI.isTypeDesirableForOp(Opc, VT))
1656 return false;
1657
1658 EVT PVT = VT;
1659 // Consult target whether it is a good idea to promote this operation and
1660 // what's the right type to promote it to.
1661 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1662 assert(PVT != VT && "Don't know what type to promote to!");
1663
1664 SDLoc DL(Op);
1665 SDNode *N = Op.getNode();
1666 LoadSDNode *LD = cast<LoadSDNode>(N);
1667 EVT MemVT = LD->getMemoryVT();
1668 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1669 : LD->getExtensionType();
1670 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1671 LD->getChain(), LD->getBasePtr(),
1672 MemVT, LD->getMemOperand());
1673 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1674
1675 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1676 Result.dump(&DAG); dbgs() << '\n');
1677
1678 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1679 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1680
1681 AddToWorklist(Result.getNode());
1682 recursivelyDeleteUnusedNodes(N);
1683 return true;
1684 }
1685
1686 return false;
1687}
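// Illustrative sketch of PromoteLoad (assuming i16 promoted to i32): a plain
//   t1: i16,ch = load<(load (s16) from %ptr)> t0, t_addr
// becomes an extending load plus a truncate,
//   t2: i32,ch = load<(load (s16) from %ptr), anyext from i16> t0, t_addr
//   t3: i16 = truncate t2
// with value uses redirected to t3 and chain uses to t2's chain result, so the
// memory ordering of the original load is preserved.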
1688
1689/// Recursively delete a node which has no uses and any operands for
1690/// which it is the only use.
1691///
1692/// Note that this both deletes the nodes and removes them from the worklist.
1693/// It also adds any nodes who have had a user deleted to the worklist as they
1694/// may now have only one use and subject to other combines.
1695bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1696 if (!N->use_empty())
1697 return false;
1698
1699 SmallSetVector<SDNode *, 16> Nodes;
1700 Nodes.insert(N);
1701 do {
1702 N = Nodes.pop_back_val();
1703 if (!N)
1704 continue;
1705
1706 if (N->use_empty()) {
1707 for (const SDValue &ChildN : N->op_values())
1708 Nodes.insert(ChildN.getNode());
1709
1710 removeFromWorklist(N);
1711 DAG.DeleteNode(N);
1712 } else {
1713 AddToWorklist(N);
1714 }
1715 } while (!Nodes.empty());
1716 return true;
1717}
1718
1719//===----------------------------------------------------------------------===//
1720// Main DAG Combiner implementation
1721//===----------------------------------------------------------------------===//
1722
1723void DAGCombiner::Run(CombineLevel AtLevel) {
1724 // set the instance variables, so that the various visit routines may use it.
1725 Level = AtLevel;
1726 LegalDAG = Level >= AfterLegalizeDAG;
1727 LegalOperations = Level >= AfterLegalizeVectorOps;
1728 LegalTypes = Level >= AfterLegalizeTypes;
1729
1730 WorklistInserter AddNodes(*this);
1731
1732 // Add all the dag nodes to the worklist.
1733 //
1734 // Note: Not all nodes are added to the PruningList here, because the only
1735 // nodes which can be deleted are those which have no uses, and all other
1736 // nodes which would otherwise be added to the worklist by the first call to
1737 // getNextWorklistEntry are already present in it.
1738 for (SDNode &Node : DAG.allnodes())
1739 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1740
1741 // Create a dummy node (which is not added to allnodes), that adds a reference
1742 // to the root node, preventing it from being deleted, and tracking any
1743 // changes of the root.
1744 HandleSDNode Dummy(DAG.getRoot());
1745
1746 // While we have a valid worklist entry node, try to combine it.
1747 while (SDNode *N = getNextWorklistEntry()) {
1748 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1749 // N is deleted from the DAG, since they too may now be dead or may have a
1750 // reduced number of uses, allowing other xforms.
1751 if (recursivelyDeleteUnusedNodes(N))
1752 continue;
1753
1754 WorklistRemover DeadNodes(*this);
1755
1756 // If this combine is running after legalizing the DAG, re-legalize any
1757 // nodes pulled off the worklist.
1758 if (LegalDAG) {
1759 SmallSetVector<SDNode *, 16> UpdatedNodes;
1760 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1761
1762 for (SDNode *LN : UpdatedNodes)
1763 AddToWorklistWithUsers(LN);
1764
1765 if (!NIsValid)
1766 continue;
1767 }
1768
1769 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1770
1771 // Add any operands of the new node which have not yet been combined to the
1772 // worklist as well. getNextWorklistEntry flags nodes that have been
1773 // combined before. Because the worklist uniques things already, this won't
1774 // repeatedly process the same operand.
1775 for (const SDValue &ChildN : N->op_values())
1776 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1777 /*SkipIfCombinedBefore=*/true);
1778
1779 SDValue RV = combine(N);
1780
1781 if (!RV.getNode())
1782 continue;
1783
1784 ++NodesCombined;
1785
1786 // If we get back the same node we passed in, rather than a new node or
1787 // zero, we know that the node must have defined multiple values and
1788 // CombineTo was used. Since CombineTo takes care of the worklist
1789 // mechanics for us, we have no work to do in this case.
1790 if (RV.getNode() == N)
1791 continue;
1792
1793 assert(N->getOpcode() != ISD::DELETED_NODE &&
1794 RV.getOpcode() != ISD::DELETED_NODE &&
1795 "Node was deleted but visit returned new node!");
1796
1797 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1798
1799 if (N->getNumValues() == RV->getNumValues())
1800 DAG.ReplaceAllUsesWith(N, RV.getNode());
1801 else {
1802 assert(N->getValueType(0) == RV.getValueType() &&
1803 N->getNumValues() == 1 && "Type mismatch");
1804 DAG.ReplaceAllUsesWith(N, &RV);
1805 }
1806
1807 // Push the new node and any users onto the worklist. Omit this if the
1808 // new node is the EntryToken (e.g. if a store managed to get optimized
1809 // out), because re-visiting the EntryToken and its users will not uncover
1810 // any additional opportunities, but there may be a large number of such
1811 // users, potentially causing compile time explosion.
1812 if (RV.getOpcode() != ISD::EntryToken)
1813 AddToWorklistWithUsers(RV.getNode());
1814
1815 // Finally, if the node is now dead, remove it from the graph. The node
1816 // may not be dead if the replacement process recursively simplified to
1817 // something else needing this node. This will also take care of adding any
1818 // operands which have lost a user to the worklist.
1819 recursivelyDeleteUnusedNodes(N);
1820 }
1821
1822 // If the root changed (e.g. it was a dead load), update the root.
1823 DAG.setRoot(Dummy.getValue());
1824 DAG.RemoveDeadNodes();
1825}
1826
1827SDValue DAGCombiner::visit(SDNode *N) {
1828 // clang-format off
1829 switch (N->getOpcode()) {
1830 default: break;
1831 case ISD::TokenFactor: return visitTokenFactor(N);
1832 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1833 case ISD::ADD: return visitADD(N);
1834 case ISD::SUB: return visitSUB(N);
1835 case ISD::SADDSAT:
1836 case ISD::UADDSAT: return visitADDSAT(N);
1837 case ISD::SSUBSAT:
1838 case ISD::USUBSAT: return visitSUBSAT(N);
1839 case ISD::ADDC: return visitADDC(N);
1840 case ISD::SADDO:
1841 case ISD::UADDO: return visitADDO(N);
1842 case ISD::SUBC: return visitSUBC(N);
1843 case ISD::SSUBO:
1844 case ISD::USUBO: return visitSUBO(N);
1845 case ISD::ADDE: return visitADDE(N);
1846 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1847 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1848 case ISD::SUBE: return visitSUBE(N);
1849 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1850 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1851 case ISD::SMULFIX:
1852 case ISD::SMULFIXSAT:
1853 case ISD::UMULFIX:
1854 case ISD::UMULFIXSAT: return visitMULFIX(N);
1855 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1856 case ISD::SDIV: return visitSDIV(N);
1857 case ISD::UDIV: return visitUDIV(N);
1858 case ISD::SREM:
1859 case ISD::UREM: return visitREM(N);
1860 case ISD::MULHU: return visitMULHU(N);
1861 case ISD::MULHS: return visitMULHS(N);
1862 case ISD::AVGFLOORS:
1863 case ISD::AVGFLOORU:
1864 case ISD::AVGCEILS:
1865 case ISD::AVGCEILU: return visitAVG(N);
1866 case ISD::ABDS:
1867 case ISD::ABDU: return visitABD(N);
1868 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1869 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1870 case ISD::SMULO:
1871 case ISD::UMULO: return visitMULO(N);
1872 case ISD::SMIN:
1873 case ISD::SMAX:
1874 case ISD::UMIN:
1875 case ISD::UMAX: return visitIMINMAX(N);
1876 case ISD::AND: return visitAND(N);
1877 case ISD::OR: return visitOR(N);
1878 case ISD::XOR: return visitXOR(N);
1879 case ISD::SHL: return visitSHL(N);
1880 case ISD::SRA: return visitSRA(N);
1881 case ISD::SRL: return visitSRL(N);
1882 case ISD::ROTR:
1883 case ISD::ROTL: return visitRotate(N);
1884 case ISD::FSHL:
1885 case ISD::FSHR: return visitFunnelShift(N);
1886 case ISD::SSHLSAT:
1887 case ISD::USHLSAT: return visitSHLSAT(N);
1888 case ISD::ABS: return visitABS(N);
1889 case ISD::BSWAP: return visitBSWAP(N);
1890 case ISD::BITREVERSE: return visitBITREVERSE(N);
1891 case ISD::CTLZ: return visitCTLZ(N);
1892 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1893 case ISD::CTTZ: return visitCTTZ(N);
1894 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1895 case ISD::CTPOP: return visitCTPOP(N);
1896 case ISD::SELECT: return visitSELECT(N);
1897 case ISD::VSELECT: return visitVSELECT(N);
1898 case ISD::SELECT_CC: return visitSELECT_CC(N);
1899 case ISD::SETCC: return visitSETCC(N);
1900 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1901 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1902 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1903 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1904 case ISD::AssertSext:
1905 case ISD::AssertZext: return visitAssertExt(N);
1906 case ISD::AssertAlign: return visitAssertAlign(N);
1907 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1908 case ISD::SIGN_EXTEND_VECTOR_INREG:
1909 case ISD::ZERO_EXTEND_VECTOR_INREG:
1910 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1911 case ISD::TRUNCATE: return visitTRUNCATE(N);
1912 case ISD::BITCAST: return visitBITCAST(N);
1913 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1914 case ISD::FADD: return visitFADD(N);
1915 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1916 case ISD::FSUB: return visitFSUB(N);
1917 case ISD::FMUL: return visitFMUL(N);
1918 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1919 case ISD::FMAD: return visitFMAD(N);
1920 case ISD::FDIV: return visitFDIV(N);
1921 case ISD::FREM: return visitFREM(N);
1922 case ISD::FSQRT: return visitFSQRT(N);
1923 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1924 case ISD::FPOW: return visitFPOW(N);
1925 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1926 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1927 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1928 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1929 case ISD::LRINT:
1930 case ISD::LLRINT: return visitXRINT(N);
1931 case ISD::FP_ROUND: return visitFP_ROUND(N);
1932 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1933 case ISD::FNEG: return visitFNEG(N);
1934 case ISD::FABS: return visitFABS(N);
1935 case ISD::FFLOOR: return visitFFLOOR(N);
1936 case ISD::FMINNUM:
1937 case ISD::FMAXNUM:
1938 case ISD::FMINIMUM:
1939 case ISD::FMAXIMUM: return visitFMinMax(N);
1940 case ISD::FCEIL: return visitFCEIL(N);
1941 case ISD::FTRUNC: return visitFTRUNC(N);
1942 case ISD::FFREXP: return visitFFREXP(N);
1943 case ISD::BRCOND: return visitBRCOND(N);
1944 case ISD::BR_CC: return visitBR_CC(N);
1945 case ISD::LOAD: return visitLOAD(N);
1946 case ISD::STORE: return visitSTORE(N);
1947 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1948 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1949 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1950 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1951 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1952 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1953 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1954 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1955 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1956 case ISD::MGATHER: return visitMGATHER(N);
1957 case ISD::MLOAD: return visitMLOAD(N);
1958 case ISD::MSCATTER: return visitMSCATTER(N);
1959 case ISD::MSTORE: return visitMSTORE(N);
1960 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1961 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1962 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1963 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1964 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1965 case ISD::FREEZE: return visitFREEZE(N);
1966 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1967 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1968 case ISD::VECREDUCE_FADD:
1969 case ISD::VECREDUCE_FMUL:
1970 case ISD::VECREDUCE_ADD:
1971 case ISD::VECREDUCE_MUL:
1972 case ISD::VECREDUCE_AND:
1973 case ISD::VECREDUCE_OR:
1974 case ISD::VECREDUCE_XOR:
1975 case ISD::VECREDUCE_SMAX:
1976 case ISD::VECREDUCE_SMIN:
1977 case ISD::VECREDUCE_UMAX:
1978 case ISD::VECREDUCE_UMIN:
1979 case ISD::VECREDUCE_FMAX:
1980 case ISD::VECREDUCE_FMIN:
1981 case ISD::VECREDUCE_FMAXIMUM:
1982 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1983#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1984#include "llvm/IR/VPIntrinsics.def"
1985 return visitVPOp(N);
1986 }
1987 // clang-format on
1988 return SDValue();
1989}
1990
1991SDValue DAGCombiner::combine(SDNode *N) {
1992 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1993 return SDValue();
1994
1995 SDValue RV;
1996 if (!DisableGenericCombines)
1997 RV = visit(N);
1998
1999 // If nothing happened, try a target-specific DAG combine.
2000 if (!RV.getNode()) {
2001 assert(N->getOpcode() != ISD::DELETED_NODE &&
2002 "Node was deleted but visit returned NULL!");
2003
2004 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2005 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2006
2007 // Expose the DAG combiner to the target combiner impls.
2008 TargetLowering::DAGCombinerInfo
2009 DagCombineInfo(DAG, Level, false, this);
2010
2011 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2012 }
2013 }
2014
2015 // If nothing happened still, try promoting the operation.
2016 if (!RV.getNode()) {
2017 switch (N->getOpcode()) {
2018 default: break;
2019 case ISD::ADD:
2020 case ISD::SUB:
2021 case ISD::MUL:
2022 case ISD::AND:
2023 case ISD::OR:
2024 case ISD::XOR:
2025 RV = PromoteIntBinOp(SDValue(N, 0));
2026 break;
2027 case ISD::SHL:
2028 case ISD::SRA:
2029 case ISD::SRL:
2030 RV = PromoteIntShiftOp(SDValue(N, 0));
2031 break;
2032 case ISD::SIGN_EXTEND:
2033 case ISD::ZERO_EXTEND:
2034 case ISD::ANY_EXTEND:
2035 RV = PromoteExtend(SDValue(N, 0));
2036 break;
2037 case ISD::LOAD:
2038 if (PromoteLoad(SDValue(N, 0)))
2039 RV = SDValue(N, 0);
2040 break;
2041 }
2042 }
2043
2044 // If N is a commutative binary node, try to eliminate it if the commuted
2045 // version is already present in the DAG.
2046 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2047 SDValue N0 = N->getOperand(0);
2048 SDValue N1 = N->getOperand(1);
2049
2050 // Constant operands are canonicalized to RHS.
2051 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2052 SDValue Ops[] = {N1, N0};
2053 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2054 N->getFlags());
2055 if (CSENode)
2056 return SDValue(CSENode, 0);
2057 }
2058 }
2059
2060 return RV;
2061}
2062
2063/// Given a node, return its input chain if it has one, otherwise return a null
2064/// sd operand.
2065 static SDValue getInputChainForNode(SDNode *N) {
2066 if (unsigned NumOps = N->getNumOperands()) {
2067 if (N->getOperand(0).getValueType() == MVT::Other)
2068 return N->getOperand(0);
2069 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2070 return N->getOperand(NumOps-1);
2071 for (unsigned i = 1; i < NumOps-1; ++i)
2072 if (N->getOperand(i).getValueType() == MVT::Other)
2073 return N->getOperand(i);
2074 }
2075 return SDValue();
2076}
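// Illustrative example: a load node's operands are (chain, base, offset), so
// its chain is found as operand 0; the helper also checks the last operand and
// then scans the remaining operands for an MVT::Other-typed value.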
2077
2078SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2079 // If N has two operands, where one has an input chain equal to the other,
2080 // the 'other' chain is redundant.
2081 if (N->getNumOperands() == 2) {
2082 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2083 return N->getOperand(0);
2084 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2085 return N->getOperand(1);
2086 }
2087
2088 // Don't simplify token factors if optnone.
2089 if (OptLevel == CodeGenOptLevel::None)
2090 return SDValue();
2091
2092 // Don't simplify the token factor if the node itself has too many operands.
2093 if (N->getNumOperands() > TokenFactorInlineLimit)
2094 return SDValue();
2095
2096 // If the sole user is a token factor, we should make sure we have a
2097 // chance to merge them together. This prevents TF chains from inhibiting
2098 // optimizations.
2099 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2100 AddToWorklist(*(N->use_begin()));
2101
2102 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2103 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2104 SmallPtrSet<SDNode *, 16> SeenOps;
2105 bool Changed = false; // If we should replace this token factor.
2106
2107 // Start out with this token factor.
2108 TFs.push_back(N);
2109
2110 // Iterate through token factors. The TFs list grows when new token
2111 // factors are encountered.
2112 for (unsigned i = 0; i < TFs.size(); ++i) {
2113 // Limit number of nodes to inline, to avoid quadratic compile times.
2114 // We have to add the outstanding Token Factors to Ops, otherwise we might
2115 // drop Ops from the resulting Token Factors.
2116 if (Ops.size() > TokenFactorInlineLimit) {
2117 for (unsigned j = i; j < TFs.size(); j++)
2118 Ops.emplace_back(TFs[j], 0);
2119 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2120 // combiner worklist later.
2121 TFs.resize(i);
2122 break;
2123 }
2124
2125 SDNode *TF = TFs[i];
2126 // Check each of the operands.
2127 for (const SDValue &Op : TF->op_values()) {
2128 switch (Op.getOpcode()) {
2129 case ISD::EntryToken:
2130 // Entry tokens don't need to be added to the list. They are
2131 // redundant.
2132 Changed = true;
2133 break;
2134
2135 case ISD::TokenFactor:
2136 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2137 // Queue up for processing.
2138 TFs.push_back(Op.getNode());
2139 Changed = true;
2140 break;
2141 }
2142 [[fallthrough]];
2143
2144 default:
2145 // Only add if it isn't already in the list.
2146 if (SeenOps.insert(Op.getNode()).second)
2147 Ops.push_back(Op);
2148 else
2149 Changed = true;
2150 break;
2151 }
2152 }
2153 }
2154
2155 // Re-visit inlined Token Factors, to clean them up in case they have been
2156 // removed. Skip the first Token Factor, as this is the current node.
2157 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2158 AddToWorklist(TFs[i]);
2159
2160 // Remove Nodes that are chained to another node in the list. Do so
2161 // by walking up chains breadth-first, stopping when we've seen
2162 // another operand. In general we must climb to the EntryNode, but we can exit
2163 // early if we find all remaining work is associated with just one operand as
2164 // no further pruning is possible.
2165
2166 // List of nodes to search through and original Ops from which they originate.
2167 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2168 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2169 SmallPtrSet<SDNode *, 16> SeenChains;
2170 bool DidPruneOps = false;
2171
2172 unsigned NumLeftToConsider = 0;
2173 for (const SDValue &Op : Ops) {
2174 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2175 OpWorkCount.push_back(1);
2176 }
2177
2178 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2179 // If this is an Op, we can remove the op from the list. Re-mark any
2180 // search associated with it as coming from the current OpNumber.
2181 if (SeenOps.contains(Op)) {
2182 Changed = true;
2183 DidPruneOps = true;
2184 unsigned OrigOpNumber = 0;
2185 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2186 OrigOpNumber++;
2187 assert((OrigOpNumber != Ops.size()) &&
2188 "expected to find TokenFactor Operand");
2189 // Re-mark worklist from OrigOpNumber to OpNumber
2190 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2191 if (Worklist[i].second == OrigOpNumber) {
2192 Worklist[i].second = OpNumber;
2193 }
2194 }
2195 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2196 OpWorkCount[OrigOpNumber] = 0;
2197 NumLeftToConsider--;
2198 }
2199 // Add if it's a new chain
2200 if (SeenChains.insert(Op).second) {
2201 OpWorkCount[OpNumber]++;
2202 Worklist.push_back(std::make_pair(Op, OpNumber));
2203 }
2204 };
2205
2206 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2207 // We need to consider at least 2 Ops to prune.
2208 if (NumLeftToConsider <= 1)
2209 break;
2210 auto CurNode = Worklist[i].first;
2211 auto CurOpNumber = Worklist[i].second;
2212 assert((OpWorkCount[CurOpNumber] > 0) &&
2213 "Node should not appear in worklist");
2214 switch (CurNode->getOpcode()) {
2215 case ISD::EntryToken:
2216 // Hitting EntryToken is the only way for the search to terminate without
2217 // hitting another operand's search. Prevent us from marking this operand
2218 // considered.
2220 NumLeftToConsider++;
2221 break;
2222 case ISD::TokenFactor:
2223 for (const SDValue &Op : CurNode->op_values())
2224 AddToWorklist(i, Op.getNode(), CurOpNumber);
2225 break;
2226 case ISD::LIFETIME_START:
2227 case ISD::LIFETIME_END:
2228 case ISD::CopyFromReg:
2229 case ISD::CopyToReg:
2230 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2231 break;
2232 default:
2233 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2234 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2235 break;
2236 }
2237 OpWorkCount[CurOpNumber]--;
2238 if (OpWorkCount[CurOpNumber] == 0)
2239 NumLeftToConsider--;
2240 }
2241
2242 // If we've changed things around then replace token factor.
2243 if (Changed) {
2244 SDValue Result;
2245 if (Ops.empty()) {
2246 // The entry token is the only possible outcome.
2247 Result = DAG.getEntryNode();
2248 } else {
2249 if (DidPruneOps) {
2250 SmallVector<SDValue, 8> PrunedOps;
2251 //
2252 for (const SDValue &Op : Ops) {
2253 if (SeenChains.count(Op.getNode()) == 0)
2254 PrunedOps.push_back(Op);
2255 }
2256 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2257 } else {
2258 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2259 }
2260 }
2261 return Result;
2262 }
2263 return SDValue();
2264}
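// Illustrative example of the flattening above:
//   t5: ch = TokenFactor t2, t4
//   t6: ch = TokenFactor t5, t3          (t5 has a single use)
// is rebuilt as
//   t7: ch = TokenFactor t2, t4, t3
// and if t3 is also reachable by walking up t4's chain, the pruning walk above
// drops t3 from the operand list as redundant.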
2265
2266/// MERGE_VALUES can always be eliminated.
2267SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2268 WorklistRemover DeadNodes(*this);
2269 // Replacing results may cause a different MERGE_VALUES to suddenly
2270 // be CSE'd with N, and carry its uses with it. Iterate until no
2271 // uses remain, to ensure that the node can be safely deleted.
2272 // First add the users of this node to the work list so that they
2273 // can be tried again once they have new operands.
2274 AddUsersToWorklist(N);
2275 do {
2276 // Do as a single replacement to avoid rewalking use lists.
2277 SmallVector<SDValue, 8> Ops;
2278 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2279 Ops.push_back(N->getOperand(i));
2280 DAG.ReplaceAllUsesWith(N, Ops.data());
2281 } while (!N->use_empty());
2282 deleteAndRecombine(N);
2283 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2284}
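// Illustrative example: for
//   t3: i32,i32 = merge_values t1, t2
// every use of t3's value 0 is rewritten to t1 and every use of value 1 to t2,
// after which t3 is dead and deleted.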
2285
2286/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2287/// ConstantSDNode pointer else nullptr.
2288 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2289 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2290 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2291}
2292
2293// isTruncateOf - If N is a truncate of some other value, return true, record
2294// the value being truncated in Op and which of Op's bits are zero/one in Known.
2295// This function computes KnownBits to avoid a duplicated call to
2296// computeKnownBits in the caller.
2297 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2298 KnownBits &Known) {
2299 if (N->getOpcode() == ISD::TRUNCATE) {
2300 Op = N->getOperand(0);
2301 Known = DAG.computeKnownBits(Op);
2302 return true;
2303 }
2304
2305 if (N.getOpcode() != ISD::SETCC ||
2306 N.getValueType().getScalarType() != MVT::i1 ||
2307 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2308 return false;
2309
2310 SDValue Op0 = N->getOperand(0);
2311 SDValue Op1 = N->getOperand(1);
2312 assert(Op0.getValueType() == Op1.getValueType());
2313
2314 if (isNullOrNullSplat(Op0))
2315 Op = Op1;
2316 else if (isNullOrNullSplat(Op1))
2317 Op = Op0;
2318 else
2319 return false;
2320
2321 Known = DAG.computeKnownBits(Op);
2322
2323 return (Known.Zero | 1).isAllOnes();
2324}
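// Illustrative example of the setcc form: if computeKnownBits shows that only
// bit 0 of X can be set, then
//   t2: i1 = setcc t_X, Constant<0>, setne
// produces the same value as a truncate of X to i1, so callers may treat X as
// the pre-truncation value, with Known describing its bits.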
2325
2326/// Return true if 'Use' is a load or a store that uses N as its base pointer
2327/// and that N may be folded in the load / store addressing mode.
2328 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2329 const TargetLowering &TLI) {
2330 EVT VT;
2331 unsigned AS;
2332
2333 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2334 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2335 return false;
2336 VT = LD->getMemoryVT();
2337 AS = LD->getAddressSpace();
2338 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2339 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2340 return false;
2341 VT = ST->getMemoryVT();
2342 AS = ST->getAddressSpace();
2343 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2344 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2345 return false;
2346 VT = LD->getMemoryVT();
2347 AS = LD->getAddressSpace();
2348 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2349 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2350 return false;
2351 VT = ST->getMemoryVT();
2352 AS = ST->getAddressSpace();
2353 } else {
2354 return false;
2355 }
2356
2357 TargetLowering::AddrMode AM;
2358 if (N->getOpcode() == ISD::ADD) {
2359 AM.HasBaseReg = true;
2360 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2361 if (Offset)
2362 // [reg +/- imm]
2363 AM.BaseOffs = Offset->getSExtValue();
2364 else
2365 // [reg +/- reg]
2366 AM.Scale = 1;
2367 } else if (N->getOpcode() == ISD::SUB) {
2368 AM.HasBaseReg = true;
2369 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2370 if (Offset)
2371 // [reg +/- imm]
2372 AM.BaseOffs = -Offset->getSExtValue();
2373 else
2374 // [reg +/- reg]
2375 AM.Scale = 1;
2376 } else {
2377 return false;
2378 }
2379
2380 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2381 VT.getTypeForEVT(*DAG.getContext()), AS);
2382}
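// Illustrative query built by the helper above: for a use such as
//   t4: i32,ch = load t0, (add t_base, Constant<8>), undef
// it asks the target whether { HasBaseReg = true, BaseOffs = 8 } is a legal
// addressing mode for an i32 access in that address space; for
// (add t_base, t_index) it asks about { HasBaseReg = true, Scale = 1 } instead.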
2383
2384/// This inverts a canonicalization in IR that replaces a variable select arm
2385/// with an identity constant. Codegen improves if we re-use the variable
2386/// operand rather than load a constant. This can also be converted into a
2387/// masked vector operation if the target supports it.
2388 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2389 bool ShouldCommuteOperands) {
2390 // Match a select as operand 1. The identity constant that we are looking for
2391 // is only valid as operand 1 of a non-commutative binop.
2392 SDValue N0 = N->getOperand(0);
2393 SDValue N1 = N->getOperand(1);
2394 if (ShouldCommuteOperands)
2395 std::swap(N0, N1);
2396
2397 // TODO: Should this apply to scalar select too?
2398 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2399 return SDValue();
2400
2401 // We can't hoist all instructions because of immediate UB (not speculatable).
2402 // For example div/rem by zero.
2403 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2404 return SDValue();
2405
2406 unsigned Opcode = N->getOpcode();
2407 EVT VT = N->getValueType(0);
2408 SDValue Cond = N1.getOperand(0);
2409 SDValue TVal = N1.getOperand(1);
2410 SDValue FVal = N1.getOperand(2);
2411
2412 // This transform increases uses of N0, so freeze it to be safe.
2413 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2414 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2415 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2416 SDValue F0 = DAG.getFreeze(N0);
2417 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2418 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2419 }
2420 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2421 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2422 SDValue F0 = DAG.getFreeze(N0);
2423 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2424 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2425 }
2426
2427 return SDValue();
2428}
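// Illustrative example of the fold above, using 0 as the identity of add:
//   t5: v4i32 = add t_x, (vselect t_c, zeroinitializer, t_y)
// becomes roughly
//   t6: v4i32 = add freeze(t_x), t_y
//   t7: v4i32 = vselect t_c, freeze(t_x), t6
// reusing the variable operand in both arms instead of materializing the
// identity-constant vector.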
2429
2430SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2431 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2432 "Unexpected binary operator");
2433
2434 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2435 auto BinOpcode = BO->getOpcode();
2436 EVT VT = BO->getValueType(0);
2437 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2438 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2439 return Sel;
2440
2441 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2442 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2443 return Sel;
2444 }
2445
2446 // Don't do this unless the old select is going away. We want to eliminate the
2447 // binary operator, not replace a binop with a select.
2448 // TODO: Handle ISD::SELECT_CC.
2449 unsigned SelOpNo = 0;
2450 SDValue Sel = BO->getOperand(0);
2451 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2452 SelOpNo = 1;
2453 Sel = BO->getOperand(1);
2454
2455 // Peek through trunc to shift amount type.
2456 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2457 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2458 // This is valid when the truncated bits of x are already zero.
2459 SDValue Op;
2460 KnownBits Known;
2461 if (isTruncateOf(DAG, Sel, Op, Known) &&
2462 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2463 Sel = Op;
2464 }
2465 }
2466
2467 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2468 return SDValue();
2469
2470 SDValue CT = Sel.getOperand(1);
2471 if (!isConstantOrConstantVector(CT, true) &&
2472 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2473 return SDValue();
2474
2475 SDValue CF = Sel.getOperand(2);
2476 if (!isConstantOrConstantVector(CF, true) &&
2477 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2478 return SDValue();
2479
2480 // Bail out if any constants are opaque because we can't constant fold those.
2481 // The exception is "and" and "or" with either 0 or -1 in which case we can
2482 // propagate non constant operands into select. I.e.:
2483 // and (select Cond, 0, -1), X --> select Cond, 0, X
2484 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2485 bool CanFoldNonConst =
2486 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2487 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2488 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2489
2490 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2491 if (!CanFoldNonConst &&
2492 !isConstantOrConstantVector(CBO, true) &&
2493 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2494 return SDValue();
2495
2496 SDLoc DL(Sel);
2497 SDValue NewCT, NewCF;
2498
2499 if (CanFoldNonConst) {
2500 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2501 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2502 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2503 NewCT = CT;
2504 else
2505 NewCT = CBO;
2506
2507 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2508 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2509 NewCF = CF;
2510 else
2511 NewCF = CBO;
2512 } else {
2513 // We have a select-of-constants followed by a binary operator with a
2514 // constant. Eliminate the binop by pulling the constant math into the
2515 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2516 // CBO, CF + CBO
2517 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2518 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2519 if (!NewCT)
2520 return SDValue();
2521
2522 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2523 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2524 if (!NewCF)
2525 return SDValue();
2526 }
2527
2528 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2529 SelectOp->setFlags(BO->getFlags());
2530 return SelectOp;
2531}
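// Worked example of the constant path above (illustrative):
//   t4: i32 = add (select t_c, Constant<1>, Constant<2>), Constant<10>
// becomes
//   t5: i32 = select t_c, Constant<11>, Constant<12>
// eliminating the add. The non-constant path only fires for and/or whose select
// arms are 0/-1, e.g. and (select t_c, 0, -1), t_x --> select t_c, 0, t_x.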
2532
2534 SelectionDAG &DAG) {
2535 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2536 "Expecting add or sub");
2537
2538 // Match a constant operand and a zext operand for the math instruction:
2539 // add Z, C
2540 // sub C, Z
2541 bool IsAdd = N->getOpcode() == ISD::ADD;
2542 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2543 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2544 auto *CN = dyn_cast<ConstantSDNode>(C);
2545 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2546 return SDValue();
2547
2548 // Match the zext operand as a setcc of a boolean.
2549 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2550 Z.getOperand(0).getValueType() != MVT::i1)
2551 return SDValue();
2552
2553 // Match the compare as: setcc (X & 1), 0, eq.
2554 SDValue SetCC = Z.getOperand(0);
2555 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2556 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2557 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2558 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2559 return SDValue();
2560
2561 // We are adding/subtracting a constant and an inverted low bit. Turn that
2562 // into a subtract/add of the low bit with incremented/decremented constant:
2563 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2564 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2565 EVT VT = C.getValueType();
2566 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2567 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2568 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2569 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2570}
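// Worked example (illustrative): writing b for the low bit of X,
//   add (zext i1 (seteq (and X, 1), 0)), 5
// computes (1 - b) + 5 = 6 - b, which is exactly
//   sub Constant<6>, (zext (and X, 1))
// i.e. the C+1 / C-1 adjustment performed above.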
2571
2572// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2573SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2574 SDValue N0 = N->getOperand(0);
2575 EVT VT = N0.getValueType();
2576 SDValue A, B;
2577
2578 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2579 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2580 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2581 m_SpecificInt(1))))) {
2582 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2583 }
2584 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2585 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2586 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2587 m_SpecificInt(1))))) {
2588 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2589 }
2590 return SDValue();
2591}
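// Why the matched identity holds (sketch): for any A and B,
//   A + B = 2*(A & B) + (A ^ B)   and   A | B = (A & B) + (A ^ B),
// so (A | B) - ((A ^ B) >> 1) = (A & B) + ceil((A ^ B) / 2) = ceil((A + B) / 2)
// computed without widening, which is AVGCEILU (AVGCEILS for the
// arithmetic-shift form on signed operands).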
2592
2593/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2594/// a shift and add with a different constant.
2596 SelectionDAG &DAG) {
2597 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2598 "Expecting add or sub");
2599
2600 // We need a constant operand for the add/sub, and the other operand is a
2601 // logical shift right: add (srl), C or sub C, (srl).
2602 bool IsAdd = N->getOpcode() == ISD::ADD;
2603 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2604 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2605 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2606 ShiftOp.getOpcode() != ISD::SRL)
2607 return SDValue();
2608
2609 // The shift must be of a 'not' value.
2610 SDValue Not = ShiftOp.getOperand(0);
2611 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2612 return SDValue();
2613
2614 // The shift must be moving the sign bit to the least-significant-bit.
2615 EVT VT = ShiftOp.getValueType();
2616 SDValue ShAmt = ShiftOp.getOperand(1);
2617 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2618 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2619 return SDValue();
2620
2621 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2622 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2623 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2624 if (SDValue NewC = DAG.FoldConstantArithmetic(
2625 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2626 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2627 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2628 Not.getOperand(0), ShAmt);
2629 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2630 }
2631
2632 return SDValue();
2633}
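// Worked example (illustrative, i32): srl (not X), 31 is 1 minus X's sign bit,
// so add (srl (xor X, -1), 31), C computes C + 1 - signbit(X), which equals
// add (sra X, 31), C+1 since sra X, 31 is 0 or -1. The sub form is the mirror
// image: sub C, (srl (not X), 31) equals add (srl X, 31), C-1.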
2634
2635static bool
2636 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2637 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2638 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2639}
2640
2641/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2642/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2643/// are no common bits set in the operands).
2644SDValue DAGCombiner::visitADDLike(SDNode *N) {
2645 SDValue N0 = N->getOperand(0);
2646 SDValue N1 = N->getOperand(1);
2647 EVT VT = N0.getValueType();
2648 SDLoc DL(N);
2649
2650 // fold (add x, undef) -> undef
2651 if (N0.isUndef())
2652 return N0;
2653 if (N1.isUndef())
2654 return N1;
2655
2656 // fold (add c1, c2) -> c1+c2
2657 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2658 return C;
2659
2660 // canonicalize constant to RHS
2661 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2662 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2663 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2664
2665 if (areBitwiseNotOfEachother(N0, N1))
2666 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2667 SDLoc(N), VT);
2668
2669 // fold vector ops
2670 if (VT.isVector()) {
2671 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2672 return FoldedVOp;
2673
2674 // fold (add x, 0) -> x, vector edition
2675 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2676 return N0;
2677 }
2678
2679 // fold (add x, 0) -> x
2680 if (isNullConstant(N1))
2681 return N0;
2682
2683 if (N0.getOpcode() == ISD::SUB) {
2684 SDValue N00 = N0.getOperand(0);
2685 SDValue N01 = N0.getOperand(1);
2686
2687 // fold ((A-c1)+c2) -> (A+(c2-c1))
2688 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2689 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2690
2691 // fold ((c1-A)+c2) -> (c1+c2)-A
2692 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2693 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2694 }
2695
2696 // add (sext i1 X), 1 -> zext (not i1 X)
2697 // We don't transform this pattern:
2698 // add (zext i1 X), -1 -> sext (not i1 X)
2699 // because most (?) targets generate better code for the zext form.
2700 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2701 isOneOrOneSplat(N1)) {
2702 SDValue X = N0.getOperand(0);
2703 if ((!LegalOperations ||
2704 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2705 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2706 X.getScalarValueSizeInBits() == 1) {
2707 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2708 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2709 }
2710 }
2711
2712 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2713 // iff (or x, c0) is equivalent to (add x, c0).
2714 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2715 // iff (xor x, c0) is equivalent to (add x, c0).
2716 if (DAG.isADDLike(N0)) {
2717 SDValue N01 = N0.getOperand(1);
2718 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2719 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2720 }
2721
2722 if (SDValue NewSel = foldBinOpIntoSelect(N))
2723 return NewSel;
2724
2725 // reassociate add
2726 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2727 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2728 return RADD;
2729
2730 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2731 // equivalent to (add x, c).
2732 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2733 // equivalent to (add x, c).
2734 // Do this optimization only when adding c does not introduce instructions
2735 // for adding carries.
2736 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2737 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2738 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2739 // If N0's type does not split or is a sign mask, it does not introduce
2740 // add carry.
2741 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2742 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2743 TyActn == TargetLoweringBase::TypePromoteInteger ||
2744 isMinSignedConstant(N0.getOperand(1));
2745 if (NoAddCarry)
2746 return DAG.getNode(
2747 ISD::ADD, DL, VT,
2748 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2749 N0.getOperand(1));
2750 }
2751 return SDValue();
2752 };
2753 if (SDValue Add = ReassociateAddOr(N0, N1))
2754 return Add;
2755 if (SDValue Add = ReassociateAddOr(N1, N0))
2756 return Add;
2757
2758 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2759 if (SDValue SD =
2760 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2761 return SD;
2762 }
2763
2764 SDValue A, B, C;
2765
2766 // fold ((0-A) + B) -> B-A
2767 if (sd_match(N0, m_Neg(m_Value(A))))
2768 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2769
2770 // fold (A + (0-B)) -> A-B
2771 if (sd_match(N1, m_Neg(m_Value(B))))
2772 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2773
2774 // fold (A+(B-A)) -> B
2775 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2776 return B;
2777
2778 // fold ((B-A)+A) -> B
2779 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2780 return B;
2781
2782 // fold ((A-B)+(C-A)) -> (C-B)
2783 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2784 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2785 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2786
2787 // fold ((A-B)+(B-C)) -> (A-C)
2788 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2789 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2790 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2791
2792 // fold (A+(B-(A+C))) to (B-C)
2793 // fold (A+(B-(C+A))) to (B-C)
2794 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2795 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2796
2797 // fold (A+((B-A)+or-C)) to (B+or-C)
2798 if (sd_match(N1,
2799 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2800 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2801 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2802
2803 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2804 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2805 N0->hasOneUse() && N1->hasOneUse()) {
2806 SDValue N00 = N0.getOperand(0);
2807 SDValue N01 = N0.getOperand(1);
2808 SDValue N10 = N1.getOperand(0);
2809 SDValue N11 = N1.getOperand(1);
2810
2811 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2812 return DAG.getNode(ISD::SUB, DL, VT,
2813 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2814 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2815 }
2816
2817 // fold (add (umax X, C), -C) --> (usubsat X, C)
2818 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2819 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2820 return (!Max && !Op) ||
2821 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2822 };
2823 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2824 /*AllowUndefs*/ true))
2825 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2826 N0.getOperand(1));
2827 }
2828
2829 if (SimplifyDemandedBits(SDValue(N, 0)))
2830 return SDValue(N, 0);
2831
2832 if (isOneOrOneSplat(N1)) {
2833 // fold (add (xor a, -1), 1) -> (sub 0, a)
2834 if (isBitwiseNot(N0))
2835 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2836 N0.getOperand(0));
2837
2838 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2839 if (N0.getOpcode() == ISD::ADD) {
2840 SDValue A, Xor;
2841
2842 if (isBitwiseNot(N0.getOperand(0))) {
2843 A = N0.getOperand(1);
2844 Xor = N0.getOperand(0);
2845 } else if (isBitwiseNot(N0.getOperand(1))) {
2846 A = N0.getOperand(0);
2847 Xor = N0.getOperand(1);
2848 }
2849
2850 if (Xor)
2851 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2852 }
2853
2854 // Look for:
2855 // add (add x, y), 1
2856 // And if the target does not like this form then turn into:
2857 // sub y, (xor x, -1)
2858 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2859 N0.hasOneUse() &&
2860 // Limit this to after legalization if the add has wrap flags
2861 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2862 !N->getFlags().hasNoSignedWrap()))) {
2863 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2864 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2865 }
2866 }
2867
2868 // (x - y) + -1 -> add (xor y, -1), x
2869 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2870 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2871 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2872 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2873 }
2874
2875 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2876 // This can help if the inner add has multiple uses.
2877 APInt CM, CA;
2878 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2879 if (VT.getScalarSizeInBits() <= 64) {
2880 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2881 m_ConstInt(CM)))) &&
2882 TLI.isLegalAddImmediate(
2883 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2884 SDNodeFlags Flags;
2885 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2886 // are _also_ nsw the outputs can be too.
2887 if (N->getFlags().hasNoUnsignedWrap() &&
2888 N0->getFlags().hasNoUnsignedWrap() &&
2889 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2890 Flags.setNoUnsignedWrap(true);
2891 if (N->getFlags().hasNoSignedWrap() &&
2892 N0->getFlags().hasNoSignedWrap() &&
2893 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2894 Flags.setNoSignedWrap(true);
2895 }
2896 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2897 DAG.getConstant(CM, DL, VT), Flags);
2898 return DAG.getNode(
2899 ISD::ADD, DL, VT, Mul,
2900 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2901 }
2902 // Also look in case there is an intermediate add.
2903 if (sd_match(N0, m_OneUse(m_Add(
2904 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2905 m_ConstInt(CM))),
2906 m_Value(B)))) &&
2907 TLI.isLegalAddImmediate(
2908 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2909 SDNodeFlags Flags;
2910 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2911 // are _also_ nsw the outputs can be too.
2912 SDValue OMul =
2913 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2914 if (N->getFlags().hasNoUnsignedWrap() &&
2915 N0->getFlags().hasNoUnsignedWrap() &&
2916 OMul->getFlags().hasNoUnsignedWrap() &&
2917 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2918 Flags.setNoUnsignedWrap(true);
2919 if (N->getFlags().hasNoSignedWrap() &&
2920 N0->getFlags().hasNoSignedWrap() &&
2921 OMul->getFlags().hasNoSignedWrap() &&
2922 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2923 Flags.setNoSignedWrap(true);
2924 }
2925 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2926 DAG.getConstant(CM, DL, VT), Flags);
2927 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2928 return DAG.getNode(
2929 ISD::ADD, DL, VT, Add,
2930 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2931 }
2932 }
2933 }
2934
2935 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2936 return Combined;
2937
2938 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2939 return Combined;
2940
2941 return SDValue();
2942}
2943
2944// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2945SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2946 SDValue N0 = N->getOperand(0);
2947 EVT VT = N0.getValueType();
2948 SDValue A, B;
2949
2950 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2951 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2952 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2953 m_SpecificInt(1))))) {
2954 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2955 }
2956 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2957 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2958 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2959 m_SpecificInt(1))))) {
2960 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2961 }
2962
2963 return SDValue();
2964}
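// Why the matched identity holds (sketch): A + B = 2*(A & B) + (A ^ B), so
//   (A & B) + ((A ^ B) >> 1) = floor((A + B) / 2)
// computed without widening, which is AVGFLOORU (AVGFLOORS when the shift is
// arithmetic and the operands are treated as signed).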
2965
2966SDValue DAGCombiner::visitADD(SDNode *N) {
2967 SDValue N0 = N->getOperand(0);
2968 SDValue N1 = N->getOperand(1);
2969 EVT VT = N0.getValueType();
2970 SDLoc DL(N);
2971
2972 if (SDValue Combined = visitADDLike(N))
2973 return Combined;
2974
2975 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2976 return V;
2977
2978 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2979 return V;
2980
2981 // Try to match AVGFLOOR fixedwidth pattern
2982 if (SDValue V = foldAddToAvg(N, DL))
2983 return V;
2984
2985 // fold (a+b) -> (a|b) iff a and b share no bits.
2986 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2987 DAG.haveNoCommonBitsSet(N0, N1)) {
2988 SDNodeFlags Flags;
2989 Flags.setDisjoint(true);
2990 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2991 }
2992
2993 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2994 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2995 const APInt &C0 = N0->getConstantOperandAPInt(0);
2996 const APInt &C1 = N1->getConstantOperandAPInt(0);
2997 return DAG.getVScale(DL, VT, C0 + C1);
2998 }
2999
3000 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3001 if (N0.getOpcode() == ISD::ADD &&
3002 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3003 N1.getOpcode() == ISD::VSCALE) {
3004 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3005 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3006 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3007 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3008 }
3009
3010 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3011 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3012 N1.getOpcode() == ISD::STEP_VECTOR) {
3013 const APInt &C0 = N0->getConstantOperandAPInt(0);
3014 const APInt &C1 = N1->getConstantOperandAPInt(0);
3015 APInt NewStep = C0 + C1;
3016 return DAG.getStepVector(DL, VT, NewStep);
3017 }
3018
3019 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3020 if (N0.getOpcode() == ISD::ADD &&
3021 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3022 N1.getOpcode() == ISD::STEP_VECTOR) {
3023 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3024 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3025 APInt NewStep = SV0 + SV1;
3026 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3027 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3028 }
3029
3030 return SDValue();
3031}
3032
3033SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3034 unsigned Opcode = N->getOpcode();
3035 SDValue N0 = N->getOperand(0);
3036 SDValue N1 = N->getOperand(1);
3037 EVT VT = N0.getValueType();
3038 bool IsSigned = Opcode == ISD::SADDSAT;
3039 SDLoc DL(N);
3040
3041 // fold (add_sat x, undef) -> -1
3042 if (N0.isUndef() || N1.isUndef())
3043 return DAG.getAllOnesConstant(DL, VT);
3044
3045 // fold (add_sat c1, c2) -> c3
3046 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3047 return C;
3048
3049 // canonicalize constant to RHS
3050 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3051 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3052 return DAG.getNode(Opcode, DL, VT, N1, N0);
3053
3054 // fold vector ops
3055 if (VT.isVector()) {
3056 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3057 return FoldedVOp;
3058
3059 // fold (add_sat x, 0) -> x, vector edition
3060 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3061 return N0;
3062 }
3063
3064 // fold (add_sat x, 0) -> x
3065 if (isNullConstant(N1))
3066 return N0;
3067
3068 // If it cannot overflow, transform into an add.
3069 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3070 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3071
3072 return SDValue();
3073}
3074
3075 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3076 bool ForceCarryReconstruction = false) {
3077 bool Masked = false;
3078
3079 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3080 while (true) {
3081 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3082 V = V.getOperand(0);
3083 continue;
3084 }
3085
3086 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3087 if (ForceCarryReconstruction)
3088 return V;
3089
3090 Masked = true;
3091 V = V.getOperand(0);
3092 continue;
3093 }
3094
3095 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3096 return V;
3097
3098 break;
3099 }
3100
3101 // If this is not a carry, return.
3102 if (V.getResNo() != 1)
3103 return SDValue();
3104
3105 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3106 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3107 return SDValue();
3108
3109 EVT VT = V->getValueType(0);
3110 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3111 return SDValue();
3112
3113 // If the result is masked, then no matter what kind of bool it is we can
3114 // return. If it isn't, then we need to make sure the bool type is either 0 or
3115 // 1 and not other values.
3116 if (Masked ||
3117 TLI.getBooleanContents(V.getValueType()) ==
3118 TargetLowering::ZeroOrOneBooleanContent)
3119 return V;
3120
3121 return SDValue();
3122}
3123
3124/// Given the operands of an add/sub operation, see if the 2nd operand is a
3125/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3126/// the opcode and bypass the mask operation.
3127static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3128 SelectionDAG &DAG, const SDLoc &DL) {
3129 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3130 N1 = N1.getOperand(0);
3131
3132 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3133 return SDValue();
3134
3135 EVT VT = N0.getValueType();
3136 SDValue N10 = N1.getOperand(0);
3137 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3138 N10 = N10.getOperand(0);
3139
3140 if (N10.getValueType() != VT)
3141 return SDValue();
3142
3143 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3144 return SDValue();
3145
3146 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3147 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3148 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3149}
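// Worked example (illustrative): if X is known to be 0 or -1 (all sign bits),
// then (and X, 1) is 0 or 1 and equals -X, so
//   add N0, (and X, 1)  ==  sub N0, X
// which is the inversion performed above.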
3150
3151/// Helper for doing combines based on N0 and N1 being added to each other.
3152SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3153 SDNode *LocReference) {
3154 EVT VT = N0.getValueType();
3155 SDLoc DL(LocReference);
3156
3157 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3158 SDValue Y, N;
3159 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3160 return DAG.getNode(ISD::SUB, DL, VT, N0,
3161 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3162
3163 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3164 return V;
3165
3166 // Look for:
3167 // add (add x, 1), y
3168 // And if the target does not like this form then turn into:
3169 // sub y, (xor x, -1)
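// This relies on the identity x + 1 == 0 - (x ^ -1), so (add (add x, 1), y)
// and (sub y, (xor x, -1)) compute the same value.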
3170 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3171 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3172 // Limit this to after legalization if the add has wrap flags
3173 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3174 !N0->getFlags().hasNoSignedWrap()))) {
3175 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3176 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3177 }
3178
3179 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3180 // Hoist one-use subtraction by non-opaque constant:
3181 // (x - C) + y -> (x + y) - C
3182 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3183 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3184 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3185 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3186 }
3187 // Hoist one-use subtraction from non-opaque constant:
3188 // (C - x) + y -> (y - x) + C
3189 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3190 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3191 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3192 }
3193 }
3194
3195 // add (mul x, C), x -> mul x, C+1
3196 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3197 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3198 N0.hasOneUse()) {
3199 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3200 DAG.getConstant(1, DL, VT));
3201 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3202 }
3203
3204 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3205 // rather than 'add 0/-1' (the zext should get folded).
3206 // add (sext i1 Y), X --> sub X, (zext i1 Y)
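// E.g. for i1 Y, (sext i1 Y) is 0 or -1 while (zext i1 Y) is 0 or 1, so adding
// the sign-extended value is the same as subtracting the zero-extended one.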
3207 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3208 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3209 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3210 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3211 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3212 }
3213
3214 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3215 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3216 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3217 if (TN->getVT() == MVT::i1) {
3218 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3219 DAG.getConstant(1, DL, VT));
3220 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3221 }
3222 }
3223
3224 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3225 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3226 N1.getResNo() == 0)
3227 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3228 N0, N1.getOperand(0), N1.getOperand(2));
3229
3230 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3231 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3232 if (SDValue Carry = getAsCarry(TLI, N1))
3233 return DAG.getNode(ISD::UADDO_CARRY, DL,
3234 DAG.getVTList(VT, Carry.getValueType()), N0,
3235 DAG.getConstant(0, DL, VT), Carry);
3236
3237 return SDValue();
3238}
3239
3240SDValue DAGCombiner::visitADDC(SDNode *N) {
3241 SDValue N0 = N->getOperand(0);
3242 SDValue N1 = N->getOperand(1);
3243 EVT VT = N0.getValueType();
3244 SDLoc DL(N);
3245
3246 // If the flag result is dead, turn this into an ADD.
3247 if (!N->hasAnyUseOfValue(1))
3248 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3249 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3250
3251 // canonicalize constant to RHS.
3252 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3253 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3254 if (N0C && !N1C)
3255 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3256
3257 // fold (addc x, 0) -> x + no carry out
3258 if (isNullConstant(N1))
3259 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3260 DL, MVT::Glue));
3261
3262 // If it cannot overflow, transform into an add.
3263 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3264 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3265 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3266
3267 return SDValue();
3268}
3269
3270/**
3271 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3272 * then the flip also occurs if computing the inverse is the same cost.
3273 * This function returns an empty SDValue in case it cannot flip the boolean
3274 * without increasing the cost of the computation. If you want to flip a boolean
3275 * no matter what, use DAG.getLogicalNOT.
3276 */
3277static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3278 const TargetLowering &TLI,
3279 bool Force) {
3280 if (Force && isa<ConstantSDNode>(V))
3281 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3282
3283 if (V.getOpcode() != ISD::XOR)
3284 return SDValue();
3285
3286 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3287 if (!Const)
3288 return SDValue();
3289
3290 EVT VT = V.getValueType();
3291
3292 bool IsFlip = false;
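// E.g. with ZeroOrOneBooleanContent, (xor x, 1) flips x; with
// ZeroOrNegativeOneBooleanContent, (xor x, -1) does; with
// UndefinedBooleanContent only bit 0 matters, so any odd constant flips it.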
3293 switch(TLI.getBooleanContents(VT)) {
3294 case TargetLowering::ZeroOrOneBooleanContent:
3295 IsFlip = Const->isOne();
3296 break;
3297 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3298 IsFlip = Const->isAllOnes();
3299 break;
3300 case TargetLowering::UndefinedBooleanContent:
3301 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3302 break;
3303 }
3304
3305 if (IsFlip)
3306 return V.getOperand(0);
3307 if (Force)
3308 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3309 return SDValue();
3310}
3311
3312SDValue DAGCombiner::visitADDO(SDNode *N) {
3313 SDValue N0 = N->getOperand(0);
3314 SDValue N1 = N->getOperand(1);
3315 EVT VT = N0.getValueType();
3316 bool IsSigned = (ISD::SADDO == N->getOpcode());
3317
3318 EVT CarryVT = N->getValueType(1);
3319 SDLoc DL(N);
3320
3321 // If the flag result is dead, turn this into an ADD.
3322 if (!N->hasAnyUseOfValue(1))
3323 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3324 DAG.getUNDEF(CarryVT));
3325
3326 // canonicalize constant to RHS.
3327 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3328 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3329 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3330
3331 // fold (addo x, 0) -> x + no carry out
3332 if (isNullOrNullSplat(N1))
3333 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3334
3335 // If it cannot overflow, transform into an add.
3336 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3337 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3338 DAG.getConstant(0, DL, CarryVT));
3339
3340 if (IsSigned) {
3341 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3342 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3343 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3344 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3345 } else {
3346 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
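// E.g. (~a) + 1 == 0 - a; the add carries exactly when a == 0, which is the
// one case where the unsigned subtract does not borrow, hence the carry flip.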
3347 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3348 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3349 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3350 return CombineTo(
3351 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3352 }
3353
3354 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3355 return Combined;
3356
3357 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3358 return Combined;
3359 }
3360
3361 return SDValue();
3362}
3363
3364SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3365 EVT VT = N0.getValueType();
3366 if (VT.isVector())
3367 return SDValue();
3368
3369 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3370 // If Y + 1 cannot overflow.
3371 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3372 SDValue Y = N1.getOperand(0);
3373 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3374 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3375 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3376 N1.getOperand(2));
3377 }
3378
3379 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3380 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3381 if (SDValue Carry = getAsCarry(TLI, N1))
3382 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3383 DAG.getConstant(0, SDLoc(N), VT), Carry);
3384
3385 return SDValue();
3386}
3387
3388SDValue DAGCombiner::visitADDE(SDNode *N) {
3389 SDValue N0 = N->getOperand(0);
3390 SDValue N1 = N->getOperand(1);
3391 SDValue CarryIn = N->getOperand(2);
3392
3393 // canonicalize constant to RHS
3394 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3395 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3396 if (N0C && !N1C)
3397 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3398 N1, N0, CarryIn);
3399
3400 // fold (adde x, y, false) -> (addc x, y)
3401 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3402 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3403
3404 return SDValue();
3405}
3406
3407SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3408 SDValue N0 = N->getOperand(0);
3409 SDValue N1 = N->getOperand(1);
3410 SDValue CarryIn = N->getOperand(2);
3411 SDLoc DL(N);
3412
3413 // canonicalize constant to RHS
3414 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3415 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3416 if (N0C && !N1C)
3417 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3418
3419 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3420 if (isNullConstant(CarryIn)) {
3421 if (!LegalOperations ||
3422 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3423 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3424 }
3425
3426 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
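// The sum 0 + 0 + X is at most 1, so the result is just the carry-in
// normalized to 0/1 and the carry-out is always 0.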
3427 if (isNullConstant(N0) && isNullConstant(N1)) {
3428 EVT VT = N0.getValueType();
3429 EVT CarryVT = CarryIn.getValueType();
3430 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3431 AddToWorklist(CarryExt.getNode());
3432 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3433 DAG.getConstant(1, DL, VT)),
3434 DAG.getConstant(0, DL, CarryVT));
3435 }
3436
3437 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3438 return Combined;
3439
3440 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3441 return Combined;
3442
3443 // We want to avoid useless duplication.
3444 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3445 // not a binary operation, it is not really possible to leverage this
3446 // existing mechanism for it. However, if more operations require the same
3447 // deduplication logic, then it may be worth generalizing.
3448 SDValue Ops[] = {N1, N0, CarryIn};
3449 SDNode *CSENode =
3450 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3451 if (CSENode)
3452 return SDValue(CSENode, 0);
3453
3454 return SDValue();
3455}
3456
3457/**
3458 * If we are facing some sort of diamond carry propagation pattern try to
3459 * break it up to generate something like:
3460 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3461 *
3462 * The end result is usually an increase in the number of operations required, but because
3463 * carry is now linearized, other transforms can kick in and optimize the DAG.
3464 *
3465 * Patterns typically look something like
3466 * (uaddo A, B)
3467 * / \
3468 * Carry Sum
3469 * | \
3470 * | (uaddo_carry *, 0, Z)
3471 * | /
3472 * \ Carry
3473 * | /
3474 * (uaddo_carry X, *, *)
3475 *
3476 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3477 * produce a combine with a single path for carry propagation.
3478 */
3479static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3480 SelectionDAG &DAG, SDValue X,
3481 SDValue Carry0, SDValue Carry1,
3482 SDNode *N) {
3483 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3484 return SDValue();
3485 if (Carry1.getOpcode() != ISD::UADDO)
3486 return SDValue();
3487
3488 SDValue Z;
3489
3490 /**
3491 * First look for a suitable Z. It will present itself in the form of
3492 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3493 */
3494 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3495 isNullConstant(Carry0.getOperand(1))) {
3496 Z = Carry0.getOperand(2);
3497 } else if (Carry0.getOpcode() == ISD::UADDO &&
3498 isOneConstant(Carry0.getOperand(1))) {
3499 EVT VT = Carry0->getValueType(1);
3500 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3501 } else {
3502 // We couldn't find a suitable Z.
3503 return SDValue();
3504 }
3505
3506
3507 auto cancelDiamond = [&](SDValue A,SDValue B) {
3508 SDLoc DL(N);
3509 SDValue NewY =
3510 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3511 Combiner.AddToWorklist(NewY.getNode());
3512 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3513 DAG.getConstant(0, DL, X.getValueType()),
3514 NewY.getValue(1));
3515 };
3516
3517 /**
3518 * (uaddo A, B)
3519 * |
3520 * Sum
3521 * |
3522 * (uaddo_carry *, 0, Z)
3523 */
3524 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3525 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3526 }
3527
3528 /**
3529 * (uaddo_carry A, 0, Z)
3530 * |
3531 * Sum
3532 * |
3533 * (uaddo *, B)
3534 */
3535 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3536 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3537 }
3538
3539 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3540 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3541 }
3542
3543 return SDValue();
3544}
3545
3546// If we are facing some sort of diamond carry/borrow in/out pattern try to
3547// match patterns like:
3548//
3549// (uaddo A, B) CarryIn
3550// | \ |
3551// | \ |
3552// PartialSum PartialCarryOutX /
3553// | | /
3554// | ____|____________/
3555// | / |
3556// (uaddo *, *) \________
3557// | \ \
3558// | \ |
3559// | PartialCarryOutY |
3560// | \ |
3561// | \ /
3562// AddCarrySum | ______/
3563// | /
3564// CarryOut = (or *, *)
3565//
3566// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3567//
3568// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3569//
3570// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3571// with a single path for carry/borrow out propagation.
3572static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3573 SDValue N0, SDValue N1, SDNode *N) {
3574 SDValue Carry0 = getAsCarry(TLI, N0);
3575 if (!Carry0)
3576 return SDValue();
3577 SDValue Carry1 = getAsCarry(TLI, N1);
3578 if (!Carry1)
3579 return SDValue();
3580
3581 unsigned Opcode = Carry0.getOpcode();
3582 if (Opcode != Carry1.getOpcode())
3583 return SDValue();
3584 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3585 return SDValue();
3586 // Guarantee identical type of CarryOut
3587 EVT CarryOutType = N->getValueType(0);
3588 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3589 CarryOutType != Carry1.getValue(1).getValueType())
3590 return SDValue();
3591
3592 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3593 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3594 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3595 std::swap(Carry0, Carry1);
3596
3597 // Check if nodes are connected in expected way.
3598 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3599 Carry1.getOperand(1) != Carry0.getValue(0))
3600 return SDValue();
3601
3602 // The carry in value must be on the righthand side for subtraction.
3603 unsigned CarryInOperandNum =
3604 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3605 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3606 return SDValue();
3607 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3608
3609 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3610 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3611 return SDValue();
3612
3613 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3614 CarryIn = getAsCarry(TLI, CarryIn, true);
3615 if (!CarryIn)
3616 return SDValue();
3617
3618 SDLoc DL(N);
3619 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3620 Carry1->getValueType(0));
3621 SDValue Merged =
3622 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3623 Carry0.getOperand(1), CarryIn);
3624
3625 // Please note that because we have proven that the result of the UADDO/USUBO
3626 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3627 // therefore prove that if the first UADDO/USUBO overflows, the second
3628 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3629 // maximum value.
3630 //
3631 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3632 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3633 //
3634 // This is important because it means that OR and XOR can be used to merge
3635 // carry flags; and that AND can return a constant zero.
3636 //
3637 // TODO: match other operations that can merge flags (ADD, etc)
3638 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3639 if (N->getOpcode() == ISD::AND)
3640 return DAG.getConstant(0, DL, CarryOutType);
3641 return Merged.getValue(1);
3642}
3643
3644SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3645 SDValue CarryIn, SDNode *N) {
3646 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3647 // carry.
3648 if (isBitwiseNot(N0))
3649 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3650 SDLoc DL(N);
3651 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3652 N0.getOperand(0), NotC);
3653 return CombineTo(
3654 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3655 }
3656
3657 // Iff the flag result is dead:
3658 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3659 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3660 // or the dependency between the instructions.
3661 if ((N0.getOpcode() == ISD::ADD ||
3662 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3663 N0.getValue(1) != CarryIn)) &&
3664 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3665 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3666 N0.getOperand(0), N0.getOperand(1), CarryIn);
3667
3668 /**
3669 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3670 * a diamond carry propagation, in which case we try to transform the DAG
3671 * to ensure linear carry propagation if that is possible.
3672 */
3673 if (auto Y = getAsCarry(TLI, N1)) {
3674 // Because both are carries, Y and Z can be swapped.
3675 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3676 return R;
3677 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3678 return R;
3679 }
3680
3681 return SDValue();
3682}
3683
3684SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3685 SDValue CarryIn, SDNode *N) {
3686 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3687 if (isBitwiseNot(N0)) {
3688 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3689 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3690 N0.getOperand(0), NotC);
3691 }
3692
3693 return SDValue();
3694}
3695
3696SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3697 SDValue N0 = N->getOperand(0);
3698 SDValue N1 = N->getOperand(1);
3699 SDValue CarryIn = N->getOperand(2);
3700 SDLoc DL(N);
3701
3702 // canonicalize constant to RHS
3703 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3704 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3705 if (N0C && !N1C)
3706 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3707
3708 // fold (saddo_carry x, y, false) -> (saddo x, y)
3709 if (isNullConstant(CarryIn)) {
3710 if (!LegalOperations ||
3711 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3712 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3713 }
3714
3715 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3716 return Combined;
3717
3718 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3719 return Combined;
3720
3721 return SDValue();
3722}
3723
3724// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3725// clamp/truncation if necessary.
3726static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3727 SDValue RHS, SelectionDAG &DAG,
3728 const SDLoc &DL) {
3729 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3730 "Illegal truncation");
3731
3732 if (DstVT == SrcVT)
3733 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3734
3735 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3736 // clamping RHS.
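// E.g. with SrcVT = i16 and DstVT = i8: when the top 8 bits of LHS are known
// zero, RHS is clamped to at most 255 and both operands are truncated to i8
// before the saturating subtract.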
3737 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3738 DstVT.getScalarSizeInBits());
3739 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3740 return SDValue();
3741
3742 SDValue SatLimit =
3743 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3744 DstVT.getScalarSizeInBits()),
3745 DL, SrcVT);
3746 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3747 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3748 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3749 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3750}
3751
3752// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3753// usubsat(a,b), optionally as a truncated type.
3754SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3755 if (N->getOpcode() != ISD::SUB ||
3756 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3757 return SDValue();
3758
3759 EVT SubVT = N->getValueType(0);
3760 SDValue Op0 = N->getOperand(0);
3761 SDValue Op1 = N->getOperand(1);
3762
3763 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3764 // that may be converted to usubsat(a,b).
3765 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3766 SDValue MaxLHS = Op0.getOperand(0);
3767 SDValue MaxRHS = Op0.getOperand(1);
3768 if (MaxLHS == Op1)
3769 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3770 if (MaxRHS == Op1)
3771 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3772 }
3773
3774 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3775 SDValue MinLHS = Op1.getOperand(0);
3776 SDValue MinRHS = Op1.getOperand(1);
3777 if (MinLHS == Op0)
3778 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3779 if (MinRHS == Op0)
3780 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3781 }
3782
3783 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3784 if (Op1.getOpcode() == ISD::TRUNCATE &&
3785 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3786 Op1.getOperand(0).hasOneUse()) {
3787 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3788 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3789 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3790 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3791 DAG, DL);
3792 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3793 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3794 DAG, DL);
3795 }
3796
3797 return SDValue();
3798}
3799
3800// Since it may not be valid to emit a fold to zero for vector initializers,
3801// check if we can before folding.
3802static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3803 SelectionDAG &DAG, bool LegalOperations) {
3804 if (!VT.isVector())
3805 return DAG.getConstant(0, DL, VT);
3806 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3807 return DAG.getConstant(0, DL, VT);
3808 return SDValue();
3809}
3810
3811SDValue DAGCombiner::visitSUB(SDNode *N) {
3812 SDValue N0 = N->getOperand(0);
3813 SDValue N1 = N->getOperand(1);
3814 EVT VT = N0.getValueType();
3815 unsigned BitWidth = VT.getScalarSizeInBits();
3816 SDLoc DL(N);
3817
3818 auto PeekThroughFreeze = [](SDValue N) {
3819 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3820 return N->getOperand(0);
3821 return N;
3822 };
3823
3824 // fold (sub x, x) -> 0
3825 // FIXME: Refactor this and xor and other similar operations together.
3826 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3827 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3828
3829 // fold (sub c1, c2) -> c3
3830 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3831 return C;
3832
3833 // fold vector ops
3834 if (VT.isVector()) {
3835 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3836 return FoldedVOp;
3837
3838 // fold (sub x, 0) -> x, vector edition
3839 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3840 return N0;
3841 }
3842
3843 if (SDValue NewSel = foldBinOpIntoSelect(N))
3844 return NewSel;
3845
3846 // fold (sub x, c) -> (add x, -c)
3847 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3848 return DAG.getNode(ISD::ADD, DL, VT, N0,
3849 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3850
3851 if (isNullOrNullSplat(N0)) {
3852 // Right-shifting everything out but the sign bit followed by negation is
3853 // the same as flipping arithmetic/logical shift type without the negation:
3854 // -(X >>u 31) -> (X >>s 31)
3855 // -(X >>s 31) -> (X >>u 31)
3856 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3857 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3858 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3859 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3860 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3861 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3862 }
3863 }
3864
3865 // 0 - X --> 0 if the sub is NUW.
3866 if (N->getFlags().hasNoUnsignedWrap())
3867 return N0;
3868
3869 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3870 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3871 // N1 must be 0 because negating the minimum signed value is undefined.
3872 if (N->getFlags().hasNoSignedWrap())
3873 return N0;
3874
3875 // 0 - X --> X if X is 0 or the minimum signed value.
3876 return N1;
3877 }
3878
3879 // Convert 0 - abs(x).
3880 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3881 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3882 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3883 return Result;
3884
3885 // Fold neg(splat(neg(x)) -> splat(x)
3886 if (VT.isVector()) {
3887 SDValue N1S = DAG.getSplatValue(N1, true);
3888 if (N1S && N1S.getOpcode() == ISD::SUB &&
3889 isNullConstant(N1S.getOperand(0)))
3890 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3891 }
3892 }
3893
3894 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3895 if (isAllOnesOrAllOnesSplat(N0))
3896 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3897
3898 // fold (A - (0-B)) -> A+B
3899 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3900 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3901
3902 // fold A-(A-B) -> B
3903 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3904 return N1.getOperand(1);
3905
3906 // fold (A+B)-A -> B
3907 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3908 return N0.getOperand(1);
3909
3910 // fold (A+B)-B -> A
3911 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3912 return N0.getOperand(0);
3913
3914 // fold (A+C1)-C2 -> A+(C1-C2)
3915 if (N0.getOpcode() == ISD::ADD) {
3916 SDValue N01 = N0.getOperand(1);
3917 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3918 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3919 }
3920
3921 // fold C2-(A+C1) -> (C2-C1)-A
3922 if (N1.getOpcode() == ISD::ADD) {
3923 SDValue N11 = N1.getOperand(1);
3924 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3925 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3926 }
3927
3928 // fold (A-C1)-C2 -> A-(C1+C2)
3929 if (N0.getOpcode() == ISD::SUB) {
3930 SDValue N01 = N0.getOperand(1);
3931 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3932 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3933 }
3934
3935 // fold (c1-A)-c2 -> (c1-c2)-A
3936 if (N0.getOpcode() == ISD::SUB) {
3937 SDValue N00 = N0.getOperand(0);
3938 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3939 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3940 }
3941
3942 SDValue A, B, C;
3943
3944 // fold ((A+(B+C))-B) -> A+C
3945 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3946 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3947
3948 // fold ((A+(B-C))-B) -> A-C
3949 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3950 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3951
3952 // fold ((A-(B-C))-C) -> A-B
3953 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3954 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3955
3956 // fold (A-(B-C)) -> A+(C-B)
3957 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3958 return DAG.getNode(ISD::ADD, DL, VT, N0,
3959 DAG.getNode(ISD::SUB, DL, VT, C, B));
3960
3961 // A - (A & B) -> A & (~B)
3962 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3963 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3964 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3965
3966 // fold (A - (-B * C)) -> (A + (B * C))
3967 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3968 return DAG.getNode(ISD::ADD, DL, VT, N0,
3969 DAG.getNode(ISD::MUL, DL, VT, B, C));
3970
3971 // If either operand of a sub is undef, the result is undef
3972 if (N0.isUndef())
3973 return N0;
3974 if (N1.isUndef())
3975 return N1;
3976
3977 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3978 return V;
3979
3980 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3981 return V;
3982
3983 // Try to match AVGCEIL fixedwidth pattern
3984 if (SDValue V = foldSubToAvg(N, DL))
3985 return V;
3986
3987 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3988 return V;
3989
3990 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3991 return V;
3992
3993 // (A - B) - 1 -> add (xor B, -1), A
3994 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3995 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3996
3997 // Look for:
3998 // sub y, (xor x, -1)
3999 // And if the target does not like this form then turn into:
4000 // add (add x, y), 1
4001 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4002 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4003 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4004 }
4005
4006 // Hoist one-use addition by non-opaque constant:
4007 // (x + C) - y -> (x - y) + C
4008 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4009 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4010 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4011 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4012 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4013 }
4014 // y - (x + C) -> (y - x) - C
4015 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4016 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4017 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4018 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4019 }
4020 // (x - C) - y -> (x - y) - C
4021 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4022 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4023 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4024 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4025 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4026 }
4027 // (C - x) - y -> C - (x + y)
4028 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4029 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4030 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4031 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4032 }
4033
4034 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4035 // rather than 'sub 0/1' (the sext should get folded).
4036 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4037 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4038 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4039 TLI.getBooleanContents(VT) ==
4040 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4041 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4042 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4043 }
4044
4045 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4046 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4047 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4048 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4049 return DAG.getNode(ISD::ABS, DL, VT, A);
4050
4051 // If the relocation model supports it, consider symbol offsets.
4052 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4053 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4054 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4055 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4056 if (GA->getGlobal() == GB->getGlobal())
4057 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4058 DL, VT);
4059 }
4060
4061 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4062 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4063 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4064 if (TN->getVT() == MVT::i1) {
4065 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4066 DAG.getConstant(1, DL, VT));
4067 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4068 }
4069 }
4070
4071 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4072 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4073 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4074 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4075 }
4076
4077 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4078 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4079 APInt NewStep = -N1.getConstantOperandAPInt(0);
4080 return DAG.getNode(ISD::ADD, DL, VT, N0,
4081 DAG.getStepVector(DL, VT, NewStep));
4082 }
4083
4084 // Prefer an add for more folding potential and possibly better codegen:
4085 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4086 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4087 SDValue ShAmt = N1.getOperand(1);
4088 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4089 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4090 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4091 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4092 }
4093 }
4094
4095 // As with the previous fold, prefer add for more folding potential.
4096 // Subtracting SMIN/0 is the same as adding SMIN/0:
4097 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4098 if (N1.getOpcode() == ISD::SHL) {
4099 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4100 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4101 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4102 }
4103
4104 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4105 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4106 N0.getResNo() == 0 && N0.hasOneUse())
4107 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4108 N0.getOperand(0), N1, N0.getOperand(2));
4109
4110 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4111 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4112 if (SDValue Carry = getAsCarry(TLI, N0)) {
4113 SDValue X = N1;
4114 SDValue Zero = DAG.getConstant(0, DL, VT);
4115 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4116 return DAG.getNode(ISD::UADDO_CARRY, DL,
4117 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4118 Carry);
4119 }
4120 }
4121
4122 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4123 // sub C0, X --> xor X, C0
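// E.g. if C0 == 0xFF and X is known to fit in the low 4 bits, then for every
// such X, 0xFF - X == 0xFF ^ X, so the subtraction never borrows.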
4124 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4125 if (!C0->isOpaque()) {
4126 const APInt &C0Val = C0->getAPIntValue();
4127 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4128 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4129 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4130 }
4131 }
4132
4133 // smax(a,b) - smin(a,b) --> abds(a,b)
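// Both folds compute the absolute difference, e.g. smax(3,-5) - smin(3,-5)
// == 3 - (-5) == 8 == abds(3,-5).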
4134 if (hasOperation(ISD::ABDS, VT) &&
4135 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4136 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4137 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4138
4139 // umax(a,b) - umin(a,b) --> abdu(a,b)
4140 if (hasOperation(ISD::ABDU, VT) &&
4141 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4142 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4143 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4144
4145 return SDValue();
4146}
4147
4148SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4149 unsigned Opcode = N->getOpcode();
4150 SDValue N0 = N->getOperand(0);
4151 SDValue N1 = N->getOperand(1);
4152 EVT VT = N0.getValueType();
4153 bool IsSigned = Opcode == ISD::SSUBSAT;
4154 SDLoc DL(N);
4155
4156 // fold (sub_sat x, undef) -> 0
4157 if (N0.isUndef() || N1.isUndef())
4158 return DAG.getConstant(0, DL, VT);
4159
4160 // fold (sub_sat x, x) -> 0
4161 if (N0 == N1)
4162 return DAG.getConstant(0, DL, VT);
4163
4164 // fold (sub_sat c1, c2) -> c3
4165 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4166 return C;
4167
4168 // fold vector ops
4169 if (VT.isVector()) {
4170 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4171 return FoldedVOp;
4172
4173 // fold (sub_sat x, 0) -> x, vector edition
4174 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4175 return N0;
4176 }
4177
4178 // fold (sub_sat x, 0) -> x
4179 if (isNullConstant(N1))
4180 return N0;
4181
4182 // If it cannot overflow, transform into a sub.
4183 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4184 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4185
4186 return SDValue();
4187}
4188
4189SDValue DAGCombiner::visitSUBC(SDNode *N) {
4190 SDValue N0 = N->getOperand(0);
4191 SDValue N1 = N->getOperand(1);
4192 EVT VT = N0.getValueType();
4193 SDLoc DL(N);
4194
4195 // If the flag result is dead, turn this into an SUB.
4196 if (!N->hasAnyUseOfValue(1))
4197 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4198 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4199
4200 // fold (subc x, x) -> 0 + no borrow
4201 if (N0 == N1)
4202 return CombineTo(N, DAG.getConstant(0, DL, VT),
4203 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4204
4205 // fold (subc x, 0) -> x + no borrow
4206 if (isNullConstant(N1))
4207 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4208
4209 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4210 if (isAllOnesConstant(N0))
4211 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4212 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4213
4214 return SDValue();
4215}
4216
4217SDValue DAGCombiner::visitSUBO(SDNode *N) {
4218 SDValue N0 = N->getOperand(0);
4219 SDValue N1 = N->getOperand(1);
4220 EVT VT = N0.getValueType();
4221 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4222
4223 EVT CarryVT = N->getValueType(1);
4224 SDLoc DL(N);
4225
4226 // If the flag result is dead, turn this into an SUB.
4227 if (!N->hasAnyUseOfValue(1))
4228 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4229 DAG.getUNDEF(CarryVT));
4230
4231 // fold (subo x, x) -> 0 + no borrow
4232 if (N0 == N1)
4233 return CombineTo(N, DAG.getConstant(0, DL, VT),
4234 DAG.getConstant(0, DL, CarryVT));
4235
4236 // fold (subo x, c) -> (addo x, -c)
4237 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4238 if (IsSigned && !N1C->isMinSignedValue())
4239 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4240 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4241
4242 // fold (subo x, 0) -> x + no borrow
4243 if (isNullOrNullSplat(N1))
4244 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4245
4246 // If it cannot overflow, transform into a sub.
4247 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4248 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4249 DAG.getConstant(0, DL, CarryVT));
4250
4251 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4252 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4253 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4254 DAG.getConstant(0, DL, CarryVT));
4255
4256 return SDValue();
4257}
4258
4259SDValue DAGCombiner::visitSUBE(SDNode *N) {
4260 SDValue N0 = N->getOperand(0);
4261 SDValue N1 = N->getOperand(1);
4262 SDValue CarryIn = N->getOperand(2);
4263
4264 // fold (sube x, y, false) -> (subc x, y)
4265 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4266 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4267
4268 return SDValue();
4269}
4270
4271SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4272 SDValue N0 = N->getOperand(0);
4273 SDValue N1 = N->getOperand(1);
4274 SDValue CarryIn = N->getOperand(2);
4275
4276 // fold (usubo_carry x, y, false) -> (usubo x, y)
4277 if (isNullConstant(CarryIn)) {
4278 if (!LegalOperations ||
4279 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4280 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4281 }
4282
4283 return SDValue();
4284}
4285
4286SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4287 SDValue N0 = N->getOperand(0);
4288 SDValue N1 = N->getOperand(1);
4289 SDValue CarryIn = N->getOperand(2);
4290
4291 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4292 if (isNullConstant(CarryIn)) {
4293 if (!LegalOperations ||
4294 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4295 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4296 }
4297
4298 return SDValue();
4299}
4300
4301// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4302// UMULFIXSAT here.
4303SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4304 SDValue N0 = N->getOperand(0);
4305 SDValue N1 = N->getOperand(1);
4306 SDValue Scale = N->getOperand(2);
4307 EVT VT = N0.getValueType();
4308
4309 // fold (mulfix x, undef, scale) -> 0
4310 if (N0.isUndef() || N1.isUndef())
4311 return DAG.getConstant(0, SDLoc(N), VT);
4312
4313 // Canonicalize constant to RHS (vector doesn't have to splat)
4314 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4315 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4316 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4317
4318 // fold (mulfix x, 0, scale) -> 0
4319 if (isNullConstant(N1))
4320 return DAG.getConstant(0, SDLoc(N), VT);
4321
4322 return SDValue();
4323}
4324
4325template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4326 SDValue N0 = N->getOperand(0);
4327 SDValue N1 = N->getOperand(1);
4328 EVT VT = N0.getValueType();
4329 unsigned BitWidth = VT.getScalarSizeInBits();
4330 SDLoc DL(N);
4331 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4332 MatchContextClass Matcher(DAG, TLI, N);
4333
4334 // fold (mul x, undef) -> 0
4335 if (N0.isUndef() || N1.isUndef())
4336 return DAG.getConstant(0, DL, VT);
4337
4338 // fold (mul c1, c2) -> c1*c2
4339 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4340 return C;
4341
4342 // canonicalize constant to RHS (vector doesn't have to splat)
4343 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4344 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4345 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4346
4347 bool N1IsConst = false;
4348 bool N1IsOpaqueConst = false;
4349 APInt ConstValue1;
4350
4351 // fold vector ops
4352 if (VT.isVector()) {
4353 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4354 if (!UseVP)
4355 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4356 return FoldedVOp;
4357
4358 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4359 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4360 "Splat APInt should be element width");
4361 } else {
4362 N1IsConst = isa<ConstantSDNode>(N1);
4363 if (N1IsConst) {
4364 ConstValue1 = N1->getAsAPIntVal();
4365 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4366 }
4367 }
4368
4369 // fold (mul x, 0) -> 0
4370 if (N1IsConst && ConstValue1.isZero())
4371 return N1;
4372
4373 // fold (mul x, 1) -> x
4374 if (N1IsConst && ConstValue1.isOne())
4375 return N0;
4376
4377 if (!UseVP)
4378 if (SDValue NewSel = foldBinOpIntoSelect(N))
4379 return NewSel;
4380
4381 // fold (mul x, -1) -> 0-x
4382 if (N1IsConst && ConstValue1.isAllOnes())
4383 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4384
4385 // fold (mul x, (1 << c)) -> x << c
4386 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4387 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4388 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4389 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4390 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4391 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4392 }
4393 }
4394
4395 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
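// E.g. x * -8 becomes 0 - (x << 3).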
4396 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4397 unsigned Log2Val = (-ConstValue1).logBase2();
4398 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4399
4400 // FIXME: If the input is something that is easily negated (e.g. a
4401 // single-use add), we should put the negate there.
4402 return Matcher.getNode(
4403 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4404 Matcher.getNode(ISD::SHL, DL, VT, N0,
4405 DAG.getConstant(Log2Val, DL, ShiftVT)));
4406 }
4407
4408 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4409 // hi result is in use in case we hit this mid-legalization.
4410 if (!UseVP) {
4411 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4412 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4413 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4414 // TODO: Can we match commutable operands with getNodeIfExists?
4415 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4416 if (LoHi->hasAnyUseOfValue(1))
4417 return SDValue(LoHi, 0);
4418 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4419 if (LoHi->hasAnyUseOfValue(1))
4420 return SDValue(LoHi, 0);
4421 }
4422 }
4423 }
4424
4425 // Try to transform:
4426 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4427 // mul x, (2^N + 1) --> add (shl x, N), x
4428 // mul x, (2^N - 1) --> sub (shl x, N), x
4429 // Examples: x * 33 --> (x << 5) + x
4430 // x * 15 --> (x << 4) - x
4431 // x * -33 --> -((x << 5) + x)
4432 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4433 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4434 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4435 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4436 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4437 // x * 0xf800 --> (x << 16) - (x << 11)
4438 // x * -0x8800 --> -((x << 15) + (x << 11))
4439 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4440 if (!UseVP && N1IsConst &&
4441 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4442 // TODO: We could handle more general decomposition of any constant by
4443 // having the target set a limit on number of ops and making a
4444 // callback to determine that sequence (similar to sqrt expansion).
4445 unsigned MathOp = ISD::DELETED_NODE;
4446 APInt MulC = ConstValue1.abs();
4447 // The constant `2` should be treated as (2^0 + 1).
4448 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4449 MulC.lshrInPlace(TZeros);
4450 if ((MulC - 1).isPowerOf2())
4451 MathOp = ISD::ADD;
4452 else if ((MulC + 1).isPowerOf2())
4453 MathOp = ISD::SUB;
4454
4455 if (MathOp != ISD::DELETED_NODE) {
4456 unsigned ShAmt =
4457 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4458 ShAmt += TZeros;
4459 assert(ShAmt < BitWidth &&
4460 "multiply-by-constant generated out of bounds shift");
4461 SDValue Shl =
4462 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4463 SDValue R =
4464 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4465 DAG.getNode(ISD::SHL, DL, VT, N0,
4466 DAG.getConstant(TZeros, DL, VT)))
4467 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4468 if (ConstValue1.isNegative())
4469 R = DAG.getNegative(R, DL, VT);
4470 return R;
4471 }
4472 }
4473
4474 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
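// E.g. (mul (shl x, 3), 5) becomes (mul x, 40) since 5 << 3 == 40.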
4475 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4476 SDValue N01 = N0.getOperand(1);
4477 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4478 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4479 }
4480
4481 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4482 // use.
4483 {
4484 SDValue Sh, Y;
4485
4486 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4487 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4488 isConstantOrConstantVector(N0.getOperand(1))) {
4489 Sh = N0; Y = N1;
4490 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4491 isConstantOrConstantVector(N1.getOperand(1))) {
4492 Sh = N1; Y = N0;
4493 }
4494
4495 if (Sh.getNode()) {
4496 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4497 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4498 }
4499 }
4500
4501 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4502 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4503 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4504 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4505 isMulAddWithConstProfitable(N, N0, N1))
4506 return Matcher.getNode(
4507 ISD::ADD, DL, VT,
4508 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4509 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4510
4511 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4512 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4513 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4514 const APInt &C0 = N0.getConstantOperandAPInt(0);
4515 const APInt &C1 = NC1->getAPIntValue();
4516 return DAG.getVScale(DL, VT, C0 * C1);
4517 }
4518
4519 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4520 APInt MulVal;
4521 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4522 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4523 const APInt &C0 = N0.getConstantOperandAPInt(0);
4524 APInt NewStep = C0 * MulVal;
4525 return DAG.getStepVector(DL, VT, NewStep);
4526 }
4527
4528 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4529 SDValue X;
4530 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4531 sd_context_match(
4532 N, Matcher,
4533 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4534 m_Deferred(X)))) {
4535 return Matcher.getNode(ISD::ABS, DL, VT, X);
4536 }
4537
4538 // Fold ((mul x, 0/undef) -> 0,
4539 // (mul x, 1) -> x)
4540 // -> and(x, mask)
4541 // We can replace vectors with '0' and '1' factors with a clearing mask.
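// E.g. (mul x, <1,0,1,undef>) becomes (and x, <-1,0,-1,0>).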
4542 if (VT.isFixedLengthVector()) {
4543 unsigned NumElts = VT.getVectorNumElements();
4544 SmallBitVector ClearMask;
4545 ClearMask.reserve(NumElts);
4546 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4547 if (!V || V->isZero()) {
4548 ClearMask.push_back(true);
4549 return true;
4550 }
4551 ClearMask.push_back(false);
4552 return V->isOne();
4553 };
4554 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4555 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4556 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4557 EVT LegalSVT = N1.getOperand(0).getValueType();
4558 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4559 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4560 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4561 for (unsigned I = 0; I != NumElts; ++I)
4562 if (ClearMask[I])
4563 Mask[I] = Zero;
4564 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4565 }
4566 }
4567
4568 // reassociate mul
4569 // TODO: Change reassociateOps to support vp ops.
4570 if (!UseVP)
4571 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4572 return RMUL;
4573
4574 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4575 // TODO: Change reassociateReduction to support vp ops.
4576 if (!UseVP)
4577 if (SDValue SD =
4578 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4579 return SD;
4580
4581 // Simplify the operands using demanded-bits information.
4582 if (SimplifyDemandedBits(SDValue(N, 0)))
4583 return SDValue(N, 0);
4584
4585 return SDValue();
4586}
4587
4588/// Return true if divmod libcall is available.
4589static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4590 const TargetLowering &TLI) {
4591 RTLIB::Libcall LC;
4592 EVT NodeType = Node->getValueType(0);
4593 if (!NodeType.isSimple())
4594 return false;
4595 switch (NodeType.getSimpleVT().SimpleTy) {
4596 default: return false; // No libcall for vector types.
4597 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4598 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4599 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4600 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4601 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4602 }
4603
4604 return TLI.getLibcallName(LC) != nullptr;
4605}
4606
4607/// Issue divrem if both quotient and remainder are needed.
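/// For example, when both (sdiv X, Y) and (srem X, Y) are present, they can be
/// replaced by a single (sdivrem X, Y) node whose two results feed the
/// original users.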
4608SDValue DAGCombiner::useDivRem(SDNode *Node) {
4609 if (Node->use_empty())
4610 return SDValue(); // This is a dead node, leave it alone.
4611
4612 unsigned Opcode = Node->getOpcode();
4613 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4614 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4615
4616 // DivMod lib calls can still work on non-legal types if using lib-calls.
4617 EVT VT = Node->getValueType(0);
4618 if (VT.isVector() || !VT.isInteger())
4619 return SDValue();
4620
4621 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4622 return SDValue();
4623
4624 // If DIVREM is going to get expanded into a libcall,
4625 // but there is no libcall available, then don't combine.
4626 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4627 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4628 return SDValue();
4629
4630 // If div is legal, it's better to do the normal expansion
4631 unsigned OtherOpcode = 0;
4632 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4633 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4634 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4635 return SDValue();
4636 } else {
4637 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4638 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4639 return SDValue();
4640 }
4641
4642 SDValue Op0 = Node->getOperand(0);
4643 SDValue Op1 = Node->getOperand(1);
4644 SDValue combined;
4645 for (SDNode *User : Op0->uses()) {
4646 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4647 User->use_empty())
4648 continue;
4649 // Convert the other matching node(s), too;
4650 // otherwise, the DIVREM may get target-legalized into something
4651 // target-specific that we won't be able to recognize.
4652 unsigned UserOpc = User->getOpcode();
4653 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4654 User->getOperand(0) == Op0 &&
4655 User->getOperand(1) == Op1) {
4656 if (!combined) {
4657 if (UserOpc == OtherOpcode) {
4658 SDVTList VTs = DAG.getVTList(VT, VT);
4659 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4660 } else if (UserOpc == DivRemOpc) {
4661 combined = SDValue(User, 0);
4662 } else {
4663 assert(UserOpc == Opcode);
4664 continue;
4665 }
4666 }
4667 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4668 CombineTo(User, combined);
4669 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4670 CombineTo(User, combined.getValue(1));
4671 }
4672 }
4673 return combined;
4674}
4675
4676static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4677 SDValue N0 = N->getOperand(0);
4678 SDValue N1 = N->getOperand(1);
4679 EVT VT = N->getValueType(0);
4680 SDLoc DL(N);
4681
4682 unsigned Opc = N->getOpcode();
4683 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4684 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4685
4686 // X / undef -> undef
4687 // X % undef -> undef
4688 // X / 0 -> undef
4689 // X % 0 -> undef
4690 // NOTE: This includes vectors where any divisor element is zero/undef.
4691 if (DAG.isUndef(Opc, {N0, N1}))
4692 return DAG.getUNDEF(VT);
4693
4694 // undef / X -> 0
4695 // undef % X -> 0
4696 if (N0.isUndef())
4697 return DAG.getConstant(0, DL, VT);
4698
4699 // 0 / X -> 0
4700 // 0 % X -> 0
4701 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4702 if (N0C && N0C->isZero())
4703 return N0;
4704
4705 // X / X -> 1
4706 // X % X -> 0
4707 if (N0 == N1)
4708 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4709
4710 // X / 1 -> X
4711 // X % 1 -> 0
4712 // If this is a boolean op (single-bit element type), we can't have
4713 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4714 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4715 // it's a 1.
4716 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4717 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4718
4719 return SDValue();
4720}
4721
4722SDValue DAGCombiner::visitSDIV(SDNode *N) {
4723 SDValue N0 = N->getOperand(0);
4724 SDValue N1 = N->getOperand(1);
4725 EVT VT = N->getValueType(0);
4726 EVT CCVT = getSetCCResultType(VT);
4727 SDLoc DL(N);
4728
4729 // fold (sdiv c1, c2) -> c1/c2
4730 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4731 return C;
4732
4733 // fold vector ops
4734 if (VT.isVector())
4735 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4736 return FoldedVOp;
4737
4738 // fold (sdiv X, -1) -> 0-X
4739 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4740 if (N1C && N1C->isAllOnes())
4741 return DAG.getNegative(N0, DL, VT);
4742
4743 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4744 if (N1C && N1C->isMinSignedValue())
4745 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4746 DAG.getConstant(1, DL, VT),
4747 DAG.getConstant(0, DL, VT));
4748
4749 if (SDValue V = simplifyDivRem(N, DAG))
4750 return V;
4751
4752 if (SDValue NewSel = foldBinOpIntoSelect(N))
4753 return NewSel;
4754
4755 // If we know the sign bits of both operands are zero, strength reduce to a
4756 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4757 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4758 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4759
4760 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4761 // If the corresponding remainder node exists, update its users with
4762 // (Dividend - (Quotient * Divisor).
4763 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4764 { N0, N1 })) {
4765 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4766 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4767 AddToWorklist(Mul.getNode());
4768 AddToWorklist(Sub.getNode());
4769 CombineTo(RemNode, Sub);
4770 }
4771 return V;
4772 }
4773
4774 // sdiv, srem -> sdivrem
4775 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4776 // true. Otherwise, we break the simplification logic in visitREM().
4777  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4778  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4779 if (SDValue DivRem = useDivRem(N))
4780 return DivRem;
4781
4782 return SDValue();
4783}
4784
4785static bool isDivisorPowerOfTwo(SDValue Divisor) {
4786  // Helper for determining whether a value is a power-of-2 constant scalar or a
4787 // vector of such elements.
4788 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4789 if (C->isZero() || C->isOpaque())
4790 return false;
4791 if (C->getAPIntValue().isPowerOf2())
4792 return true;
4793 if (C->getAPIntValue().isNegatedPowerOf2())
4794 return true;
4795 return false;
4796 };
4797
4798 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4799}
4800
4801SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4802 SDLoc DL(N);
4803 EVT VT = N->getValueType(0);
4804 EVT CCVT = getSetCCResultType(VT);
4805 unsigned BitWidth = VT.getScalarSizeInBits();
4806
4807 // fold (sdiv X, pow2) -> simple ops after legalize
4808 // FIXME: We check for the exact bit here because the generic lowering gives
4809 // better results in that case. The target-specific lowering should learn how
4810 // to handle exact sdivs efficiently.
4811 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4812 // Target-specific implementation of sdiv x, pow2.
4813 if (SDValue Res = BuildSDIVPow2(N))
4814 return Res;
4815
4816 // Create constants that are functions of the shift amount value.
4817 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4818 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4819 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4820 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4821 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4822 if (!isConstantOrConstantVector(Inexact))
4823 return SDValue();
4824
4825 // Splat the sign bit into the register
4826 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4827 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4828 AddToWorklist(Sign.getNode());
4829
4830    // Add (N0 < 0) ? abs(divisor) - 1 : 0, so the shift rounds toward zero.
4831 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4832 AddToWorklist(Srl.getNode());
4833 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4834 AddToWorklist(Add.getNode());
4835 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4836 AddToWorklist(Sra.getNode());
4837
4838 // Special case: (sdiv X, 1) -> X
4839 // Special Case: (sdiv X, -1) -> 0-X
4840 SDValue One = DAG.getConstant(1, DL, VT);
4841    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4842    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4843 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4844 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4845 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4846
4847 // If dividing by a positive value, we're done. Otherwise, the result must
4848 // be negated.
4849 SDValue Zero = DAG.getConstant(0, DL, VT);
4850 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4851
4852 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4853 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4854 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4855 return Res;
4856 }
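  // Worked example of the sequence above (illustrative, i8, divisor 4):
  //   X = -7: Sign = 0xFF, Inexact = 8 - 2 = 6, Srl = 3, Add = -4,
  //           Sra = -4 >> 2 = -1, matching -7 /s 4 rounded toward zero.
  //   X =  7: Sign = 0, Srl = 0, Add = 7, Sra = 1.
  // The selects below then patch up divisors of +/-1 and negate the result for
  // negative divisors.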
4857
4858 // If integer divide is expensive and we satisfy the requirements, emit an
4859 // alternate sequence. Targets may check function attributes for size/speed
4860 // trade-offs.
4861  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4862  if (isConstantOrConstantVector(N1) &&
4863      !TLI.isIntDivCheap(N->getValueType(0), Attr))
4864 if (SDValue Op = BuildSDIV(N))
4865 return Op;
4866
4867 return SDValue();
4868}
4869
4870SDValue DAGCombiner::visitUDIV(SDNode *N) {
4871 SDValue N0 = N->getOperand(0);
4872 SDValue N1 = N->getOperand(1);
4873 EVT VT = N->getValueType(0);
4874 EVT CCVT = getSetCCResultType(VT);
4875 SDLoc DL(N);
4876
4877 // fold (udiv c1, c2) -> c1/c2
4878 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4879 return C;
4880
4881 // fold vector ops
4882 if (VT.isVector())
4883 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4884 return FoldedVOp;
4885
4886 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4887  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4888  if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4889 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4890 DAG.getConstant(1, DL, VT),
4891 DAG.getConstant(0, DL, VT));
4892 }
4893
4894 if (SDValue V = simplifyDivRem(N, DAG))
4895 return V;
4896
4897 if (SDValue NewSel = foldBinOpIntoSelect(N))
4898 return NewSel;
4899
4900 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4901 // If the corresponding remainder node exists, update its users with
4902    // (Dividend - (Quotient * Divisor)).
4903 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4904 { N0, N1 })) {
4905 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4906 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4907 AddToWorklist(Mul.getNode());
4908 AddToWorklist(Sub.getNode());
4909 CombineTo(RemNode, Sub);
4910 }
4911 return V;
4912 }
4913
4914  // udiv, urem -> udivrem
4915 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4916 // true. Otherwise, we break the simplification logic in visitREM().
4917  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4918  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4919 if (SDValue DivRem = useDivRem(N))
4920 return DivRem;
4921
4922 return SDValue();
4923}
4924
4925SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4926 SDLoc DL(N);
4927 EVT VT = N->getValueType(0);
4928
4929 // fold (udiv x, (1 << c)) -> x >>u c
4930 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4931 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4932 AddToWorklist(LogBase2.getNode());
4933
4934 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4935 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4936 AddToWorklist(Trunc.getNode());
4937 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4938 }
4939 }
4940
4941 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4942 if (N1.getOpcode() == ISD::SHL) {
4943 SDValue N10 = N1.getOperand(0);
4944 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4945 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4946 AddToWorklist(LogBase2.getNode());
4947
4948 EVT ADDVT = N1.getOperand(1).getValueType();
4949 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4950 AddToWorklist(Trunc.getNode());
4951 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4952 AddToWorklist(Add.getNode());
4953 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4954 }
4955 }
4956 }
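  // Worked example (illustrative): x /u (8 << y) becomes x >>u (3 + y), since
  // the divisor is 2^(3+y).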
4957
4958 // fold (udiv x, c) -> alternate
4959  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4960  if (isConstantOrConstantVector(N1) &&
4961      !TLI.isIntDivCheap(N->getValueType(0), Attr))
4962 if (SDValue Op = BuildUDIV(N))
4963 return Op;
4964
4965 return SDValue();
4966}
4967
4968SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4969 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4970 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4971 // Target-specific implementation of srem x, pow2.
4972 if (SDValue Res = BuildSREMPow2(N))
4973 return Res;
4974 }
4975 return SDValue();
4976}
4977
4978// handles ISD::SREM and ISD::UREM
4979SDValue DAGCombiner::visitREM(SDNode *N) {
4980 unsigned Opcode = N->getOpcode();
4981 SDValue N0 = N->getOperand(0);
4982 SDValue N1 = N->getOperand(1);
4983 EVT VT = N->getValueType(0);
4984 EVT CCVT = getSetCCResultType(VT);
4985
4986 bool isSigned = (Opcode == ISD::SREM);
4987 SDLoc DL(N);
4988
4989 // fold (rem c1, c2) -> c1%c2
4990 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4991 return C;
4992
4993 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4994 // Freeze the numerator to avoid a miscompile with an undefined value.
4995 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4996 CCVT.isVector() == VT.isVector()) {
4997 SDValue F0 = DAG.getFreeze(N0);
4998 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4999 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5000 }
5001
5002 if (SDValue V = simplifyDivRem(N, DAG))
5003 return V;
5004
5005 if (SDValue NewSel = foldBinOpIntoSelect(N))
5006 return NewSel;
5007
5008 if (isSigned) {
5009 // If we know the sign bits of both operands are zero, strength reduce to a
5010 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5011 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5012 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5013 } else {
5014 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5015 // fold (urem x, pow2) -> (and x, pow2-1)
5016 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5017 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5018 AddToWorklist(Add.getNode());
5019 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5020 }
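    // Worked example (illustrative): x %u 8 == x & 7.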
5021 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5022 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5023 // TODO: We should sink the following into isKnownToBePowerOfTwo
5024    // using an OrZero parameter analogous to our handling in ValueTracking.
5025 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5026        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5027      SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5028 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5029 AddToWorklist(Add.getNode());
5030 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5031 }
5032 }
5033
5033
5034  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5035
5036 // If X/C can be simplified by the division-by-constant logic, lower
5037 // X%C to the equivalent of X-X/C*C.
5038 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5039 // speculative DIV must not cause a DIVREM conversion. We guard against this
5040 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5041 // combine will not return a DIVREM. Regardless, checking cheapness here
5042 // makes sense since the simplification results in fatter code.
5043 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5044 if (isSigned) {
5045 // check if we can build faster implementation for srem
5046 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5047 return OptimizedRem;
5048 }
5049
5050 SDValue OptimizedDiv =
5051 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5052 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5053 // If the equivalent Div node also exists, update its users.
5054 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5055 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5056 { N0, N1 }))
5057 CombineTo(DivNode, OptimizedDiv);
5058 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5059 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5060 AddToWorklist(OptimizedDiv.getNode());
5061 AddToWorklist(Mul.getNode());
5062 return Sub;
5063 }
5064 }
5065
5066  // sdiv, srem -> sdivrem / udiv, urem -> udivrem
5067 if (SDValue DivRem = useDivRem(N))
5068 return DivRem.getValue(1);
5069
5070 return SDValue();
5071}
5072
5073SDValue DAGCombiner::visitMULHS(SDNode *N) {
5074 SDValue N0 = N->getOperand(0);
5075 SDValue N1 = N->getOperand(1);
5076 EVT VT = N->getValueType(0);
5077 SDLoc DL(N);
5078
5079 // fold (mulhs c1, c2)
5080 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5081 return C;
5082
5083 // canonicalize constant to RHS.
5084  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5085      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5086    return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5087
5088 if (VT.isVector()) {
5089 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5090 return FoldedVOp;
5091
5092 // fold (mulhs x, 0) -> 0
5093 // do not return N1, because undef node may exist.
5094    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5095      return DAG.getConstant(0, DL, VT);
5096 }
5097
5098 // fold (mulhs x, 0) -> 0
5099 if (isNullConstant(N1))
5100 return N1;
5101
5102 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5103 if (isOneConstant(N1))
5104 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
5105                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
5106                                       getShiftAmountTy(N0.getValueType())));
5107
5108 // fold (mulhs x, undef) -> 0
5109 if (N0.isUndef() || N1.isUndef())
5110 return DAG.getConstant(0, DL, VT);
5111
5112  // If the type that is twice as wide is legal, transform the mulhs to a wider multiply
5113 // plus a shift.
5114 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5115 !VT.isVector()) {
5116 MVT Simple = VT.getSimpleVT();
5117 unsigned SimpleSize = Simple.getSizeInBits();
5118 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5119 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5120 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5121 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5122 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5123 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5124 DAG.getConstant(SimpleSize, DL,
5125                                       getShiftAmountTy(N1.getValueType())));
5126      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5127 }
5128 }
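  // Worked example (illustrative): mulhs i16 a, b with a legal i32 multiply
  // becomes trunc(((sext i32 a) * (sext i32 b)) >>u 16), i.e. the top half of
  // the 32-bit signed product.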
5129
5130 return SDValue();
5131}
5132
5133SDValue DAGCombiner::visitMULHU(SDNode *N) {
5134 SDValue N0 = N->getOperand(0);
5135 SDValue N1 = N->getOperand(1);
5136 EVT VT = N->getValueType(0);
5137 SDLoc DL(N);
5138
5139 // fold (mulhu c1, c2)
5140 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5141 return C;
5142
5143 // canonicalize constant to RHS.
5144  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5145      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5146    return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5147
5148 if (VT.isVector()) {
5149 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5150 return FoldedVOp;
5151
5152 // fold (mulhu x, 0) -> 0
5153 // do not return N1, because undef node may exist.
5154    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5155      return DAG.getConstant(0, DL, VT);
5156 }
5157
5158 // fold (mulhu x, 0) -> 0
5159 if (isNullConstant(N1))
5160 return N1;
5161
5162 // fold (mulhu x, 1) -> 0
5163 if (isOneConstant(N1))
5164 return DAG.getConstant(0, DL, N0.getValueType());
5165
5166 // fold (mulhu x, undef) -> 0
5167 if (N0.isUndef() || N1.isUndef())
5168 return DAG.getConstant(0, DL, VT);
5169
5170 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5171 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5172 hasOperation(ISD::SRL, VT)) {
5173 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5174 unsigned NumEltBits = VT.getScalarSizeInBits();
5175 SDValue SRLAmt = DAG.getNode(
5176 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5177 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5178 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5179 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5180 }
5181 }
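  // Worked example (illustrative, i32): mulhu(x, 256) is bits [63:32] of the
  // 64-bit product x * 2^8, which is x >>u 24 == x >>u (32 - log2(256)).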
5182
5183  // If the type that is twice as wide is legal, transform the mulhu to a wider multiply
5184 // plus a shift.
5185 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5186 !VT.isVector()) {
5187 MVT Simple = VT.getSimpleVT();
5188 unsigned SimpleSize = Simple.getSizeInBits();
5189 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5190 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5191 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5192 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5193 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5194 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5195 DAG.getConstant(SimpleSize, DL,
5196                                       getShiftAmountTy(N1.getValueType())));
5197      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5198 }
5199 }
5200
5201 // Simplify the operands using demanded-bits information.
5202 // We don't have demanded bits support for MULHU so this just enables constant
5203 // folding based on known bits.
5204  if (SimplifyDemandedBits(SDValue(N, 0)))
5205    return SDValue(N, 0);
5206
5207 return SDValue();
5208}
5209
5210SDValue DAGCombiner::visitAVG(SDNode *N) {
5211 unsigned Opcode = N->getOpcode();
5212 SDValue N0 = N->getOperand(0);
5213 SDValue N1 = N->getOperand(1);
5214 EVT VT = N->getValueType(0);
5215 SDLoc DL(N);
5216 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5217
5218 // fold (avg c1, c2)
5219 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5220 return C;
5221
5222 // canonicalize constant to RHS.
5223  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5224      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5225    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5226
5227 if (VT.isVector())
5228 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5229 return FoldedVOp;
5230
5231 // fold (avg x, undef) -> x
5232 if (N0.isUndef())
5233 return N1;
5234 if (N1.isUndef())
5235 return N0;
5236
5237 // fold (avg x, x) --> x
5238 if (N0 == N1 && Level >= AfterLegalizeTypes)
5239 return N0;
5240
5241 // fold (avgfloor x, 0) -> x >> 1
5242 SDValue X, Y;
5243  if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5244    return DAG.getNode(ISD::SRA, DL, VT, X,
5245 DAG.getShiftAmountConstant(1, VT, DL));
5246  if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5247    return DAG.getNode(ISD::SRL, DL, VT, X,
5248 DAG.getShiftAmountConstant(1, VT, DL));
5249
5250 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5251 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5252 if (!IsSigned &&
5253 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5254 X.getValueType() == Y.getValueType() &&
5255 hasOperation(Opcode, X.getValueType())) {
5256 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5257 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5258 }
5259 if (IsSigned &&
5260 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5261 X.getValueType() == Y.getValueType() &&
5262 hasOperation(Opcode, X.getValueType())) {
5263 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5264 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5265 }
5266
5267 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5268 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5269 // Check if avgflooru isn't legal/custom but avgceilu is.
5270 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5271 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5272 if (DAG.isKnownNeverZero(N1))
5273 return DAG.getNode(
5274 ISD::AVGCEILU, DL, VT, N0,
5275 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5276 if (DAG.isKnownNeverZero(N0))
5277 return DAG.getNode(
5278 ISD::AVGCEILU, DL, VT, N1,
5279 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5280 }
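  // Why this is sound (illustrative): avgflooru(x, y) == (x + y) >> 1 and
  // avgceilu(a, b) == (a + b + 1) >> 1, both computed without overflow, so for
  // y != 0 we have avgceilu(x, y - 1) == (x + (y - 1) + 1) >> 1 == avgflooru(x, y).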
5281
5282 return SDValue();
5283}
5284
5285SDValue DAGCombiner::visitABD(SDNode *N) {
5286 unsigned Opcode = N->getOpcode();
5287 SDValue N0 = N->getOperand(0);
5288 SDValue N1 = N->getOperand(1);
5289 EVT VT = N->getValueType(0);
5290 SDLoc DL(N);
5291
5292 // fold (abd c1, c2)
5293 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5294 return C;
5295
5296 // canonicalize constant to RHS.
5297  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5298      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5299    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5300
5301 if (VT.isVector())
5302 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5303 return FoldedVOp;
5304
5305 // fold (abd x, undef) -> 0
5306 if (N0.isUndef() || N1.isUndef())
5307 return DAG.getConstant(0, DL, VT);
5308
5309 SDValue X;
5310
5311 // fold (abds x, 0) -> abs x
5312  if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5313      (!LegalOperations || hasOperation(ISD::ABS, VT)))
5314 return DAG.getNode(ISD::ABS, DL, VT, X);
5315
5316 // fold (abdu x, 0) -> x
5317  if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5318    return X;
5319
5320 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5321 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5322 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5323 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5324
5325 return SDValue();
5326}
5327
5328/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5329/// give the opcodes for the two computations that are being performed. Return
5330/// the simplified value if a simplification was made.
5331SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5332 unsigned HiOp) {
5333 // If the high half is not needed, just compute the low half.
5334 bool HiExists = N->hasAnyUseOfValue(1);
5335 if (!HiExists && (!LegalOperations ||
5336 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5337 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5338 return CombineTo(N, Res, Res);
5339 }
5340
5341 // If the low half is not needed, just compute the high half.
5342 bool LoExists = N->hasAnyUseOfValue(0);
5343 if (!LoExists && (!LegalOperations ||
5344 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5345 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5346 return CombineTo(N, Res, Res);
5347 }
5348
5349 // If both halves are used, return as it is.
5350 if (LoExists && HiExists)
5351 return SDValue();
5352
5353 // If the two computed results can be simplified separately, separate them.
5354 if (LoExists) {
5355 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5356 AddToWorklist(Lo.getNode());
5357 SDValue LoOpt = combine(Lo.getNode());
5358 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5359 (!LegalOperations ||
5360 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5361 return CombineTo(N, LoOpt, LoOpt);
5362 }
5363
5364 if (HiExists) {
5365 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5366 AddToWorklist(Hi.getNode());
5367 SDValue HiOpt = combine(Hi.getNode());
5368 if (HiOpt.getNode() && HiOpt != Hi &&
5369 (!LegalOperations ||
5370 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5371 return CombineTo(N, HiOpt, HiOpt);
5372 }
5373
5374 return SDValue();
5375}
5376
5377SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5378 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5379 return Res;
5380
5381 SDValue N0 = N->getOperand(0);
5382 SDValue N1 = N->getOperand(1);
5383 EVT VT = N->getValueType(0);
5384 SDLoc DL(N);
5385
5386 // Constant fold.
5387 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5388 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5389
5390 // canonicalize constant to RHS (vector doesn't have to splat)
5391  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5392      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5393    return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5394
5395  // If the type that is twice as wide is legal, transform the smul_lohi to a wider
5396 // multiply plus a shift.
5397 if (VT.isSimple() && !VT.isVector()) {
5398 MVT Simple = VT.getSimpleVT();
5399 unsigned SimpleSize = Simple.getSizeInBits();
5400 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5401 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5402 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5403 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5404 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5405 // Compute the high part as N1.
5406 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5407 DAG.getConstant(SimpleSize, DL,
5408 getShiftAmountTy(Lo.getValueType())));
5409 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5410 // Compute the low part as N0.
5411 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5412 return CombineTo(N, Lo, Hi);
5413 }
5414 }
5415
5416 return SDValue();
5417}
5418
5419SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5420 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5421 return Res;
5422
5423 SDValue N0 = N->getOperand(0);
5424 SDValue N1 = N->getOperand(1);
5425 EVT VT = N->getValueType(0);
5426 SDLoc DL(N);
5427
5428 // Constant fold.
5429 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5430 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5431
5432 // canonicalize constant to RHS (vector doesn't have to splat)
5433  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5434      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5435    return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5436
5437 // (umul_lohi N0, 0) -> (0, 0)
5438 if (isNullConstant(N1)) {
5439 SDValue Zero = DAG.getConstant(0, DL, VT);
5440 return CombineTo(N, Zero, Zero);
5441 }
5442
5443 // (umul_lohi N0, 1) -> (N0, 0)
5444 if (isOneConstant(N1)) {
5445 SDValue Zero = DAG.getConstant(0, DL, VT);
5446 return CombineTo(N, N0, Zero);
5447 }
5448
5449  // If the type that is twice as wide is legal, transform the umul_lohi to a wider
5450 // multiply plus a shift.
5451 if (VT.isSimple() && !VT.isVector()) {
5452 MVT Simple = VT.getSimpleVT();
5453 unsigned SimpleSize = Simple.getSizeInBits();
5454 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5455 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5456 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5457 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5458 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5459 // Compute the high part as N1.
5460 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5461 DAG.getConstant(SimpleSize, DL,
5462 getShiftAmountTy(Lo.getValueType())));
5463 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5464 // Compute the low part as N0.
5465 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5466 return CombineTo(N, Lo, Hi);
5467 }
5468 }
5469
5470 return SDValue();
5471}
5472
5473SDValue DAGCombiner::visitMULO(SDNode *N) {
5474 SDValue N0 = N->getOperand(0);
5475 SDValue N1 = N->getOperand(1);
5476 EVT VT = N0.getValueType();
5477 bool IsSigned = (ISD::SMULO == N->getOpcode());
5478
5479 EVT CarryVT = N->getValueType(1);
5480 SDLoc DL(N);
5481
5482  ConstantSDNode *N0C = isConstOrConstSplat(N0);
5483  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5484
5485 // fold operation with constant operands.
5486 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5487 // multiple results.
5488 if (N0C && N1C) {
5489 bool Overflow;
5490 APInt Result =
5491 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5492 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5493 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5494 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5495 }
5496
5497 // canonicalize constant to RHS.
5498  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5499      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5500    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5501
5502 // fold (mulo x, 0) -> 0 + no carry out
5503 if (isNullOrNullSplat(N1))
5504 return CombineTo(N, DAG.getConstant(0, DL, VT),
5505 DAG.getConstant(0, DL, CarryVT));
5506
5507 // (mulo x, 2) -> (addo x, x)
5508 // FIXME: This needs a freeze.
5509 if (N1C && N1C->getAPIntValue() == 2 &&
5510 (!IsSigned || VT.getScalarSizeInBits() > 2))
5511 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5512 N->getVTList(), N0, N0);
5513
5514 // A 1 bit SMULO overflows if both inputs are 1.
5515 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5516 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5517 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5518 DAG.getConstant(0, DL, VT), ISD::SETNE);
5519 return CombineTo(N, And, Cmp);
5520 }
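  // Rationale (illustrative): i1 signed values are 0 and -1, and the only
  // product that overflows is -1 * -1 == +1 (not representable in i1), so the
  // overflow bit is exactly N0 & N1, which also equals the low result bit.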
5521
5522 // If it cannot overflow, transform into a mul.
5523 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5524 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5525 DAG.getConstant(0, DL, CarryVT));
5526 return SDValue();
5527}
5528
5529// Function to calculate whether the Min/Max pair of SDNodes (potentially
5530// swapped around) make a signed saturate pattern, clamping to between a signed
5531// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5532// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5533// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5534// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5535static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5536                                  SDValue N3, ISD::CondCode CC, unsigned &BW,
5537 bool &Unsigned, SelectionDAG &DAG) {
5538 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5539 ISD::CondCode CC) {
5540 // The compare and select operand should be the same or the select operands
5541 // should be truncated versions of the comparison.
5542 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5543 return 0;
5544 // The constants need to be the same or a truncated version of each other.
5547 if (!N1C || !N3C)
5548 return 0;
5549 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5550 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5551 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5552 return 0;
5553 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5554 };
5555
5556 // Check the initial value is a SMIN/SMAX equivalent.
5557 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5558 if (!Opcode0)
5559 return SDValue();
5560
5561 // We could only need one range check, if the fptosi could never produce
5562 // the upper value.
5563 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5564 if (isNullOrNullSplat(N3)) {
5565 EVT IntVT = N0.getValueType().getScalarType();
5566 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5567 if (FPVT.isSimple()) {
5568 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5569 const fltSemantics &Semantics = InputTy->getFltSemantics();
5570 uint32_t MinBitWidth =
5571 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5572 if (IntVT.getSizeInBits() >= MinBitWidth) {
5573 Unsigned = true;
5574 BW = PowerOf2Ceil(MinBitWidth);
5575 return N0;
5576 }
5577 }
5578 }
5579 }
5580
5581 SDValue N00, N01, N02, N03;
5582 ISD::CondCode N0CC;
5583 switch (N0.getOpcode()) {
5584 case ISD::SMIN:
5585 case ISD::SMAX:
5586 N00 = N02 = N0.getOperand(0);
5587 N01 = N03 = N0.getOperand(1);
5588 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5589 break;
5590 case ISD::SELECT_CC:
5591 N00 = N0.getOperand(0);
5592 N01 = N0.getOperand(1);
5593 N02 = N0.getOperand(2);
5594 N03 = N0.getOperand(3);
5595 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5596 break;
5597 case ISD::SELECT:
5598 case ISD::VSELECT:
5599 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5600 return SDValue();
5601 N00 = N0.getOperand(0).getOperand(0);
5602 N01 = N0.getOperand(0).getOperand(1);
5603 N02 = N0.getOperand(1);
5604 N03 = N0.getOperand(2);
5605 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5606 break;
5607 default:
5608 return SDValue();
5609 }
5610
5611 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5612 if (!Opcode1 || Opcode0 == Opcode1)
5613 return SDValue();
5614
5615 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5616 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5617 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5618 return SDValue();
5619
5620 const APInt &MinC = MinCOp->getAPIntValue();
5621 const APInt &MaxC = MaxCOp->getAPIntValue();
5622 APInt MinCPlus1 = MinC + 1;
5623 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5624 BW = MinCPlus1.exactLogBase2() + 1;
5625 Unsigned = false;
5626 return N02;
5627 }
5628
5629 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5630 BW = MinCPlus1.exactLogBase2();
5631 Unsigned = true;
5632 return N02;
5633 }
5634
5635 return SDValue();
5636}
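// Worked example (illustrative): smin(smax(X, -128), 127) has an SMIN constant
// of 127 and an SMAX constant of -128, so -MaxC == MinC + 1 == 128 is a power
// of two and BW == 8: a signed i8 saturate. Likewise smin(smax(X, 0), 255)
// gives BW == 8 with Unsigned == true.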
5637
5638static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5639                                           SDValue N3, ISD::CondCode CC,
5640                                           SelectionDAG &DAG) {
5641 unsigned BW;
5642 bool Unsigned;
5643 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5644 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5645 return SDValue();
5646 EVT FPVT = Fp.getOperand(0).getValueType();
5647 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5648 if (FPVT.isVector())
5649 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5650 FPVT.getVectorElementCount());
5651 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5652 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5653 return SDValue();
5654 SDLoc DL(Fp);
5655 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5656 DAG.getValueType(NewVT.getScalarType()));
5657 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5658}
5659
5660static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5661                                         SDValue N3, ISD::CondCode CC,
5662                                         SelectionDAG &DAG) {
5663 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5664  // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5665 // be truncated versions of the setcc (N0/N1).
5666 if ((N0 != N2 &&
5667 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5668      N0.getOpcode() != ISD::FP_TO_UINT)
5669    return SDValue();
5672 if (!N1C || !N3C)
5673 return SDValue();
5674 const APInt &C1 = N1C->getAPIntValue();
5675 const APInt &C3 = N3C->getAPIntValue();
5676 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5677 C1 != C3.zext(C1.getBitWidth()))
5678 return SDValue();
5679
5680 unsigned BW = (C1 + 1).exactLogBase2();
5681 EVT FPVT = N0.getOperand(0).getValueType();
5682 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5683 if (FPVT.isVector())
5684 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5685 FPVT.getVectorElementCount());
5686  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5687                                                        FPVT, NewVT))
5688 return SDValue();
5689
5690 SDValue Sat =
5691 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5692 DAG.getValueType(NewVT.getScalarType()));
5693 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5694}
5695
5696SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5697 SDValue N0 = N->getOperand(0);
5698 SDValue N1 = N->getOperand(1);
5699 EVT VT = N0.getValueType();
5700 unsigned Opcode = N->getOpcode();
5701 SDLoc DL(N);
5702
5703 // fold operation with constant operands.
5704 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5705 return C;
5706
5707 // If the operands are the same, this is a no-op.
5708 if (N0 == N1)
5709 return N0;
5710
5711 // canonicalize constant to RHS
5712  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5713      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5714    return DAG.getNode(Opcode, DL, VT, N1, N0);
5715
5716 // fold vector ops
5717 if (VT.isVector())
5718 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5719 return FoldedVOp;
5720
5721 // reassociate minmax
5722 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5723 return RMINMAX;
5724
5725  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5726 // Only do this if:
5727 // 1. The current op isn't legal and the flipped is.
5728 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5729 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5730 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5731 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5732 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5733 unsigned AltOpcode;
5734 switch (Opcode) {
5735 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5736 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5737 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5738 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5739 default: llvm_unreachable("Unknown MINMAX opcode");
5740 }
5741 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5742 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5743 }
5744
5745 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5746    if (SDValue S = PerformMinMaxFpToSatCombine(
5747            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5748 return S;
5749 if (Opcode == ISD::UMIN)
5750 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5751 return S;
5752
5753 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5754 auto ReductionOpcode = [](unsigned Opcode) {
5755 switch (Opcode) {
5756 case ISD::SMIN:
5757 return ISD::VECREDUCE_SMIN;
5758 case ISD::SMAX:
5759 return ISD::VECREDUCE_SMAX;
5760 case ISD::UMIN:
5761 return ISD::VECREDUCE_UMIN;
5762 case ISD::UMAX:
5763 return ISD::VECREDUCE_UMAX;
5764 default:
5765 llvm_unreachable("Unexpected opcode");
5766 }
5767 };
5768 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5769 SDLoc(N), VT, N0, N1))
5770 return SD;
5771
5772 // Simplify the operands using demanded-bits information.
5773  if (SimplifyDemandedBits(SDValue(N, 0)))
5774    return SDValue(N, 0);
5775
5776 return SDValue();
5777}
5778
5779/// If this is a bitwise logic instruction and both operands have the same
5780/// opcode, try to sink the other opcode after the logic instruction.
5781SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5782 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5783 EVT VT = N0.getValueType();
5784 unsigned LogicOpcode = N->getOpcode();
5785 unsigned HandOpcode = N0.getOpcode();
5786 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5787 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5788
5789 // Bail early if none of these transforms apply.
5790 if (N0.getNumOperands() == 0)
5791 return SDValue();
5792
5793 // FIXME: We should check number of uses of the operands to not increase
5794 // the instruction count for all transforms.
5795
5796 // Handle size-changing casts (or sign_extend_inreg).
5797 SDValue X = N0.getOperand(0);
5798 SDValue Y = N1.getOperand(0);
5799 EVT XVT = X.getValueType();
5800 SDLoc DL(N);
5801 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5802 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5803 N0.getOperand(1) == N1.getOperand(1))) {
5804 // If both operands have other uses, this transform would create extra
5805 // instructions without eliminating anything.
5806 if (!N0.hasOneUse() && !N1.hasOneUse())
5807 return SDValue();
5808 // We need matching integer source types.
5809 if (XVT != Y.getValueType())
5810 return SDValue();
5811 // Don't create an illegal op during or after legalization. Don't ever
5812 // create an unsupported vector op.
5813 if ((VT.isVector() || LegalOperations) &&
5814 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5815 return SDValue();
5816 // Avoid infinite looping with PromoteIntBinOp.
5817 // TODO: Should we apply desirable/legal constraints to all opcodes?
5818 if ((HandOpcode == ISD::ANY_EXTEND ||
5819 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5820 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5821 return SDValue();
5822 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5823 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5824 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5825 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5826 return DAG.getNode(HandOpcode, DL, VT, Logic);
5827 }
5828
5829 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5830 if (HandOpcode == ISD::TRUNCATE) {
5831 // If both operands have other uses, this transform would create extra
5832 // instructions without eliminating anything.
5833 if (!N0.hasOneUse() && !N1.hasOneUse())
5834 return SDValue();
5835 // We need matching source types.
5836 if (XVT != Y.getValueType())
5837 return SDValue();
5838 // Don't create an illegal op during or after legalization.
5839 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5840 return SDValue();
5841 // Be extra careful sinking truncate. If it's free, there's no benefit in
5842 // widening a binop. Also, don't create a logic op on an illegal type.
5843 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5844 return SDValue();
5845 if (!TLI.isTypeLegal(XVT))
5846 return SDValue();
5847 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5848 return DAG.getNode(HandOpcode, DL, VT, Logic);
5849 }
5850
5851 // For binops SHL/SRL/SRA/AND:
5852 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5853 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5854 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5855 N0.getOperand(1) == N1.getOperand(1)) {
5856 // If either operand has other uses, this transform is not an improvement.
5857 if (!N0.hasOneUse() || !N1.hasOneUse())
5858 return SDValue();
5859 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5860 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5861 }
5862
5863 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5864 if (HandOpcode == ISD::BSWAP) {
5865 // If either operand has other uses, this transform is not an improvement.
5866 if (!N0.hasOneUse() || !N1.hasOneUse())
5867 return SDValue();
5868 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5869 return DAG.getNode(HandOpcode, DL, VT, Logic);
5870 }
5871
5872 // For funnel shifts FSHL/FSHR:
5873 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5874 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5875 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5876 N0.getOperand(2) == N1.getOperand(2)) {
5877 if (!N0.hasOneUse() || !N1.hasOneUse())
5878 return SDValue();
5879 SDValue X1 = N0.getOperand(1);
5880 SDValue Y1 = N1.getOperand(1);
5881 SDValue S = N0.getOperand(2);
5882 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5883 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5884 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5885 }
5886
5887 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5888 // Only perform this optimization up until type legalization, before
5889  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5890 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5891 // we don't want to undo this promotion.
5892 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5893 // on scalars.
5894 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5895 Level <= AfterLegalizeTypes) {
5896 // Input types must be integer and the same.
5897 if (XVT.isInteger() && XVT == Y.getValueType() &&
5898 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5899 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5900 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5901 return DAG.getNode(HandOpcode, DL, VT, Logic);
5902 }
5903 }
5904
5905 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5906 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5907 // If both shuffles use the same mask, and both shuffle within a single
5908 // vector, then it is worthwhile to move the swizzle after the operation.
5909 // The type-legalizer generates this pattern when loading illegal
5910 // vector types from memory. In many cases this allows additional shuffle
5911 // optimizations.
5912 // There are other cases where moving the shuffle after the xor/and/or
5913 // is profitable even if shuffles don't perform a swizzle.
5914 // If both shuffles use the same mask, and both shuffles have the same first
5915 // or second operand, then it might still be profitable to move the shuffle
5916 // after the xor/and/or operation.
5917 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5918 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5919 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5920 assert(X.getValueType() == Y.getValueType() &&
5921 "Inputs to shuffles are not the same type");
5922
5923 // Check that both shuffles use the same mask. The masks are known to be of
5924 // the same length because the result vector type is the same.
5925 // Check also that shuffles have only one use to avoid introducing extra
5926 // instructions.
5927 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5928 !SVN0->getMask().equals(SVN1->getMask()))
5929 return SDValue();
5930
5931 // Don't try to fold this node if it requires introducing a
5932 // build vector of all zeros that might be illegal at this stage.
5933 SDValue ShOp = N0.getOperand(1);
5934 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5935 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5936
5937 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5938 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5939 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5940 N0.getOperand(0), N1.getOperand(0));
5941 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5942 }
5943
5944 // Don't try to fold this node if it requires introducing a
5945 // build vector of all zeros that might be illegal at this stage.
5946 ShOp = N0.getOperand(0);
5947 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5948 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5949
5950 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5951 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5952 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5953 N1.getOperand(1));
5954 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5955 }
5956 }
5957
5958 return SDValue();
5959}
5960
5961/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5962SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5963 const SDLoc &DL) {
5964 SDValue LL, LR, RL, RR, N0CC, N1CC;
5965 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5966 !isSetCCEquivalent(N1, RL, RR, N1CC))
5967 return SDValue();
5968
5969 assert(N0.getValueType() == N1.getValueType() &&
5970 "Unexpected operand types for bitwise logic op");
5971 assert(LL.getValueType() == LR.getValueType() &&
5972 RL.getValueType() == RR.getValueType() &&
5973 "Unexpected operand types for setcc");
5974
5975 // If we're here post-legalization or the logic op type is not i1, the logic
5976 // op type must match a setcc result type. Also, all folds require new
5977 // operations on the left and right operands, so those types must match.
5978 EVT VT = N0.getValueType();
5979 EVT OpVT = LL.getValueType();
5980 if (LegalOperations || VT.getScalarType() != MVT::i1)
5981 if (VT != getSetCCResultType(OpVT))
5982 return SDValue();
5983 if (OpVT != RL.getValueType())
5984 return SDValue();
5985
5986 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5987 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5988 bool IsInteger = OpVT.isInteger();
5989 if (LR == RR && CC0 == CC1 && IsInteger) {
5990 bool IsZero = isNullOrNullSplat(LR);
5991 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5992
5993 // All bits clear?
5994 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5995 // All sign bits clear?
5996 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5997 // Any bits set?
5998 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5999 // Any sign bits set?
6000 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6001
6002 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6003 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6004 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6005 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6006 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6007 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6008 AddToWorklist(Or.getNode());
6009 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6010 }
6011
6012 // All bits set?
6013 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6014 // All sign bits set?
6015 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6016 // Any bits clear?
6017 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6018 // Any sign bits clear?
6019 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6020
6021 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6022 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6023 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6024 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6025 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6026 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6027 AddToWorklist(And.getNode());
6028 return DAG.getSetCC(DL, VT, And, LR, CC1);
6029 }
6030 }
6031
6032 // TODO: What is the 'or' equivalent of this fold?
6033 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6034 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6035 IsInteger && CC0 == ISD::SETNE &&
6036 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6037 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6038 SDValue One = DAG.getConstant(1, DL, OpVT);
6039 SDValue Two = DAG.getConstant(2, DL, OpVT);
6040 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6041 AddToWorklist(Add.getNode());
6042 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6043 }
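  // Rationale (illustrative): X + 1 wraps to 0 for X == -1 and to 1 for X == 0,
  // so (X + 1) >=u 2 holds exactly when X is neither 0 nor -1.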
6044
6045 // Try more general transforms if the predicates match and the only user of
6046 // the compares is the 'and' or 'or'.
6047 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6048 N0.hasOneUse() && N1.hasOneUse()) {
6049 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6050 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6051 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6052 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6053 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6054 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6055 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6056 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6057 }
6058
6059 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6060 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6061 // Match a shared variable operand and 2 non-opaque constant operands.
6062 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6063 // The difference of the constants must be a single bit.
6064 const APInt &CMax =
6065 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6066 const APInt &CMin =
6067 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6068 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6069 };
6070 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6071 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6072 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6073 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6074 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6075 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6076 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6077 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6078 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6079 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6080 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6081 }
6082 }
6083 }
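  // Worked example for the add+and transform above (illustrative):
  // (X != 8) & (X != 12) has CMin == 8, CMax == 12, CMax - CMin == 4, so it
  // becomes ((X - 8) & ~4) != 0, which is false only for X in {8, 12}.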
6084
6085 // Canonicalize equivalent operands to LL == RL.
6086 if (LL == RR && LR == RL) {
6087    CC1 = ISD::getSetCCSwappedOperands(CC1);
6088    std::swap(RL, RR);
6089 }
6090
6091 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6092 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6093 if (LL == RL && LR == RR) {
6094 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6095 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6096 if (NewCC != ISD::SETCC_INVALID &&
6097 (!LegalOperations ||
6098 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6099 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6100 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6101 }
6102
6103 return SDValue();
6104}
6105
6106static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6107 SelectionDAG &DAG) {
6108 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6109}
6110
6111static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6112 SelectionDAG &DAG) {
6113 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6114}
6115
6116static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6117 ISD::CondCode CC, unsigned OrAndOpcode,
6118 SelectionDAG &DAG,
6119 bool isFMAXNUMFMINNUM_IEEE,
6120 bool isFMAXNUMFMINNUM) {
6121 // The optimization cannot be applied for all the predicates because
6122 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6123 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6124 // applied at all if one of the operands is a signaling NaN.
6125
6126 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6127 // are non NaN values.
6128 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6129 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6130 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6131 isFMAXNUMFMINNUM_IEEE
6132               ? ISD::FMINNUM_IEEE
6133               : ISD::DELETED_NODE;
6134  else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6135 (OrAndOpcode == ISD::OR)) ||
6136 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6137 (OrAndOpcode == ISD::AND)))
6138 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6139 isFMAXNUMFMINNUM_IEEE
6140               ? ISD::FMAXNUM_IEEE
6141               : ISD::DELETED_NODE;
6142  // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6143 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6144 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6145 // that there are not any sNaNs, then the optimization is not valid
6146 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6147 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6148 // we can prove that we do not have any sNaNs, then we can do the
6149 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6150 // cases.
6151 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6152 (OrAndOpcode == ISD::OR)) ||
6153 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6154 (OrAndOpcode == ISD::AND)))
6155 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6156 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6157 isFMAXNUMFMINNUM_IEEE
6158                                  ? ISD::FMINNUM_IEEE
6159                                  : ISD::DELETED_NODE;
6160  else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6161 (OrAndOpcode == ISD::OR)) ||
6162 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6163 (OrAndOpcode == ISD::AND)))
6164 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6165 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6166 isFMAXNUMFMINNUM_IEEE
6167                                  ? ISD::FMAXNUM_IEEE
6168                                  : ISD::DELETED_NODE;
6169  return ISD::DELETED_NODE;
6170}
6171
6172static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6173  using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6174  assert(
6175 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6176 "Invalid Op to combine SETCC with");
6177
6178 // TODO: Search past casts/truncates.
6179 SDValue LHS = LogicOp->getOperand(0);
6180 SDValue RHS = LogicOp->getOperand(1);
6181 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6182 !LHS->hasOneUse() || !RHS->hasOneUse())
6183 return SDValue();
6184
6185 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6186  AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6187      LogicOp, LHS.getNode(), RHS.getNode());
6188
6189 SDValue LHS0 = LHS->getOperand(0);
6190 SDValue RHS0 = RHS->getOperand(0);
6191 SDValue LHS1 = LHS->getOperand(1);
6192 SDValue RHS1 = RHS->getOperand(1);
6193 // TODO: We don't actually need a splat here, for vectors we just need the
6194 // invariants to hold for each element.
6195 auto *LHS1C = isConstOrConstSplat(LHS1);
6196 auto *RHS1C = isConstOrConstSplat(RHS1);
6197 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6198 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6199 EVT VT = LogicOp->getValueType(0);
6200 EVT OpVT = LHS0.getValueType();
6201 SDLoc DL(LogicOp);
6202
6203 // Check if the operands of an and/or operation are comparisons and if they
6204 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6205 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6206 // sequence will be replaced with min-cmp sequence:
6207 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6208 // and and-cmp-cmp will be replaced with max-cmp sequence:
6209 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6210 // The optimization does not work for `==` or `!=` .
6211 // The two comparisons should have either the same predicate or the
6212 // predicate of one of the comparisons is the opposite of the other one.
6213 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6214                               TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6215  bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6216                          TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6217 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6218 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6219 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6220 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6221 (OpVT.isFloatingPoint() &&
6222 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6224 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6225 CCL != ISD::SETTRUE &&
6226 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6227
6228 SDValue CommonValue, Operand1, Operand2;
6229    ISD::CondCode CC = ISD::SETCC_INVALID;
6230    if (CCL == CCR) {
6231 if (LHS0 == RHS0) {
6232 CommonValue = LHS0;
6233 Operand1 = LHS1;
6234 Operand2 = RHS1;
6235        CC = CCL;
6236      } else if (LHS1 == RHS1) {
6237 CommonValue = LHS1;
6238 Operand1 = LHS0;
6239 Operand2 = RHS0;
6240 CC = CCL;
6241 }
6242 } else {
6243 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6244 if (LHS0 == RHS1) {
6245 CommonValue = LHS0;
6246 Operand1 = LHS1;
6247 Operand2 = RHS0;
6248 CC = CCR;
6249 } else if (RHS0 == LHS1) {
6250 CommonValue = LHS1;
6251 Operand1 = LHS0;
6252 Operand2 = RHS1;
6253 CC = CCL;
6254 }
6255 }
6256
6257 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6258 // handle it using OR/AND.
6259 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6260      CC = ISD::SETCC_INVALID;
6261    else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6262      CC = ISD::SETCC_INVALID;
6263
6264 if (CC != ISD::SETCC_INVALID) {
6265 unsigned NewOpcode = ISD::DELETED_NODE;
6266 bool IsSigned = isSignedIntSetCC(CC);
6267 if (OpVT.isInteger()) {
6268 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6269 CC == ISD::SETLT || CC == ISD::SETULT);
6270 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6271 if (IsLess == IsOr)
6272 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6273 else
6274 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6275 } else if (OpVT.isFloatingPoint())
6276 NewOpcode =
6277 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6278 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6279
6280 if (NewOpcode != ISD::DELETED_NODE) {
6281 SDValue MinMaxValue =
6282 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6283 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6284 }
6285 }
6286 }
6287
6288 if (TargetPreference == AndOrSETCCFoldKind::None)
6289 return SDValue();
6290
6291 if (CCL == CCR &&
6292 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6293 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6294 const APInt &APLhs = LHS1C->getAPIntValue();
6295 const APInt &APRhs = RHS1C->getAPIntValue();
6296
6297 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6298 // case this is just a compare).
6299 if (APLhs == (-APRhs) &&
6300 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6301 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6302 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6303 // (icmp eq A, C) | (icmp eq A, -C)
6304 // -> (icmp eq Abs(A), C)
6305 // (icmp ne A, C) & (icmp ne A, -C)
6306 // -> (icmp ne Abs(A), C)
6307 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6308 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6309 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6310 } else if (TargetPreference &
6311 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6312
6313 // AndOrSETCCFoldKind::AddAnd:
6314 // A == C0 | A == C1
6315 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6316 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6317 // A != C0 & A != C1
6318 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6319 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6320
6321 // AndOrSETCCFoldKind::NotAnd:
6322 // A == C0 | A == C1
6323 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6324 // -> ~A & smin(C0, C1) == 0
6325 // A != C0 & A != C1
6326 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6327 // -> ~A & smin(C0, C1) != 0
6328
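      // [Editorial illustration, not part of the upstream source] AddAnd with
      // C0 = 4, C1 = 12: smin = 4, smax = 12, Dif = 8 (a power of two), so
      //   (A == 4) | (A == 12)  ->  ((A - 4) & ~8) == 0
      static_assert(((4u - 4u) & ~8u) == 0 && ((12u - 4u) & ~8u) == 0 &&
                        ((5u - 4u) & ~8u) != 0,
                    "illustrative check of the AddAnd rewrite for C0=4, C1=12");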
6329 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6330 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6331 APInt Dif = MaxC - MinC;
6332 if (!Dif.isZero() && Dif.isPowerOf2()) {
6333 if (MaxC.isAllOnes() &&
6334 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6335 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6336 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6337 DAG.getConstant(MinC, DL, OpVT));
6338 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6339 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6340 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6341
6342 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6343 DAG.getConstant(-MinC, DL, OpVT));
6344 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6345 DAG.getConstant(~Dif, DL, OpVT));
6346 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6347 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6348 }
6349 }
6350 }
6351 }
6352
6353 return SDValue();
6354}
6355
6356// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6357// We canonicalize to the `select` form in the middle end, but the `and` form
6358// gets better codegen, and all tested targets (arm, x86, riscv) prefer it.
6359static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6360 const SDLoc &DL, SelectionDAG &DAG) {
6361 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6362 if (!isNullConstant(F))
6363 return SDValue();
6364
6365 EVT CondVT = Cond.getValueType();
6366 if (TLI.getBooleanContents(CondVT) !=
6367 TargetLowering::ZeroOrOneBooleanContent)
6368 return SDValue();
6369
6370 if (T.getOpcode() != ISD::AND)
6371 return SDValue();
6372
6373 if (!isOneConstant(T.getOperand(1)))
6374 return SDValue();
6375
6376 EVT OpVT = T.getValueType();
6377
6378 SDValue CondMask =
6379 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6380 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6381}
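// [Editorial illustration, not part of the upstream source] With a 0/1 boolean
// condition c and X = 3, `select c, (X & 1), 0` and `and (zext c), X` agree:
static_assert((true ? (3 & 1) : 0) == (int(true) & 3) &&
                  (false ? (3 & 1) : 0) == (int(false) & 3),
              "illustrative check of the select -> and rewrite");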
6382
6383/// This contains all DAGCombine rules which reduce two values combined by
6384/// an And operation to a single value. This makes them reusable in the context
6385/// of visitSELECT(). Rules involving constants are not included as
6386/// visitSELECT() already handles those cases.
6387SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6388 EVT VT = N1.getValueType();
6389 SDLoc DL(N);
6390
6391 // fold (and x, undef) -> 0
6392 if (N0.isUndef() || N1.isUndef())
6393 return DAG.getConstant(0, DL, VT);
6394
6395 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6396 return V;
6397
6398 // Canonicalize:
6399 // and(x, add) -> and(add, x)
6400 if (N1.getOpcode() == ISD::ADD)
6401 std::swap(N0, N1);
6402
6403 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6404 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6405 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6406 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6407 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6408 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6409 // immediate for an add, but it is legal if its top c2 bits are set,
6410 // transform the ADD so the immediate doesn't need to be materialized
6411 // in a register.
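          // [Editorial illustration, not part of the upstream source] An 8-bit
          // model of why setting the top c2 bits of the add immediate is safe:
          // with c2 = 4, the other AND operand (y >> 4) has its top 4 bits
          // clear, so (x + 0x03) and (x + 0xF3) give the same AND result.
          static_assert((uint8_t(0x57 + 0x03) & (0xAB >> 4)) ==
                            (uint8_t(0x57 + 0xF3) & (0xAB >> 4)),
                        "illustrative check: top bits of the add immediate do "
                        "not affect the masked result");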
6412 APInt ADDC = ADDI->getAPIntValue();
6413 APInt SRLC = SRLI->getAPIntValue();
6414 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6415 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6416 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6417 SRLC.getZExtValue());
6418 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6419 ADDC |= Mask;
6420 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6421 SDLoc DL0(N0);
6422 SDValue NewAdd =
6423 DAG.getNode(ISD::ADD, DL0, VT,
6424 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6425 CombineTo(N0.getNode(), NewAdd);
6426 // Return N so it doesn't get rechecked!
6427 return SDValue(N, 0);
6428 }
6429 }
6430 }
6431 }
6432 }
6433 }
6434
6435 return SDValue();
6436}
6437
6438bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6439 EVT LoadResultTy, EVT &ExtVT) {
6440 if (!AndC->getAPIntValue().isMask())
6441 return false;
6442
6443 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6444
6445 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6446 EVT LoadedVT = LoadN->getMemoryVT();
6447
6448 if (ExtVT == LoadedVT &&
6449 (!LegalOperations ||
6450 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6451 // ZEXTLOAD will match without needing to change the size of the value being
6452 // loaded.
6453 return true;
6454 }
6455
6456 // Do not change the width of a volatile or atomic load.
6457 if (!LoadN->isSimple())
6458 return false;
6459
6460 // Do not generate loads of non-round integer types since these can
6461 // be expensive (and would be wrong if the type is not byte sized).
6462 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6463 return false;
6464
6465 if (LegalOperations &&
6466 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6467 return false;
6468
6469 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6470 return false;
6471
6472 return true;
6473}
6474
6475bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6476 ISD::LoadExtType ExtType, EVT &MemVT,
6477 unsigned ShAmt) {
6478 if (!LDST)
6479 return false;
6480 // Only allow byte offsets.
6481 if (ShAmt % 8)
6482 return false;
6483
6484 // Do not generate loads of non-round integer types since these can
6485 // be expensive (and would be wrong if the type is not byte sized).
6486 if (!MemVT.isRound())
6487 return false;
6488
6489 // Don't change the width of a volatile or atomic load.
6490 if (!LDST->isSimple())
6491 return false;
6492
6493 EVT LdStMemVT = LDST->getMemoryVT();
6494
6495 // Bail out when changing the scalable property, since we can't be sure that
6496 // we're actually narrowing here.
6497 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6498 return false;
6499
6500 // Verify that we are actually reducing a load width here.
6501 if (LdStMemVT.bitsLT(MemVT))
6502 return false;
6503
6504 // Ensure that this isn't going to produce an unsupported memory access.
6505 if (ShAmt) {
6506 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6507 const unsigned ByteShAmt = ShAmt / 8;
6508 const Align LDSTAlign = LDST->getAlign();
6509 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6510 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6511 LDST->getAddressSpace(), NarrowAlign,
6512 LDST->getMemOperand()->getFlags()))
6513 return false;
6514 }
6515
6516 // It's not possible to generate a constant of extended or untyped type.
6517 EVT PtrType = LDST->getBasePtr().getValueType();
6518 if (PtrType == MVT::Untyped || PtrType.isExtended())
6519 return false;
6520
6521 if (isa<LoadSDNode>(LDST)) {
6522 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6523 // Don't transform one with multiple uses, this would require adding a new
6524 // load.
6525 if (!SDValue(Load, 0).hasOneUse())
6526 return false;
6527
6528 if (LegalOperations &&
6529 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6530 return false;
6531
6532 // For the transform to be legal, the load must produce only two values
6533 // (the value loaded and the chain). Don't transform a pre-increment
6534 // load, for example, which produces an extra value. Otherwise the
6535 // transformation is not equivalent, and the downstream logic to replace
6536 // uses gets things wrong.
6537 if (Load->getNumValues() > 2)
6538 return false;
6539
6540 // If the load that we're shrinking is an extload and we're not just
6541 // discarding the extension we can't simply shrink the load. Bail.
6542 // TODO: It would be possible to merge the extensions in some cases.
6543 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6544 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6545 return false;
6546
6547 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6548 return false;
6549 } else {
6550 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6551 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6552 // Can't write outside the original store
6553 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6554 return false;
6555
6556 if (LegalOperations &&
6557 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6558 return false;
6559 }
6560 return true;
6561}
6562
6563bool DAGCombiner::SearchForAndLoads(SDNode *N,
6564 SmallVectorImpl<LoadSDNode*> &Loads,
6565 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6566 ConstantSDNode *Mask,
6567 SDNode *&NodeToMask) {
6568 // Recursively search for the operands, looking for loads which can be
6569 // narrowed.
6570 for (SDValue Op : N->op_values()) {
6571 if (Op.getValueType().isVector())
6572 return false;
6573
6574 // Some constants may need fixing up later if they are too large.
6575 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6576 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6577 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6578 NodesWithConsts.insert(N);
6579 continue;
6580 }
6581
6582 if (!Op.hasOneUse())
6583 return false;
6584
6585 switch(Op.getOpcode()) {
6586 case ISD::LOAD: {
6587 auto *Load = cast<LoadSDNode>(Op);
6588 EVT ExtVT;
6589 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6590 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6591
6592 // ZEXTLOAD is already small enough.
6593 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6594 ExtVT.bitsGE(Load->getMemoryVT()))
6595 continue;
6596
6597 // Use LE to convert equal sized loads to zext.
6598 if (ExtVT.bitsLE(Load->getMemoryVT()))
6599 Loads.push_back(Load);
6600
6601 continue;
6602 }
6603 return false;
6604 }
6605 case ISD::ZERO_EXTEND:
6606 case ISD::AssertZext: {
6607 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6608 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6609 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6610 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6611 Op.getOperand(0).getValueType();
6612
6613 // We can accept extending nodes if the mask is wider or an equal
6614 // width to the original type.
6615 if (ExtVT.bitsGE(VT))
6616 continue;
6617 break;
6618 }
6619 case ISD::OR:
6620 case ISD::XOR:
6621 case ISD::AND:
6622 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6623 NodeToMask))
6624 return false;
6625 continue;
6626 }
6627
6628 // Allow one node which will be masked along with any loads found.
6629 if (NodeToMask)
6630 return false;
6631
6632 // Also ensure that the node to be masked only produces one data result.
6633 NodeToMask = Op.getNode();
6634 if (NodeToMask->getNumValues() > 1) {
6635 bool HasValue = false;
6636 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6637 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6638 if (VT != MVT::Glue && VT != MVT::Other) {
6639 if (HasValue) {
6640 NodeToMask = nullptr;
6641 return false;
6642 }
6643 HasValue = true;
6644 }
6645 }
6646 assert(HasValue && "Node to be masked has no data result?");
6647 }
6648 }
6649 return true;
6650}
6651
6652bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6653 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6654 if (!Mask)
6655 return false;
6656
6657 if (!Mask->getAPIntValue().isMask())
6658 return false;
6659
6660 // No need to do anything if the and directly uses a load.
6661 if (isa<LoadSDNode>(N->getOperand(0)))
6662 return false;
6663
6664 SmallVector<LoadSDNode*, 8> Loads;
6665 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6666 SDNode *FixupNode = nullptr;
6667 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6668 if (Loads.empty())
6669 return false;
6670
6671 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6672 SDValue MaskOp = N->getOperand(1);
6673
6674 // If it exists, fixup the single node we allow in the tree that needs
6675 // masking.
6676 if (FixupNode) {
6677 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6678 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6679 FixupNode->getValueType(0),
6680 SDValue(FixupNode, 0), MaskOp);
6681 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6682 if (And.getOpcode() == ISD::AND)
6683 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6684 }
6685
6686 // Narrow any constants that need it.
6687 for (auto *LogicN : NodesWithConsts) {
6688 SDValue Op0 = LogicN->getOperand(0);
6689 SDValue Op1 = LogicN->getOperand(1);
6690
6691 if (isa<ConstantSDNode>(Op0))
6692 Op0 =
6693 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6694
6695 if (isa<ConstantSDNode>(Op1))
6696 Op1 =
6697 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6698
6699 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6700 std::swap(Op0, Op1);
6701
6702 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6703 }
6704
6705 // Create narrow loads.
6706 for (auto *Load : Loads) {
6707 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6708 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6709 SDValue(Load, 0), MaskOp);
6710 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6711 if (And.getOpcode() == ISD::AND)
6712 And = SDValue(
6713 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6714 SDValue NewLoad = reduceLoadWidth(And.getNode());
6715 assert(NewLoad &&
6716 "Shouldn't be masking the load if it can't be narrowed");
6717 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6718 }
6719 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6720 return true;
6721 }
6722 return false;
6723}
6724
6725// Unfold
6726// x & (-1 'logical shift' y)
6727// To
6728// (x 'opposite logical shift' y) 'logical shift' y
6729// if it is better for performance.
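// [Editorial illustration, not part of the upstream source] For y = 8 on i32:
//   x & (-1 << y) == (x >> y) << y    and    x & (-1 >> y) == (x << y) >> y
static_assert((0xABCD1234u & (~0u << 8)) == ((0xABCD1234u >> 8) << 8) &&
                  (0xABCD1234u & (~0u >> 8)) == ((0xABCD1234u << 8) >> 8),
              "illustrative check of the mask-to-shift-pair unfolding");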
6730SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6731 assert(N->getOpcode() == ISD::AND);
6732
6733 SDValue N0 = N->getOperand(0);
6734 SDValue N1 = N->getOperand(1);
6735
6736 // Do we actually prefer shifts over mask?
6737 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6738 return SDValue();
6739
6740 // Try to match (-1 '[outer] logical shift' y)
6741 unsigned OuterShift;
6742 unsigned InnerShift; // The opposite direction to the OuterShift.
6743 SDValue Y; // Shift amount.
6744 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6745 if (!M.hasOneUse())
6746 return false;
6747 OuterShift = M->getOpcode();
6748 if (OuterShift == ISD::SHL)
6749 InnerShift = ISD::SRL;
6750 else if (OuterShift == ISD::SRL)
6751 InnerShift = ISD::SHL;
6752 else
6753 return false;
6754 if (!isAllOnesConstant(M->getOperand(0)))
6755 return false;
6756 Y = M->getOperand(1);
6757 return true;
6758 };
6759
6760 SDValue X;
6761 if (matchMask(N1))
6762 X = N0;
6763 else if (matchMask(N0))
6764 X = N1;
6765 else
6766 return SDValue();
6767
6768 SDLoc DL(N);
6769 EVT VT = N->getValueType(0);
6770
6771 // tmp = x 'opposite logical shift' y
6772 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6773 // ret = tmp 'logical shift' y
6774 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6775
6776 return T1;
6777}
6778
6779/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6780/// For a target with a bit test, this is expected to become test + set and save
6781/// at least 1 instruction.
6782static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6783 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6784
6785 // Look through an optional extension.
6786 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6787 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6788 And0 = And0.getOperand(0);
6789 if (!isOneConstant(And1) || !And0.hasOneUse())
6790 return SDValue();
6791
6792 SDValue Src = And0;
6793
6794 // Attempt to find a 'not' op.
6795 // TODO: Should we favor test+set even without the 'not' op?
6796 bool FoundNot = false;
6797 if (isBitwiseNot(Src)) {
6798 FoundNot = true;
6799 Src = Src.getOperand(0);
6800
6801 // Look through an optional truncation. The source operand may not be the
6802 // same type as the original 'and', but that is ok because we are masking
6803 // off everything but the low bit.
6804 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6805 Src = Src.getOperand(0);
6806 }
6807
6808 // Match a shift-right by constant.
6809 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6810 return SDValue();
6811
6812 // This is probably not worthwhile without a supported type.
6813 EVT SrcVT = Src.getValueType();
6814 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6815 if (!TLI.isTypeLegal(SrcVT))
6816 return SDValue();
6817
6818 // We might have looked through casts that make this transform invalid.
6819 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6820 SDValue ShiftAmt = Src.getOperand(1);
6821 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6822 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6823 return SDValue();
6824
6825 // Set source to shift source.
6826 Src = Src.getOperand(0);
6827
6828 // Try again to find a 'not' op.
6829 // TODO: Should we favor test+set even with two 'not' ops?
6830 if (!FoundNot) {
6831 if (!isBitwiseNot(Src))
6832 return SDValue();
6833 Src = Src.getOperand(0);
6834 }
6835
6836 if (!TLI.hasBitTest(Src, ShiftAmt))
6837 return SDValue();
6838
6839 // Turn this into a bit-test pattern using mask op + setcc:
6840 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6841 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
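  // [Editorial illustration, not part of the upstream source] For X = 0b1010
  // and C = 2: and (not (srl X, 2)), 1 is 1, and (X & (1 << 2)) == 0 is true.
  static_assert(((~(0b1010u >> 2)) & 1u) ==
                    ((0b1010u & (1u << 2)) == 0 ? 1u : 0u) &&
                    ((~(0b1010u >> 1)) & 1u) ==
                        ((0b1010u & (1u << 1)) == 0 ? 1u : 0u),
                "illustrative check of the bit-test rewrite for C = 2 and C = 1");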
6842 SDLoc DL(And);
6843 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6844 EVT CCVT =
6845 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6846 SDValue Mask = DAG.getConstant(
6847 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6848 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6849 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6850 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6851 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6852}
6853
6854/// For targets that support usubsat, match a bit-hack form of that operation
6855/// that ends in 'and' and convert it.
6856static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6857 EVT VT = N->getValueType(0);
6858 unsigned BitWidth = VT.getScalarSizeInBits();
6859 APInt SignMask = APInt::getSignMask(BitWidth);
6860
6861 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6862 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6863 // xor/add with SMIN (signmask) are logically equivalent.
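  // [Editorial illustration, not part of the upstream source] i8 model with
  // SignMask = 0x80; `(x & 0x80) ? 0xFF : 0` stands in for `x s>> 7`:
  //   x = 0x90:  (0x90 ^ 0x80) & 0xFF = 0x10 = usubsat(0x90, 0x80)
  //   x = 0x30:  (0x30 ^ 0x80) & 0x00 = 0x00 = usubsat(0x30, 0x80)
  static_assert(((0x90u ^ 0x80u) & ((0x90u & 0x80u) ? 0xFFu : 0x0u)) == 0x10u &&
                    ((0x30u ^ 0x80u) & ((0x30u & 0x80u) ? 0xFFu : 0x0u)) == 0x0u,
                "illustrative check of the usubsat bit-hack");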
6864 SDValue X;
6865 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6866 m_OneUse(m_Sra(m_Deferred(X),
6867 m_SpecificInt(BitWidth - 1))))) &&
6868 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6869 m_OneUse(m_Sra(m_Deferred(X),
6870 m_SpecificInt(BitWidth - 1))))))
6871 return SDValue();
6872
6873 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6874 DAG.getConstant(SignMask, DL, VT));
6875}
6876
6877/// Given a bitwise logic operation N with a matching bitwise logic operand,
6878/// fold a pattern where 2 of the source operands are identically shifted
6879/// values. For example:
6880/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6881static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6882 SelectionDAG &DAG) {
6883 unsigned LogicOpcode = N->getOpcode();
6884 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6885 "Expected bitwise logic operation");
6886
6887 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6888 return SDValue();
6889
6890 // Match another bitwise logic op and a shift.
6891 unsigned ShiftOpcode = ShiftOp.getOpcode();
6892 if (LogicOp.getOpcode() != LogicOpcode ||
6893 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6894 ShiftOpcode == ISD::SRA))
6895 return SDValue();
6896
6897 // Match another shift op inside the first logic operand. Handle both commuted
6898 // possibilities.
6899 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6900 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6901 SDValue X1 = ShiftOp.getOperand(0);
6902 SDValue Y = ShiftOp.getOperand(1);
6903 SDValue X0, Z;
6904 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6905 LogicOp.getOperand(0).getOperand(1) == Y) {
6906 X0 = LogicOp.getOperand(0).getOperand(0);
6907 Z = LogicOp.getOperand(1);
6908 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6909 LogicOp.getOperand(1).getOperand(1) == Y) {
6910 X0 = LogicOp.getOperand(1).getOperand(0);
6911 Z = LogicOp.getOperand(0);
6912 } else {
6913 return SDValue();
6914 }
6915
6916 EVT VT = N->getValueType(0);
6917 SDLoc DL(N);
6918 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6919 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6920 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6921}
6922
6923/// Given a tree of logic operations with shape like
6924/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6925/// try to match and fold shift operations with the same shift amount.
6926/// For example:
6927/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6928/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6929static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6930 SDValue RightHand, SelectionDAG &DAG) {
6931 unsigned LogicOpcode = N->getOpcode();
6932 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6933 "Expected bitwise logic operation");
6934 if (LeftHand.getOpcode() != LogicOpcode ||
6935 RightHand.getOpcode() != LogicOpcode)
6936 return SDValue();
6937 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6938 return SDValue();
6939
6940 // Try to match one of following patterns:
6941 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6942 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6943 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6944 // itself.
6945 SDValue CombinedShifts, W;
6946 SDValue R0 = RightHand.getOperand(0);
6947 SDValue R1 = RightHand.getOperand(1);
6948 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6949 W = R1;
6950 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6951 W = R0;
6952 else
6953 return SDValue();
6954
6955 EVT VT = N->getValueType(0);
6956 SDLoc DL(N);
6957 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6958}
6959
6960SDValue DAGCombiner::visitAND(SDNode *N) {
6961 SDValue N0 = N->getOperand(0);
6962 SDValue N1 = N->getOperand(1);
6963 EVT VT = N1.getValueType();
6964 SDLoc DL(N);
6965
6966 // x & x --> x
6967 if (N0 == N1)
6968 return N0;
6969
6970 // fold (and c1, c2) -> c1&c2
6971 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6972 return C;
6973
6974 // canonicalize constant to RHS
6975 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6976 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6977 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6978
6979 if (areBitwiseNotOfEachother(N0, N1))
6980 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6981
6982 // fold vector ops
6983 if (VT.isVector()) {
6984 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6985 return FoldedVOp;
6986
6987 // fold (and x, 0) -> 0, vector edition
6988 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6989 // do not return N1, because undef node may exist in N1
6990 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6991 N1.getValueType());
6992
6993 // fold (and x, -1) -> x, vector edition
6994 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6995 return N0;
6996
6997 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6998 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6999 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7000 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
7001 N1.hasOneUse()) {
7002 EVT LoadVT = MLoad->getMemoryVT();
7003 EVT ExtVT = VT;
7004 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7005 // For this AND to be a zero extension of the masked load the elements
7006 // of the BuildVec must mask the bottom bits of the extended element
7007 // type
7008 uint64_t ElementSize =
7009 LoadVT.getVectorElementType().getScalarSizeInBits();
7010 if (Splat->getAPIntValue().isMask(ElementSize)) {
7011 SDValue NewLoad = DAG.getMaskedLoad(
7012 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7013 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7014 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7015 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7016 bool LoadHasOtherUsers = !N0.hasOneUse();
7017 CombineTo(N, NewLoad);
7018 if (LoadHasOtherUsers)
7019 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7020 return SDValue(N, 0);
7021 }
7022 }
7023 }
7024 }
7025
7026 // fold (and x, -1) -> x
7027 if (isAllOnesConstant(N1))
7028 return N0;
7029
7030 // if (and x, c) is known to be zero, return 0
7031 unsigned BitWidth = VT.getScalarSizeInBits();
7032 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7033 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7034 return DAG.getConstant(0, DL, VT);
7035
7036 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7037 return R;
7038
7039 if (SDValue NewSel = foldBinOpIntoSelect(N))
7040 return NewSel;
7041
7042 // reassociate and
7043 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7044 return RAND;
7045
7046 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7047 if (SDValue SD =
7048 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7049 return SD;
7050
7051 // fold (and (or x, C), D) -> D if (C & D) == D
7052 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7053 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7054 };
7055 if (N0.getOpcode() == ISD::OR &&
7056 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7057 return N1;
7058
7059 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7060 SDValue N0Op0 = N0.getOperand(0);
7061 EVT SrcVT = N0Op0.getValueType();
7062 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7063 APInt Mask = ~N1C->getAPIntValue();
7064 Mask = Mask.trunc(SrcBitWidth);
7065
7066 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7067 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7068 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7069
7070 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7071 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7072 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7073 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7074 TLI.isNarrowingProfitable(VT, SrcVT))
7075 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7076 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7077 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7078 }
7079
7080 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7081 if (ISD::isExtOpcode(N0.getOpcode())) {
7082 unsigned ExtOpc = N0.getOpcode();
7083 SDValue N0Op0 = N0.getOperand(0);
7084 if (N0Op0.getOpcode() == ISD::AND &&
7085 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7086 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7087 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7088 N0->hasOneUse() && N0Op0->hasOneUse()) {
7089 SDValue NewMask =
7090 DAG.getNode(ISD::AND, DL, VT, N1,
7091 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7092 return DAG.getNode(ISD::AND, DL, VT,
7093 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7094 NewMask);
7095 }
7096 }
7097
7098 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7099 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7100 // already be zero by virtue of the width of the base type of the load.
7101 //
7102 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7103 // more cases.
7104 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7106 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7107 N0.getOperand(0).getResNo() == 0) ||
7108 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7109 auto *Load =
7110 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7111
7112 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7113 // This can be a pure constant or a vector splat, in which case we treat the
7114 // vector as a scalar and use the splat value.
7115 APInt Constant = APInt::getZero(1);
7116 if (const ConstantSDNode *C = isConstOrConstSplat(
7117 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7118 Constant = C->getAPIntValue();
7119 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7120 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7121 APInt SplatValue, SplatUndef;
7122 unsigned SplatBitSize;
7123 bool HasAnyUndefs;
7124 // Endianness should not matter here. Code below makes sure that we only
7125 // use the result if the SplatBitSize is a multiple of the vector element
7126 // size. And after that we AND all element sized parts of the splat
7127 // together. So the end result should be the same regardless of in which
7128 // order we do those operations.
7129 const bool IsBigEndian = false;
7130 bool IsSplat =
7131 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7132 HasAnyUndefs, EltBitWidth, IsBigEndian);
7133
7134 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7135 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7136 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7137 // Undef bits can contribute to a possible optimisation if set, so
7138 // set them.
7139 SplatValue |= SplatUndef;
7140
7141 // The splat value may be something like "0x00FFFFFF", which means 0 for
7142 // the first vector value and FF for the rest, repeating. We need a mask
7143 // that will apply equally to all members of the vector, so AND all the
7144 // lanes of the constant together.
7145 Constant = APInt::getAllOnes(EltBitWidth);
7146 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7147 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7148 }
7149 }
7150
7151 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7152 // actually legal and isn't going to get expanded, else this is a false
7153 // optimisation.
7154 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7155 Load->getValueType(0),
7156 Load->getMemoryVT());
7157
7158 // Resize the constant to the same size as the original memory access before
7159 // extension. If it is still the AllOnesValue then this AND is completely
7160 // unneeded.
7161 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7162
7163 bool B;
7164 switch (Load->getExtensionType()) {
7165 default: B = false; break;
7166 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7167 case ISD::ZEXTLOAD:
7168 case ISD::NON_EXTLOAD: B = true; break;
7169 }
7170
7171 if (B && Constant.isAllOnes()) {
7172 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7173 // preserve semantics once we get rid of the AND.
7174 SDValue NewLoad(Load, 0);
7175
7176 // Fold the AND away. NewLoad may get replaced immediately.
7177 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7178
7179 if (Load->getExtensionType() == ISD::EXTLOAD) {
7180 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7181 Load->getValueType(0), SDLoc(Load),
7182 Load->getChain(), Load->getBasePtr(),
7183 Load->getOffset(), Load->getMemoryVT(),
7184 Load->getMemOperand());
7185 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7186 if (Load->getNumValues() == 3) {
7187 // PRE/POST_INC loads have 3 values.
7188 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7189 NewLoad.getValue(2) };
7190 CombineTo(Load, To, 3, true);
7191 } else {
7192 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7193 }
7194 }
7195
7196 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7197 }
7198 }
7199
7200 // Try to convert a constant mask AND into a shuffle clear mask.
7201 if (VT.isVector())
7202 if (SDValue Shuffle = XformToShuffleWithZero(N))
7203 return Shuffle;
7204
7205 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7206 return Combined;
7207
7208 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7209 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7210 SDValue Ext = N0.getOperand(0);
7211 EVT ExtVT = Ext->getValueType(0);
7212 SDValue Extendee = Ext->getOperand(0);
7213
7214 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7215 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7216 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7217 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7218 // => (extract_subvector (iN_zeroext v))
7219 SDValue ZeroExtExtendee =
7220 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7221
7222 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7223 N0.getOperand(1));
7224 }
7225 }
7226
7227 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7228 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7229 EVT MemVT = GN0->getMemoryVT();
7230 EVT ScalarVT = MemVT.getScalarType();
7231
7232 if (SDValue(GN0, 0).hasOneUse() &&
7233 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7234 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7235 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7236 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7237
7238 SDValue ZExtLoad = DAG.getMaskedGather(
7239 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7240 GN0->getIndexType(), ISD::ZEXTLOAD);
7241
7242 CombineTo(N, ZExtLoad);
7243 AddToWorklist(ZExtLoad.getNode());
7244 // Avoid recheck of N.
7245 return SDValue(N, 0);
7246 }
7247 }
7248
7249 // fold (and (load x), 255) -> (zextload x, i8)
7250 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7251 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7252 if (SDValue Res = reduceLoadWidth(N))
7253 return Res;
7254
7255 if (LegalTypes) {
7256 // Attempt to propagate the AND back up to the leaves which, if they're
7257 // loads, can be combined to narrow loads and the AND node can be removed.
7258 // Perform after legalization so that extend nodes will already be
7259 // combined into the loads.
7260 if (BackwardsPropagateMask(N))
7261 return SDValue(N, 0);
7262 }
7263
7264 if (SDValue Combined = visitANDLike(N0, N1, N))
7265 return Combined;
7266
7267 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7268 if (N0.getOpcode() == N1.getOpcode())
7269 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7270 return V;
7271
7272 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7273 return R;
7274 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7275 return R;
7276
7277 // Masking the negated extension of a boolean is just the zero-extended
7278 // boolean:
7279 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7280 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7281 //
7282 // Note: the SimplifyDemandedBits fold below can make an information-losing
7283 // transform, and then we have no way to find this better fold.
7284 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7285 if (isNullOrNullSplat(N0.getOperand(0))) {
7286 SDValue SubRHS = N0.getOperand(1);
7287 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7288 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7289 return SubRHS;
7290 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7291 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7292 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7293 }
7294 }
7295
7296 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7297 // fold (and (sra)) -> (and (srl)) when possible.
7298 if (SimplifyDemandedBits(SDValue(N, 0)))
7299 return SDValue(N, 0);
7300
7301 // fold (zext_inreg (extload x)) -> (zextload x)
7302 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7303 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7304 (ISD::isEXTLoad(N0.getNode()) ||
7305 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7306 auto *LN0 = cast<LoadSDNode>(N0);
7307 EVT MemVT = LN0->getMemoryVT();
7308 // If we zero all the possible extended bits, then we can turn this into
7309 // a zextload if we are running before legalize or the operation is legal.
7310 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7311 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7312 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7313 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7314 ((!LegalOperations && LN0->isSimple()) ||
7315 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7316 SDValue ExtLoad =
7317 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7318 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7319 AddToWorklist(N);
7320 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7321 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7322 }
7323 }
7324
7325 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7326 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7327 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7328 N0.getOperand(1), false))
7329 return BSwap;
7330 }
7331
7332 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7333 return Shifts;
7334
7335 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7336 return V;
7337
7338 // Recognize the following pattern:
7339 //
7340 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7341 //
7342 // where bitmask is a mask that clears the upper bits of AndVT. The
7343 // number of bits in bitmask must be a power of two.
7344 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7345 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7346 return false;
7347
7348 auto *C = dyn_cast<ConstantSDNode>(RHS);
7349 if (!C)
7350 return false;
7351
7352 if (!C->getAPIntValue().isMask(
7353 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7354 return false;
7355
7356 return true;
7357 };
7358
7359 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7360 if (IsAndZeroExtMask(N0, N1))
7361 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7362
7363 if (hasOperation(ISD::USUBSAT, VT))
7364 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7365 return V;
7366
7367 // Postpone until legalization completed to avoid interference with bswap
7368 // folding
7369 if (LegalOperations || VT.isVector())
7370 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7371 return R;
7372
7373 return SDValue();
7374}
7375
7376/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
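// [Editorial illustration, not part of the upstream source] For a = 0x00003412
// the matched pattern and the replacement agree on 0x1234, since
// bswap(0x00003412) = 0x12340000 and 0x12340000 >> 16 = 0x1234:
static_assert((((0x00003412u & 0xFFu) << 8) | ((0x00003412u >> 8) & 0xFFu)) ==
                  (0x12340000u >> 16),
              "illustrative check of the low-halfword bswap match");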
7377SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7378 bool DemandHighBits) {
7379 if (!LegalOperations)
7380 return SDValue();
7381
7382 EVT VT = N->getValueType(0);
7383 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7384 return SDValue();
7385 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7386 return SDValue();
7387
7388 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7389 bool LookPassAnd0 = false;
7390 bool LookPassAnd1 = false;
7391 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7392 std::swap(N0, N1);
7393 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7394 std::swap(N0, N1);
7395 if (N0.getOpcode() == ISD::AND) {
7396 if (!N0->hasOneUse())
7397 return SDValue();
7398 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7399 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7400 // This is needed for X86.
7401 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7402 N01C->getZExtValue() != 0xFFFF))
7403 return SDValue();
7404 N0 = N0.getOperand(0);
7405 LookPassAnd0 = true;
7406 }
7407
7408 if (N1.getOpcode() == ISD::AND) {
7409 if (!N1->hasOneUse())
7410 return SDValue();
7411 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7412 if (!N11C || N11C->getZExtValue() != 0xFF)
7413 return SDValue();
7414 N1 = N1.getOperand(0);
7415 LookPassAnd1 = true;
7416 }
7417
7418 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7419 std::swap(N0, N1);
7420 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7421 return SDValue();
7422 if (!N0->hasOneUse() || !N1->hasOneUse())
7423 return SDValue();
7424
7425 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7426 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7427 if (!N01C || !N11C)
7428 return SDValue();
7429 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7430 return SDValue();
7431
7432 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7433 SDValue N00 = N0->getOperand(0);
7434 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7435 if (!N00->hasOneUse())
7436 return SDValue();
7437 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7438 if (!N001C || N001C->getZExtValue() != 0xFF)
7439 return SDValue();
7440 N00 = N00.getOperand(0);
7441 LookPassAnd0 = true;
7442 }
7443
7444 SDValue N10 = N1->getOperand(0);
7445 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7446 if (!N10->hasOneUse())
7447 return SDValue();
7448 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7449 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7450 // for X86.
7451 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7452 N101C->getZExtValue() != 0xFFFF))
7453 return SDValue();
7454 N10 = N10.getOperand(0);
7455 LookPassAnd1 = true;
7456 }
7457
7458 if (N00 != N10)
7459 return SDValue();
7460
7461 // Make sure everything beyond the low halfword gets set to zero since the SRL
7462 // 16 will clear the top bits.
7463 unsigned OpSizeInBits = VT.getSizeInBits();
7464 if (OpSizeInBits > 16) {
7465 // If the left-shift isn't masked out then the only way this is a bswap is
7466 // if all bits beyond the low 8 are 0. In that case the entire pattern
7467 // reduces to a left shift anyway: leave it for other parts of the combiner.
7468 if (DemandHighBits && !LookPassAnd0)
7469 return SDValue();
7470
7471 // However, if the right shift isn't masked out then it might be because
7472 // it's not needed. See if we can spot that too. If the high bits aren't
7473 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7474 // upper bits to be zero.
7475 if (!LookPassAnd1) {
7476 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7477 if (!DAG.MaskedValueIsZero(N10,
7478 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7479 return SDValue();
7480 }
7481 }
7482
7483 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7484 if (OpSizeInBits > 16) {
7485 SDLoc DL(N);
7486 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7487 DAG.getConstant(OpSizeInBits - 16, DL,
7488 getShiftAmountTy(VT)));
7489 }
7490 return Res;
7491}
7492
7493/// Return true if the specified node is an element that makes up a 32-bit
7494/// packed halfword byteswap.
7495/// ((x & 0x000000ff) << 8) |
7496/// ((x & 0x0000ff00) >> 8) |
7497/// ((x & 0x00ff0000) << 8) |
7498/// ((x & 0xff000000) >> 8)
7499static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7500 if (!N->hasOneUse())
7501 return false;
7502
7503 unsigned Opc = N.getOpcode();
7504 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7505 return false;
7506
7507 SDValue N0 = N.getOperand(0);
7508 unsigned Opc0 = N0.getOpcode();
7509 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7510 return false;
7511
7512 ConstantSDNode *N1C = nullptr;
7513 // SHL or SRL: look upstream for AND mask operand
7514 if (Opc == ISD::AND)
7515 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7516 else if (Opc0 == ISD::AND)
7517 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7518 if (!N1C)
7519 return false;
7520
7521 unsigned MaskByteOffset;
7522 switch (N1C->getZExtValue()) {
7523 default:
7524 return false;
7525 case 0xFF: MaskByteOffset = 0; break;
7526 case 0xFF00: MaskByteOffset = 1; break;
7527 case 0xFFFF:
7528 // In case demanded bits didn't clear the bits that will be shifted out.
7529 // This is needed for X86.
7530 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7531 MaskByteOffset = 1;
7532 break;
7533 }
7534 return false;
7535 case 0xFF0000: MaskByteOffset = 2; break;
7536 case 0xFF000000: MaskByteOffset = 3; break;
7537 }
7538
7539 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7540 if (Opc == ISD::AND) {
7541 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7542 // (x >> 8) & 0xff
7543 // (x >> 8) & 0xff0000
7544 if (Opc0 != ISD::SRL)
7545 return false;
7546 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7547 if (!C || C->getZExtValue() != 8)
7548 return false;
7549 } else {
7550 // (x << 8) & 0xff00
7551 // (x << 8) & 0xff000000
7552 if (Opc0 != ISD::SHL)
7553 return false;
7554 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7555 if (!C || C->getZExtValue() != 8)
7556 return false;
7557 }
7558 } else if (Opc == ISD::SHL) {
7559 // (x & 0xff) << 8
7560 // (x & 0xff0000) << 8
7561 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7562 return false;
7563 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7564 if (!C || C->getZExtValue() != 8)
7565 return false;
7566 } else { // Opc == ISD::SRL
7567 // (x & 0xff00) >> 8
7568 // (x & 0xff000000) >> 8
7569 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7570 return false;
7571 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7572 if (!C || C->getZExtValue() != 8)
7573 return false;
7574 }
7575
7576 if (Parts[MaskByteOffset])
7577 return false;
7578
7579 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7580 return true;
7581}
7582
7583// Match 2 elements of a packed halfword bswap.
7584static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7585 if (N.getOpcode() == ISD::OR)
7586 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7587 isBSwapHWordElement(N.getOperand(1), Parts);
7588
7589 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7590 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7591 if (!C || C->getAPIntValue() != 16)
7592 return false;
7593 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7594 return true;
7595 }
7596
7597 return false;
7598}
7599
7600// Match this pattern:
7601// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7602// And rewrite this to:
7603// (rotr (bswap A), 16)
7604static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7605 SelectionDAG &DAG, SDNode *N, SDValue N0,
7606 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7607 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7608 "MatchBSwapHWordOrAndAnd: expecting i32");
7609 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7610 return SDValue();
7611 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7612 return SDValue();
7613 // TODO: this is too restrictive; lifting this restriction requires more tests
7614 if (!N0->hasOneUse() || !N1->hasOneUse())
7615 return SDValue();
7616 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7617 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7618 if (!Mask0 || !Mask1)
7619 return SDValue();
7620 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7621 Mask1->getAPIntValue() != 0x00ff00ff)
7622 return SDValue();
7623 SDValue Shift0 = N0.getOperand(0);
7624 SDValue Shift1 = N1.getOperand(0);
7625 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7626 return SDValue();
7627 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7628 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7629 if (!ShiftAmt0 || !ShiftAmt1)
7630 return SDValue();
7631 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7632 return SDValue();
7633 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7634 return SDValue();
7635
7636 SDLoc DL(N);
7637 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7638 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7639 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7640}
7641
7642/// Match a 32-bit packed halfword bswap. That is
7643/// ((x & 0x000000ff) << 8) |
7644/// ((x & 0x0000ff00) >> 8) |
7645/// ((x & 0x00ff0000) << 8) |
7646/// ((x & 0xff000000) >> 8)
7647/// => (rotl (bswap x), 16)
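// [Editorial illustration, not part of the upstream source] For x = 0xAABBCCDD
// the four masked shifts produce 0xBBAADDCC, which equals
// rotl(bswap(0xAABBCCDD), 16) = rotl(0xDDCCBBAA, 16):
static_assert((((0xAABBCCDDu & 0x000000FFu) << 8) |
               ((0xAABBCCDDu & 0x0000FF00u) >> 8) |
               ((0xAABBCCDDu & 0x00FF0000u) << 8) |
               ((0xAABBCCDDu & 0xFF000000u) >> 8)) == 0xBBAADDCCu,
              "illustrative check of the packed halfword bswap match");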
7648SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7649 if (!LegalOperations)
7650 return SDValue();
7651
7652 EVT VT = N->getValueType(0);
7653 if (VT != MVT::i32)
7654 return SDValue();
7655 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7656 return SDValue();
7657
7658 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7659 getShiftAmountTy(VT)))
7660 return BSwap;
7661
7662 // Try again with commuted operands.
7663 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7664 getShiftAmountTy(VT)))
7665 return BSwap;
7666
7667
7668 // Look for either
7669 // (or (bswaphpair), (bswaphpair))
7670 // (or (or (bswaphpair), (and)), (and))
7671 // (or (or (and), (bswaphpair)), (and))
7672 SDNode *Parts[4] = {};
7673
7674 if (isBSwapHWordPair(N0, Parts)) {
7675 // (or (or (and), (and)), (or (and), (and)))
7676 if (!isBSwapHWordPair(N1, Parts))
7677 return SDValue();
7678 } else if (N0.getOpcode() == ISD::OR) {
7679 // (or (or (or (and), (and)), (and)), (and))
7680 if (!isBSwapHWordElement(N1, Parts))
7681 return SDValue();
7682 SDValue N00 = N0.getOperand(0);
7683 SDValue N01 = N0.getOperand(1);
7684 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7685 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7686 return SDValue();
7687 } else {
7688 return SDValue();
7689 }
7690
7691 // Make sure the parts are all coming from the same node.
7692 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7693 return SDValue();
7694
7695 SDLoc DL(N);
7696 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7697 SDValue(Parts[0], 0));
7698
7699 // Result of the bswap should be rotated by 16. If it's not legal, then
7700 // do (x << 16) | (x >> 16).
7701 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7702 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7703 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7704 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7705 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7706 return DAG.getNode(ISD::OR, DL, VT,
7707 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7708 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7709}
7710
7711/// This contains all DAGCombine rules which reduce two values combined by
7712/// an Or operation to a single value \see visitANDLike().
7713SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7714 EVT VT = N1.getValueType();
7715
7716 // fold (or x, undef) -> -1
7717 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7718 return DAG.getAllOnesConstant(DL, VT);
7719
7720 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7721 return V;
7722
7723 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7724 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7725 // Don't increase # computations.
7726 (N0->hasOneUse() || N1->hasOneUse())) {
7727 // We can only do this xform if we know that bits from X that are set in C2
7728 // but not in C1 are already zero. Likewise for Y.
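  // [Editorial illustration, not part of the upstream source] With C1 = 0x0F,
  // C2 = 0xF0, X = 0x0A (high nibble already zero) and Y = 0x50 (low nibble
  // already zero), both forms give 0x5A:
  static_assert(((0x0Au & 0x0Fu) | (0x50u & 0xF0u)) ==
                    ((0x0Au | 0x50u) & (0x0Fu | 0xF0u)),
                "illustrative check of (or (and X,C1),(and Y,C2)) -> "
                "(and (or X,Y), C1|C2)");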
7729 if (const ConstantSDNode *N0O1C =
7730 getAsNonOpaqueConstant(N0.getOperand(1))) {
7731 if (const ConstantSDNode *N1O1C =
7732 getAsNonOpaqueConstant(N1.getOperand(1))) {
7733 // We can only do this xform if we know that bits from X that are set in
7734 // C2 but not in C1 are already zero. Likewise for Y.
7735 const APInt &LHSMask = N0O1C->getAPIntValue();
7736 const APInt &RHSMask = N1O1C->getAPIntValue();
7737
7738 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7739 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7740 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7741 N0.getOperand(0), N1.getOperand(0));
7742 return DAG.getNode(ISD::AND, DL, VT, X,
7743 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7744 }
7745 }
7746 }
7747 }
7748
7749 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7750 if (N0.getOpcode() == ISD::AND &&
7751 N1.getOpcode() == ISD::AND &&
7752 N0.getOperand(0) == N1.getOperand(0) &&
7753 // Don't increase # computations.
7754 (N0->hasOneUse() || N1->hasOneUse())) {
7755 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7756 N0.getOperand(1), N1.getOperand(1));
7757 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7758 }
7759
7760 return SDValue();
7761}
7762
7763/// OR combines for which the commuted variant will be tried as well.
7764static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7765 SDNode *N) {
7766 EVT VT = N0.getValueType();
7767 unsigned BW = VT.getScalarSizeInBits();
7768 SDLoc DL(N);
7769
7770 auto peekThroughResize = [](SDValue V) {
7771 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7772 return V->getOperand(0);
7773 return V;
7774 };
7775
7776 SDValue N0Resized = peekThroughResize(N0);
7777 if (N0Resized.getOpcode() == ISD::AND) {
7778 SDValue N1Resized = peekThroughResize(N1);
7779 SDValue N00 = N0Resized.getOperand(0);
7780 SDValue N01 = N0Resized.getOperand(1);
7781
7782 // fold or (and x, y), x --> x
7783 if (N00 == N1Resized || N01 == N1Resized)
7784 return N1;
7785
7786 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7787 // TODO: Set AllowUndefs = true.
7788 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7789 /* AllowUndefs */ false)) {
7790 if (peekThroughResize(NotOperand) == N1Resized)
7791 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7792 N1);
7793 }
7794
7795 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7796 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7797 /* AllowUndefs */ false)) {
7798 if (peekThroughResize(NotOperand) == N1Resized)
7799 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7800 N1);
7801 }
7802 }
7803
7804 SDValue X, Y;
7805
7806 // fold or (xor X, N1), N1 --> or X, N1
7807 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7808 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7809
7810 // fold or (xor x, y), (x and/or y) --> or x, y
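  // [Editorial illustration, not part of the upstream source] For x = 0b1100,
  // y = 0b1010: (x ^ y) | (x & y) == x | y == 0b1110.
  static_assert(((0b1100u ^ 0b1010u) | (0b1100u & 0b1010u)) ==
                    (0b1100u | 0b1010u),
                "illustrative check of or(xor(x,y), and(x,y)) -> or(x,y)");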
7811 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7812 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7813 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7814 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7815
7816 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7817 return R;
7818
7819 auto peekThroughZext = [](SDValue V) {
7820 if (V->getOpcode() == ISD::ZERO_EXTEND)
7821 return V->getOperand(0);
7822 return V;
7823 };
7824
7825 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7826 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7827 N0.getOperand(0) == N1.getOperand(0) &&
7828 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7829 return N0;
7830
7831 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7832 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7833 N0.getOperand(1) == N1.getOperand(0) &&
7834 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7835 return N0;
7836
7837 // Attempt to match a legalized build_pair-esque pattern:
7838 // or(shl(aext(Hi),BW/2),zext(Lo))
7839 SDValue Lo, Hi;
7840 if (sd_match(N0,
7841 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7842 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7843 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7844 Lo.getValueType() == Hi.getValueType()) {
7845 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7846 SDValue NotLo, NotHi;
7847 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7848 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7849 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7850 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7851 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7852 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7853 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7854 }
7855 }
7856
7857 return SDValue();
7858}
7859
7860SDValue DAGCombiner::visitOR(SDNode *N) {
7861 SDValue N0 = N->getOperand(0);
7862 SDValue N1 = N->getOperand(1);
7863 EVT VT = N1.getValueType();
7864 SDLoc DL(N);
7865
7866 // x | x --> x
7867 if (N0 == N1)
7868 return N0;
7869
7870 // fold (or c1, c2) -> c1|c2
7871 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7872 return C;
7873
7874 // canonicalize constant to RHS
7875 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7876 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7877 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7878
7879 // fold vector ops
7880 if (VT.isVector()) {
7881 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7882 return FoldedVOp;
7883
7884 // fold (or x, 0) -> x, vector edition
7885 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7886 return N0;
7887
7888 // fold (or x, -1) -> -1, vector edition
7889 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7890 // do not return N1, because undef node may exist in N1
7891 return DAG.getAllOnesConstant(DL, N1.getValueType());
7892
7893 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7894 // Do this only if the resulting type / shuffle is legal.
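// E.g. for v4i32: (or (shuf A, zero, <0,4,4,3>), (shuf B, zero, <4,1,2,4>))
// -> (shuf A, B, <0,5,6,3>), i.e. <A[0],B[1],B[2],A[3]>.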
7895 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7896 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7897 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7898 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7899 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7900 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7901 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7902 // Ensure both shuffles have a zero input.
7903 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7904 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7905 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7906 bool CanFold = true;
7907 int NumElts = VT.getVectorNumElements();
7908 SmallVector<int, 4> Mask(NumElts, -1);
7909
7910 for (int i = 0; i != NumElts; ++i) {
7911 int M0 = SV0->getMaskElt(i);
7912 int M1 = SV1->getMaskElt(i);
7913
7914 // Determine if either index is pointing to a zero vector.
7915 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7916 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7917
7918 // If one element is zero and the other side is undef, keep undef.
7919 // This also handles the case that both are undef.
7920 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7921 continue;
7922
7923 // Make sure only one of the elements is zero.
7924 if (M0Zero == M1Zero) {
7925 CanFold = false;
7926 break;
7927 }
7928
7929 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7930
7931 // We have a zero and non-zero element. If the non-zero came from
7932 // SV0 make the index a LHS index. If it came from SV1, make it
7933 // a RHS index. We need to mod by NumElts because we don't care
7934 // which operand it came from in the original shuffles.
7935 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7936 }
7937
7938 if (CanFold) {
7939 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7940 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7941 SDValue LegalShuffle =
7942 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7943 if (LegalShuffle)
7944 return LegalShuffle;
7945 }
7946 }
7947 }
7948 }
7949
7950 // fold (or x, 0) -> x
7951 if (isNullConstant(N1))
7952 return N0;
7953
7954 // fold (or x, -1) -> -1
7955 if (isAllOnesConstant(N1))
7956 return N1;
7957
7958 if (SDValue NewSel = foldBinOpIntoSelect(N))
7959 return NewSel;
7960
7961 // fold (or x, c) -> c iff (x & ~c) == 0
7962 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7963 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7964 return N1;
7965
7966 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7967 return R;
7968
7969 if (SDValue Combined = visitORLike(N0, N1, DL))
7970 return Combined;
7971
7972 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7973 return Combined;
7974
7975 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7976 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7977 return BSwap;
7978 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7979 return BSwap;
7980
7981 // reassociate or
7982 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7983 return ROR;
7984
7985 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7986 if (SDValue SD =
7987 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7988 return SD;
7989
7990 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7991 // iff (c1 & c2) != 0 or c1/c2 are undef.
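// Note that (X & C1) | C2 == (X | C2) & (C1 | C2) holds for any constants;
// the intersects()/undef check below only limits when this canonicalization
// is applied.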
7992 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7993 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7994 };
7995 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7996 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7997 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7998 {N1, N0.getOperand(1)})) {
7999 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8000 AddToWorklist(IOR.getNode());
8001 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8002 }
8003 }
8004
8005 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8006 return Combined;
8007 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8008 return Combined;
8009
8010 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8011 if (N0.getOpcode() == N1.getOpcode())
8012 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8013 return V;
8014
8015 // See if this is some rotate idiom.
8016 if (SDValue Rot = MatchRotate(N0, N1, DL))
8017 return Rot;
8018
8019 if (SDValue Load = MatchLoadCombine(N))
8020 return Load;
8021
8022 // Simplify the operands using demanded-bits information.
8023 if (SimplifyDemandedBits(SDValue(N, 0)))
8024 return SDValue(N, 0);
8025
8026 // If OR can be rewritten into ADD, try combines based on ADD.
8027 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8028 DAG.isADDLike(SDValue(N, 0)))
8029 if (SDValue Combined = visitADDLike(N))
8030 return Combined;
8031
8032 // Postpone until legalization completed to avoid interference with bswap
8033 // folding
8034 if (LegalOperations || VT.isVector())
8035 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8036 return R;
8037
8038 return SDValue();
8039}
8040
8041 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8042 SDValue &Mask) {
8043 if (Op.getOpcode() == ISD::AND &&
8044 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8045 Mask = Op.getOperand(1);
8046 return Op.getOperand(0);
8047 }
8048 return Op;
8049}
8050
8051/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8052static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8053 SDValue &Mask) {
8054 Op = stripConstantMask(DAG, Op, Mask);
8055 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8056 Shift = Op;
8057 return true;
8058 }
8059 return false;
8060}
8061
8062/// Helper function for visitOR to extract the needed side of a rotate idiom
8063/// from a shl/srl/mul/udiv. This is meant to handle cases where
8064/// InstCombine merged some outside op with one of the shifts from
8065/// the rotate pattern.
8066/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8067/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8068/// patterns:
8069///
8070/// (or (add v v) (shrl v bitwidth-1)):
8071/// expands (add v v) -> (shl v 1)
8072///
8073/// (or (mul v c0) (shrl (mul v c1) c2)):
8074/// expands (mul v c0) -> (shl (mul v c1) c3)
8075///
8076/// (or (udiv v c0) (shl (udiv v c1) c2)):
8077/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8078///
8079/// (or (shl v c0) (shrl (shl v c1) c2)):
8080/// expands (shl v c0) -> (shl (shl v c1) c3)
8081///
8082/// (or (shrl v c0) (shl (shrl v c1) c2)):
8083/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8084///
8085/// Such that in all cases, c3+c2==bitwidth(op v c1).
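///
/// For example (i32): in (or (mul v 768), (srl (mul v 3) 24)), c1 == 3 and
/// c2 == 24, so c3 == 8 and (mul v 768) expands to (shl (mul v 3) 8),
/// exposing a rotate of (mul v 3) by 8.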
8086 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8087 SDValue ExtractFrom, SDValue &Mask,
8088 const SDLoc &DL) {
8089 assert(OppShift && ExtractFrom && "Empty SDValue");
8090 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8091 return SDValue();
8092
8093 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8094
8095 // Value and Type of the shift.
8096 SDValue OppShiftLHS = OppShift.getOperand(0);
8097 EVT ShiftedVT = OppShiftLHS.getValueType();
8098
8099 // Amount of the existing shift.
8100 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8101
8102 // (add v v) -> (shl v 1)
8103 // TODO: Should this be a general DAG canonicalization?
8104 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8105 ExtractFrom.getOpcode() == ISD::ADD &&
8106 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8107 ExtractFrom.getOperand(0) == OppShiftLHS &&
8108 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8109 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8110 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8111
8112 // Preconditions:
8113 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8114 //
8115 // Find opcode of the needed shift to be extracted from (op0 v c0).
8116 unsigned Opcode = ISD::DELETED_NODE;
8117 bool IsMulOrDiv = false;
8118 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8119 // opcode or its arithmetic (mul or udiv) variant.
8120 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8121 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8122 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8123 return false;
8124 Opcode = NeededShift;
8125 return true;
8126 };
8127 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8128 // that the needed shift can be extracted from.
8129 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8130 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8131 return SDValue();
8132
8133 // op0 must be the same opcode on both sides, have the same LHS argument,
8134 // and produce the same value type.
8135 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8136 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8137 ShiftedVT != ExtractFrom.getValueType())
8138 return SDValue();
8139
8140 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8141 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8142 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8143 ConstantSDNode *ExtractFromCst =
8144 isConstOrConstSplat(ExtractFrom.getOperand(1));
8145 // TODO: We should be able to handle non-uniform constant vectors for these values
8146 // Check that we have constant values.
8147 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8148 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8149 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8150 return SDValue();
8151
8152 // Compute the shift amount we need to extract to complete the rotate.
8153 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8154 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8155 return SDValue();
8156 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8157 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8158 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8159 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8160 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8161
8162 // Now try extract the needed shift from the ExtractFrom op and see if the
8163 // result matches up with the existing shift's LHS op.
8164 if (IsMulOrDiv) {
8165 // Op to extract from is a mul or udiv by a constant.
8166 // Check:
8167 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8168 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8169 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8170 NeededShiftAmt.getZExtValue());
8171 APInt ResultAmt;
8172 APInt Rem;
8173 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8174 if (Rem != 0 || ResultAmt != OppLHSAmt)
8175 return SDValue();
8176 } else {
8177 // Op to extract from is a shift by a constant.
8178 // Check:
8179 // c2 - (bitwidth(op0 v c0) - c1) == c0
8180 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8181 ExtractFromAmt.getBitWidth()))
8182 return SDValue();
8183 }
8184
8185 // Return the expanded shift op that should allow a rotate to be formed.
8186 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8187 EVT ResVT = ExtractFrom.getValueType();
8188 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8189 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8190}
8191
8192// Return true if we can prove that, whenever Neg and Pos are both in the
8193// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8194// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8195//
8196// (or (shift1 X, Neg), (shift2 X, Pos))
8197//
8198// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8199// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8200// to consider shift amounts with defined behavior.
8201//
8202// The IsRotate flag should be set when the LHS of both shifts is the same.
8203// Otherwise if matching a general funnel shift, it should be clear.
8204static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8205 SelectionDAG &DAG, bool IsRotate) {
8206 const auto &TLI = DAG.getTargetLoweringInfo();
8207 // If EltSize is a power of 2 then:
8208 //
8209 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8210 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8211 //
8212 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8213 // for the stronger condition:
8214 //
8215 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8216 //
8217 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8218 // we can just replace Neg with Neg' for the rest of the function.
8219 //
8220 // In other cases we check for the even stronger condition:
8221 //
8222 // Neg == EltSize - Pos [B]
8223 //
8224 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8225 // behavior if Pos == 0 (and consequently Neg == EltSize).
8226 //
8227 // We could actually use [A] whenever EltSize is a power of 2, but the
8228 // only extra cases that it would match are those uninteresting ones
8229 // where Neg and Pos are never in range at the same time. E.g. for
8230 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8231 // as well as (sub 32, Pos), but:
8232 //
8233 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8234 //
8235 // always invokes undefined behavior for 32-bit X.
8236 //
8237 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8238 // This allows us to peek through any operations that only affect Mask's
8239 // un-demanded bits.
8240 //
8241 // NOTE: We can only do this when matching operations which won't modify the
8242 // least Log2(EltSize) significant bits and not a general funnel shift.
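// For example, with EltSize == 32, Neg == (and (sub 32, Pos), 31) satisfies
// [A] for every Pos in [0, 32), even though (sub 32, Pos) is 32 when Pos == 0.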
8243 unsigned MaskLoBits = 0;
8244 if (IsRotate && isPowerOf2_64(EltSize)) {
8245 unsigned Bits = Log2_64(EltSize);
8246 unsigned NegBits = Neg.getScalarValueSizeInBits();
8247 if (NegBits >= Bits) {
8248 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8249 if (SDValue Inner =
8250 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8251 Neg = Inner;
8252 MaskLoBits = Bits;
8253 }
8254 }
8255 }
8256
8257 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8258 if (Neg.getOpcode() != ISD::SUB)
8259 return false;
8260 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8261 if (!NegC)
8262 return false;
8263 SDValue NegOp1 = Neg.getOperand(1);
8264
8265 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8266 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8267 // are redundant for the purpose of the equality.
8268 if (MaskLoBits) {
8269 unsigned PosBits = Pos.getScalarValueSizeInBits();
8270 if (PosBits >= MaskLoBits) {
8271 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8272 if (SDValue Inner =
8273 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8274 Pos = Inner;
8275 }
8276 }
8277 }
8278
8279 // The condition we need is now:
8280 //
8281 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8282 //
8283 // If NegOp1 == Pos then we need:
8284 //
8285 // EltSize & Mask == NegC & Mask
8286 //
8287 // (because "x & Mask" is a truncation and distributes through subtraction).
8288 //
8289 // We also need to account for a potential truncation of NegOp1 if the amount
8290 // has already been legalized to a shift amount type.
8291 APInt Width;
8292 if ((Pos == NegOp1) ||
8293 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8294 Width = NegC->getAPIntValue();
8295
8296 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8297 // Then the condition we want to prove becomes:
8298 //
8299 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8300 //
8301 // which, again because "x & Mask" is a truncation, becomes:
8302 //
8303 // NegC & Mask == (EltSize - PosC) & Mask
8304 // EltSize & Mask == (NegC + PosC) & Mask
8305 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8306 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8307 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8308 else
8309 return false;
8310 } else
8311 return false;
8312
8313 // Now we just need to check that EltSize & Mask == Width & Mask.
8314 if (MaskLoBits)
8315 // EltSize & Mask is 0 since Mask is EltSize - 1.
8316 return Width.getLoBits(MaskLoBits) == 0;
8317 return Width == EltSize;
8318}
8319
8320// A subroutine of MatchRotate used once we have found an OR of two opposite
8321// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8322// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8323// former being preferred if supported. InnerPos and InnerNeg are Pos and
8324// Neg with outer conversions stripped away.
8325SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8326 SDValue Neg, SDValue InnerPos,
8327 SDValue InnerNeg, bool HasPos,
8328 unsigned PosOpcode, unsigned NegOpcode,
8329 const SDLoc &DL) {
8330 // fold (or (shl x, (*ext y)),
8331 // (srl x, (*ext (sub 32, y)))) ->
8332 // (rotl x, y) or (rotr x, (sub 32, y))
8333 //
8334 // fold (or (shl x, (*ext (sub 32, y))),
8335 // (srl x, (*ext y))) ->
8336 // (rotr x, y) or (rotl x, (sub 32, y))
8337 EVT VT = Shifted.getValueType();
8338 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8339 /*IsRotate*/ true)) {
8340 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8341 HasPos ? Pos : Neg);
8342 }
8343
8344 return SDValue();
8345}
8346
8347// A subroutine of MatchRotate used once we have found an OR of two opposite
8348// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8349// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8350// former being preferred if supported. InnerPos and InnerNeg are Pos and
8351// Neg with outer conversions stripped away.
8352// TODO: Merge with MatchRotatePosNeg.
8353SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8354 SDValue Neg, SDValue InnerPos,
8355 SDValue InnerNeg, bool HasPos,
8356 unsigned PosOpcode, unsigned NegOpcode,
8357 const SDLoc &DL) {
8358 EVT VT = N0.getValueType();
8359 unsigned EltBits = VT.getScalarSizeInBits();
8360
8361 // fold (or (shl x0, (*ext y)),
8362 // (srl x1, (*ext (sub 32, y)))) ->
8363 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8364 //
8365 // fold (or (shl x0, (*ext (sub 32, y))),
8366 // (srl x1, (*ext y))) ->
8367 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8368 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8369 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8370 HasPos ? Pos : Neg);
8371 }
8372
8373 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8374 // so for now just use the PosOpcode case if it's legal.
8375 // TODO: When can we use the NegOpcode case?
8376 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8377 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8378 if (Op.getOpcode() != BinOpc)
8379 return false;
8380 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8381 return Cst && (Cst->getAPIntValue() == Imm);
8382 };
8383
8384 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8385 // -> (fshl x0, x1, y)
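// E.g. for 32 bits: (xor y, 31) == 31 - y for y in [0, 31], so the
// (srl (srl x1, 1), (xor y, 31)) side shifts x1 by a total of 32 - y when
// y != 0 and yields 0 when y == 0, matching fshl semantics.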
8386 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8387 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8388 InnerPos == InnerNeg.getOperand(0) &&
8389 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8390 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8391 }
8392
8393 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8394 // -> (fshr x0, x1, y)
8395 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8396 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8397 InnerNeg == InnerPos.getOperand(0) &&
8398 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8399 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8400 }
8401
8402 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8403 // -> (fshr x0, x1, y)
8404 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8405 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8406 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8407 InnerNeg == InnerPos.getOperand(0) &&
8408 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8409 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8410 }
8411 }
8412
8413 return SDValue();
8414}
8415
8416// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8417// idioms for rotate, and if the target supports rotation instructions, generate
8418// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8419// with different shifted sources.
8420SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8421 EVT VT = LHS.getValueType();
8422
8423 // The target must have at least one rotate/funnel flavor.
8424 // We still try to match rotate by constant pre-legalization.
8425 // TODO: Support pre-legalization funnel-shift by constant.
8426 bool HasROTL = hasOperation(ISD::ROTL, VT);
8427 bool HasROTR = hasOperation(ISD::ROTR, VT);
8428 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8429 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8430
8431 // If the type is going to be promoted and the target has enabled custom
8432 // lowering for rotate, allow matching rotate by non-constants. Only allow
8433 // this for scalar types.
8434 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8435 TargetLowering::TypePromoteInteger) {
8436 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8437 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8438 }
8439
8440 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8441 return SDValue();
8442
8443 // Check for truncated rotate.
8444 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8445 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8446 assert(LHS.getValueType() == RHS.getValueType());
8447 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8448 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8449 }
8450 }
8451
8452 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8453 SDValue LHSShift; // The shift.
8454 SDValue LHSMask; // AND value if any.
8455 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8456
8457 SDValue RHSShift; // The shift.
8458 SDValue RHSMask; // AND value if any.
8459 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8460
8461 // If neither side matched a rotate half, bail
8462 if (!LHSShift && !RHSShift)
8463 return SDValue();
8464
8465 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8466 // side of the rotate, so try to handle that here. In all cases we need to
8467 // pass the matched shift from the opposite side to compute the opcode and
8468 // needed shift amount to extract. We still want to do this if both sides
8469 // matched a rotate half because one half may be a potential overshift that
8470 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8471 // single one).
8472
8473 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8474 if (LHSShift)
8475 if (SDValue NewRHSShift =
8476 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8477 RHSShift = NewRHSShift;
8478 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8479 if (RHSShift)
8480 if (SDValue NewLHSShift =
8481 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8482 LHSShift = NewLHSShift;
8483
8484 // If a side is still missing, nothing else we can do.
8485 if (!RHSShift || !LHSShift)
8486 return SDValue();
8487
8488 // At this point we've matched or extracted a shift op on each side.
8489
8490 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8491 return SDValue(); // Shifts must disagree.
8492
8493 // Canonicalize shl to left side in a shl/srl pair.
8494 if (RHSShift.getOpcode() == ISD::SHL) {
8495 std::swap(LHS, RHS);
8496 std::swap(LHSShift, RHSShift);
8497 std::swap(LHSMask, RHSMask);
8498 }
8499
8500 // Something has gone wrong - we've lost the shl/srl pair - bail.
8501 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8502 return SDValue();
8503
8504 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8505 SDValue LHSShiftArg = LHSShift.getOperand(0);
8506 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8507 SDValue RHSShiftArg = RHSShift.getOperand(0);
8508 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8509
8510 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8511 ConstantSDNode *RHS) {
8512 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8513 };
8514
8515 auto ApplyMasks = [&](SDValue Res) {
8516 // If there is an AND of either shifted operand, apply it to the result.
8517 if (LHSMask.getNode() || RHSMask.getNode()) {
8518 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8519 SDValue Mask = AllOnes;
8520
8521 if (LHSMask.getNode()) {
8522 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8523 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8524 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8525 }
8526 if (RHSMask.getNode()) {
8527 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8528 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8529 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8530 }
8531
8532 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8533 }
8534
8535 return Res;
8536 };
8537
8538 // TODO: Support pre-legalization funnel-shift by constant.
8539 bool IsRotate = LHSShiftArg == RHSShiftArg;
8540 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8541 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8542 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8543 // Look for a disguised rotate by constant.
8544 // The common shifted operand X may be hidden inside another 'or'.
8545 SDValue X, Y;
8546 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8547 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8548 return false;
8549 if (CommonOp == Or.getOperand(0)) {
8550 X = CommonOp;
8551 Y = Or.getOperand(1);
8552 return true;
8553 }
8554 if (CommonOp == Or.getOperand(1)) {
8555 X = CommonOp;
8556 Y = Or.getOperand(0);
8557 return true;
8558 }
8559 return false;
8560 };
8561
8562 SDValue Res;
8563 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8564 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8565 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8566 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8567 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8568 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8569 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8570 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8571 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8572 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8573 } else {
8574 return SDValue();
8575 }
8576
8577 return ApplyMasks(Res);
8578 }
8579
8580 return SDValue(); // Requires funnel shift support.
8581 }
8582
8583 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8584 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8585 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8586 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8587 // iff C1+C2 == EltSizeInBits
8588 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8589 SDValue Res;
8590 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8591 bool UseROTL = !LegalOperations || HasROTL;
8592 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8593 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8594 } else {
8595 bool UseFSHL = !LegalOperations || HasFSHL;
8596 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8597 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8598 }
8599
8600 return ApplyMasks(Res);
8601 }
8602
8603 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8604 // shift.
8605 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8606 return SDValue();
8607
8608 // If there is a mask here, and we have a variable shift, we can't be sure
8609 // that we're masking out the right stuff.
8610 if (LHSMask.getNode() || RHSMask.getNode())
8611 return SDValue();
8612
8613 // If the shift amount is sign/zext/any-extended just peel it off.
8614 SDValue LExtOp0 = LHSShiftAmt;
8615 SDValue RExtOp0 = RHSShiftAmt;
8616 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8617 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8618 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8619 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8620 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8621 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8622 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8623 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8624 LExtOp0 = LHSShiftAmt.getOperand(0);
8625 RExtOp0 = RHSShiftAmt.getOperand(0);
8626 }
8627
8628 if (IsRotate && (HasROTL || HasROTR)) {
8629 SDValue TryL =
8630 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8631 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8632 if (TryL)
8633 return TryL;
8634
8635 SDValue TryR =
8636 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8637 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8638 if (TryR)
8639 return TryR;
8640 }
8641
8642 SDValue TryL =
8643 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8644 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8645 if (TryL)
8646 return TryL;
8647
8648 SDValue TryR =
8649 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8650 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8651 if (TryR)
8652 return TryR;
8653
8654 return SDValue();
8655}
8656
8657/// Recursively traverses the expression calculating the origin of the requested
8658/// byte of the given value. Returns std::nullopt if the provider can't be
8659/// calculated.
8660///
8661/// For all the values except the root of the expression, we verify that the
8662/// value has exactly one use and if not then return std::nullopt. This way if
8663/// the origin of the byte is returned it's guaranteed that the values which
8664/// contribute to the byte are not used outside of this expression.
8665
8666/// However, there is a special case when dealing with vector loads -- we allow
8667/// more than one use if the load is a vector type. Since the values that
8668/// contribute to the byte ultimately come from the ExtractVectorElements of the
8669/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8670/// because those operations are independent from the pattern to be combined.
8671/// For vector loads, we simply care that the ByteProviders are adjacent
8672/// positions of the same vector, and their index matches the byte that is being
8673/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8674/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8675/// byte position we are trying to provide for the LoadCombine. If these do
8676/// not match, then we can not combine the vector loads. \p Index uses the
8677/// byte position we are trying to provide for and is matched against the
8678/// shl and load size. The \p Index algorithm ensures the requested byte is
8679/// provided for by the pattern, and the pattern does not over provide bytes.
8680///
8681///
8682/// The supported LoadCombine pattern for vector loads is as follows
8683/// or
8684/// / \
8685/// or shl
8686/// / \ |
8687/// or shl zext
8688/// / \ | |
8689/// shl zext zext EVE*
8690/// | | | |
8691/// zext EVE* EVE* LOAD
8692/// | | |
8693/// EVE* LOAD LOAD
8694/// |
8695/// LOAD
8696///
8697/// *ExtractVectorElement
8698 using SDByteProvider = ByteProvider<SDNode *>;
8699
8700static std::optional<SDByteProvider>
8701 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8702 std::optional<uint64_t> VectorIndex,
8703 unsigned StartingIndex = 0) {
8704
8705 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8706 if (Depth == 10)
8707 return std::nullopt;
8708
8709 // Only allow multiple uses if the instruction is a vector load (in which
8710 // case we will use the load for every ExtractVectorElement)
8711 if (Depth && !Op.hasOneUse() &&
8712 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8713 return std::nullopt;
8714
8715 // Fail to combine if we have encountered anything but a LOAD after handling
8716 // an ExtractVectorElement.
8717 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8718 return std::nullopt;
8719
8720 unsigned BitWidth = Op.getValueSizeInBits();
8721 if (BitWidth % 8 != 0)
8722 return std::nullopt;
8723 unsigned ByteWidth = BitWidth / 8;
8724 assert(Index < ByteWidth && "invalid index requested");
8725 (void) ByteWidth;
8726
8727 switch (Op.getOpcode()) {
8728 case ISD::OR: {
8729 auto LHS =
8730 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8731 if (!LHS)
8732 return std::nullopt;
8733 auto RHS =
8734 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8735 if (!RHS)
8736 return std::nullopt;
8737
8738 if (LHS->isConstantZero())
8739 return RHS;
8740 if (RHS->isConstantZero())
8741 return LHS;
8742 return std::nullopt;
8743 }
8744 case ISD::SHL: {
8745 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8746 if (!ShiftOp)
8747 return std::nullopt;
8748
8749 uint64_t BitShift = ShiftOp->getZExtValue();
8750
8751 if (BitShift % 8 != 0)
8752 return std::nullopt;
8753 uint64_t ByteShift = BitShift / 8;
8754
8755 // If we are shifting by an amount greater than the index we are trying to
8756 // provide, then do not provide anything. Otherwise, subtract the index by
8757 // the amount we shifted by.
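// E.g. (shl x, 16) provides byte 2 of its result from byte 0 of x, while
// bytes 0 and 1 of the result are known to be zero.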
8758 return Index < ByteShift
8759 ? std::optional<SDByteProvider>(SDByteProvider::getConstantZero())
8760 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8761 Depth + 1, VectorIndex, Index);
8762 }
8763 case ISD::ANY_EXTEND:
8764 case ISD::SIGN_EXTEND:
8765 case ISD::ZERO_EXTEND: {
8766 SDValue NarrowOp = Op->getOperand(0);
8767 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8768 if (NarrowBitWidth % 8 != 0)
8769 return std::nullopt;
8770 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8771
8772 if (Index >= NarrowByteWidth)
8773 return Op.getOpcode() == ISD::ZERO_EXTEND
8774 ? std::optional<SDByteProvider>(
8775 SDByteProvider::getConstantZero())
8776 : std::nullopt;
8777 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8778 StartingIndex);
8779 }
8780 case ISD::BSWAP:
8781 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8782 Depth + 1, VectorIndex, StartingIndex);
8783 case ISD::EXTRACT_VECTOR_ELT: {
8784 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8785 if (!OffsetOp)
8786 return std::nullopt;
8787
8788 VectorIndex = OffsetOp->getZExtValue();
8789
8790 SDValue NarrowOp = Op->getOperand(0);
8791 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8792 if (NarrowBitWidth % 8 != 0)
8793 return std::nullopt;
8794 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8795 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8796 // type, leaving the high bits undefined.
8797 if (Index >= NarrowByteWidth)
8798 return std::nullopt;
8799
8800 // Check to see if the position of the element in the vector corresponds
8801 // with the byte we are trying to provide for. In the case of a vector of
8802 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8803 // the element will provide a range of bytes. For example, if we have a
8804 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8805 // 3).
8806 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8807 return std::nullopt;
8808 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8809 return std::nullopt;
8810
8811 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8812 VectorIndex, StartingIndex);
8813 }
8814 case ISD::LOAD: {
8815 auto L = cast<LoadSDNode>(Op.getNode());
8816 if (!L->isSimple() || L->isIndexed())
8817 return std::nullopt;
8818
8819 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8820 if (NarrowBitWidth % 8 != 0)
8821 return std::nullopt;
8822 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8823
8824 // If the width of the load does not reach the byte we are trying to provide for
8825 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8826 // question
8827 if (Index >= NarrowByteWidth)
8828 return L->getExtensionType() == ISD::ZEXTLOAD
8829 ? std::optional<SDByteProvider>(
8830 SDByteProvider::getConstantZero())
8831 : std::nullopt;
8832
8833 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8834 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8835 }
8836 }
8837
8838 return std::nullopt;
8839}
8840
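// These helpers map a byte's significance index i (0 == least significant) to
// its memory offset: e.g. for a 4-byte value, byte 0 is at offset 0 on a
// little-endian layout and at offset 3 on a big-endian one.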
8841static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8842 return i;
8843}
8844
8845static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8846 return BW - i - 1;
8847}
8848
8849// Check if the bytes offsets we are looking at match with either big or
8850// little endian value loaded. Return true for big endian, false for little
8851// endian, and std::nullopt if match failed.
8852static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8853 int64_t FirstOffset) {
8854 // The endian can be decided only when it is 2 bytes at least.
8855 unsigned Width = ByteOffsets.size();
8856 if (Width < 2)
8857 return std::nullopt;
8858
8859 bool BigEndian = true, LittleEndian = true;
8860 for (unsigned i = 0; i < Width; i++) {
8861 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8862 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8863 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8864 if (!BigEndian && !LittleEndian)
8865 return std::nullopt;
8866 }
8867
8868 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8869 "little endian");
8870 return BigEndian;
8871}
8872
8873// Look through one layer of truncate or extend.
8874 static SDValue stripTruncAndExt(SDValue Value) {
8875 switch (Value.getOpcode()) {
8876 case ISD::TRUNCATE:
8877 case ISD::ZERO_EXTEND:
8878 case ISD::SIGN_EXTEND:
8879 case ISD::ANY_EXTEND:
8880 return Value.getOperand(0);
8881 }
8882 return SDValue();
8883}
8884
8885/// Match a pattern where a wide type scalar value is stored by several narrow
8886/// stores. Fold it into a single store or a BSWAP and a store if the targets
8887/// supports it.
8888///
8889/// Assuming little endian target:
8890/// i8 *p = ...
8891/// i32 val = ...
8892/// p[0] = (val >> 0) & 0xFF;
8893/// p[1] = (val >> 8) & 0xFF;
8894/// p[2] = (val >> 16) & 0xFF;
8895/// p[3] = (val >> 24) & 0xFF;
8896/// =>
8897/// *((i32)p) = val;
8898///
8899/// i8 *p = ...
8900/// i32 val = ...
8901/// p[0] = (val >> 24) & 0xFF;
8902/// p[1] = (val >> 16) & 0xFF;
8903/// p[2] = (val >> 8) & 0xFF;
8904/// p[3] = (val >> 0) & 0xFF;
8905/// =>
8906/// *((i32)p) = BSWAP(val);
8907SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8908 // The matching looks for "store (trunc x)" patterns that appear early but are
8909 // likely to be replaced by truncating store nodes during combining.
8910 // TODO: If there is evidence that running this later would help, this
8911 // limitation could be removed. Legality checks may need to be added
8912 // for the created store and optional bswap/rotate.
8913 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8914 return SDValue();
8915
8916 // We only handle merging simple stores of 1-4 bytes.
8917 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8918 EVT MemVT = N->getMemoryVT();
8919 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8920 !N->isSimple() || N->isIndexed())
8921 return SDValue();
8922
8923 // Collect all of the stores in the chain, up to the maximum store width (i64).
8924 SDValue Chain = N->getChain();
8925 SmallVector<StoreSDNode *, 8> Stores = {N};
8926 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8927 unsigned MaxWideNumBits = 64;
8928 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8929 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8930 // All stores must be the same size to ensure that we are writing all of the
8931 // bytes in the wide value.
8932 // This store should have exactly one use as a chain operand for another
8933 // store in the merging set. If there are other chain uses, then the
8934 // transform may not be safe because order of loads/stores outside of this
8935 // set may not be preserved.
8936 // TODO: We could allow multiple sizes by tracking each stored byte.
8937 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8938 Store->isIndexed() || !Store->hasOneUse())
8939 return SDValue();
8940 Stores.push_back(Store);
8941 Chain = Store->getChain();
8942 if (MaxStores < Stores.size())
8943 return SDValue();
8944 }
8945 // There is no reason to continue if we do not have at least a pair of stores.
8946 if (Stores.size() < 2)
8947 return SDValue();
8948
8949 // Handle simple types only.
8950 LLVMContext &Context = *DAG.getContext();
8951 unsigned NumStores = Stores.size();
8952 unsigned WideNumBits = NumStores * NarrowNumBits;
8953 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8954 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8955 return SDValue();
8956
8957 // Check if all bytes of the source value that we are looking at are stored
8958 // to the same base address. Collect offsets from Base address into OffsetMap.
8959 SDValue SourceValue;
8960 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8961 int64_t FirstOffset = INT64_MAX;
8962 StoreSDNode *FirstStore = nullptr;
8963 std::optional<BaseIndexOffset> Base;
8964 for (auto *Store : Stores) {
8965 // All the stores store different parts of the CombinedValue. A truncate is
8966 // required to get the partial value.
8967 SDValue Trunc = Store->getValue();
8968 if (Trunc.getOpcode() != ISD::TRUNCATE)
8969 return SDValue();
8970 // Other than the first/last part, a shift operation is required to get the
8971 // offset.
8972 int64_t Offset = 0;
8973 SDValue WideVal = Trunc.getOperand(0);
8974 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8975 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8976 // The shift amount must be a constant multiple of the narrow type.
8977 // It is translated to the offset address in the wide source value "y".
8978 //
8979 // x = srl y, ShiftAmtC
8980 // i8 z = trunc x
8981 // store z, ...
8982 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8983 if (ShiftAmtC % NarrowNumBits != 0)
8984 return SDValue();
8985
8986 // Make sure we aren't reading bits that are shifted in.
8987 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
8988 return SDValue();
8989
8990 Offset = ShiftAmtC / NarrowNumBits;
8991 WideVal = WideVal.getOperand(0);
8992 }
8993
8994 // Stores must share the same source value with different offsets.
8995 if (!SourceValue)
8996 SourceValue = WideVal;
8997 else if (SourceValue != WideVal) {
8998 // Truncate and extends can be stripped to see if the values are related.
8999 if (stripTruncAndExt(SourceValue) != WideVal &&
9000 stripTruncAndExt(WideVal) != SourceValue)
9001 return SDValue();
9002
9003 if (WideVal.getScalarValueSizeInBits() >
9004 SourceValue.getScalarValueSizeInBits())
9005 SourceValue = WideVal;
9006
9007 // Give up if the source value type is smaller than the store size.
9008 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
9009 return SDValue();
9010 }
9011
9012 // Stores must share the same base address.
9013 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9014 int64_t ByteOffsetFromBase = 0;
9015 if (!Base)
9016 Base = Ptr;
9017 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9018 return SDValue();
9019
9020 // Remember the first store.
9021 if (ByteOffsetFromBase < FirstOffset) {
9022 FirstStore = Store;
9023 FirstOffset = ByteOffsetFromBase;
9024 }
9025 // Map the offset in the store and the offset in the combined value, and
9026 // early return if it has been set before.
9027 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9028 return SDValue();
9029 OffsetMap[Offset] = ByteOffsetFromBase;
9030 }
9031
9032 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9033 assert(FirstStore && "First store must be set");
9034
9035 // Check that a store of the wide type is both allowed and fast on the target
9036 const DataLayout &Layout = DAG.getDataLayout();
9037 unsigned Fast = 0;
9038 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9039 *FirstStore->getMemOperand(), &Fast);
9040 if (!Allowed || !Fast)
9041 return SDValue();
9042
9043 // Check if the pieces of the value are going to the expected places in memory
9044 // to merge the stores.
9045 auto checkOffsets = [&](bool MatchLittleEndian) {
9046 if (MatchLittleEndian) {
9047 for (unsigned i = 0; i != NumStores; ++i)
9048 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9049 return false;
9050 } else { // MatchBigEndian by reversing loop counter.
9051 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9052 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9053 return false;
9054 }
9055 return true;
9056 };
9057
9058 // Check if the offsets line up for the native data layout of this target.
9059 bool NeedBswap = false;
9060 bool NeedRotate = false;
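// A pair of half-width stores written in reversed order (e.g. the two i16
// halves of an i32 swapped) is equivalent to storing the value rotated by
// half its width, so no full byte swap is needed.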
9061 if (!checkOffsets(Layout.isLittleEndian())) {
9062 // Special-case: check if byte offsets line up for the opposite endian.
9063 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9064 NeedBswap = true;
9065 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9066 NeedRotate = true;
9067 else
9068 return SDValue();
9069 }
9070
9071 SDLoc DL(N);
9072 if (WideVT != SourceValue.getValueType()) {
9073 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9074 "Unexpected store value to merge");
9075 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9076 }
9077
9078 // Before legalize we can introduce illegal bswaps/rotates which will be later
9079 // converted to an explicit bswap sequence. This way we end up with a single
9080 // store and byte shuffling instead of several stores and byte shuffling.
9081 if (NeedBswap) {
9082 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9083 } else if (NeedRotate) {
9084 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9085 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9086 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9087 }
9088
9089 SDValue NewStore =
9090 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9091 FirstStore->getPointerInfo(), FirstStore->getAlign());
9092
9093 // Rely on other DAG combine rules to remove the other individual stores.
9094 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9095 return NewStore;
9096}
9097
9098/// Match a pattern where a wide type scalar value is loaded by several narrow
9099/// loads and combined by shifts and ors. Fold it into a single load or a load
9100/// and a BSWAP if the targets supports it.
9101///
9102/// Assuming little endian target:
9103/// i8 *a = ...
9104/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9105/// =>
9106/// i32 val = *((i32)a)
9107///
9108/// i8 *a = ...
9109/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9110/// =>
9111/// i32 val = BSWAP(*((i32)a))
9112///
9113/// TODO: This rule matches complex patterns with OR node roots and doesn't
9114/// interact well with the worklist mechanism. When a part of the pattern is
9115/// updated (e.g. one of the loads) its direct users are put into the worklist,
9116/// but the root node of the pattern which triggers the load combine is not
9117/// necessarily a direct user of the changed node. For example, once the address
9118/// of t28 load is reassociated load combine won't be triggered:
9119/// t25: i32 = add t4, Constant:i32<2>
9120/// t26: i64 = sign_extend t25
9121/// t27: i64 = add t2, t26
9122/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9123/// t29: i32 = zero_extend t28
9124/// t32: i32 = shl t29, Constant:i8<8>
9125/// t33: i32 = or t23, t32
9126/// As a possible fix visitLoad can check if the load can be a part of a load
9127/// combine pattern and add corresponding OR roots to the worklist.
9128SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9129 assert(N->getOpcode() == ISD::OR &&
9130 "Can only match load combining against OR nodes");
9131
9132 // Handles simple types only
9133 EVT VT = N->getValueType(0);
9134 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9135 return SDValue();
9136 unsigned ByteWidth = VT.getSizeInBits() / 8;
9137
9138 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9139 auto MemoryByteOffset = [&](SDByteProvider P) {
9140 assert(P.hasSrc() && "Must be a memory byte provider");
9141 auto *Load = cast<LoadSDNode>(P.Src.value());
9142
9143 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9144
9145 assert(LoadBitWidth % 8 == 0 &&
9146 "can only analyze providers for individual bytes not bit");
9147 unsigned LoadByteWidth = LoadBitWidth / 8;
9148 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9149 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9150 };
9151
9152 std::optional<BaseIndexOffset> Base;
9153 SDValue Chain;
9154
9155 SmallPtrSet<LoadSDNode *, 8> Loads;
9156 std::optional<SDByteProvider> FirstByteProvider;
9157 int64_t FirstOffset = INT64_MAX;
9158
9159 // Check if all the bytes of the OR we are looking at are loaded from the same
9160 // base address. Collect bytes offsets from Base address in ByteOffsets.
9161 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9162 unsigned ZeroExtendedBytes = 0;
9163 for (int i = ByteWidth - 1; i >= 0; --i) {
9164 auto P =
9165 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9166 /*StartingIndex*/ i);
9167 if (!P)
9168 return SDValue();
9169
9170 if (P->isConstantZero()) {
9171 // It's OK for the N most significant bytes to be 0, we can just
9172 // zero-extend the load.
9173 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9174 return SDValue();
9175 continue;
9176 }
9177 assert(P->hasSrc() && "provenance should either be memory or zero");
9178 auto *L = cast<LoadSDNode>(P->Src.value());
9179
9180 // All loads must share the same chain
9181 SDValue LChain = L->getChain();
9182 if (!Chain)
9183 Chain = LChain;
9184 else if (Chain != LChain)
9185 return SDValue();
9186
9187 // Loads must share the same base address
9188 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9189 int64_t ByteOffsetFromBase = 0;
9190
9191 // For vector loads, the expected load combine pattern will have an
9192 // ExtractElement for each index in the vector. While each of these
9193 // ExtractElements will be accessing the same base address as determined
9194 // by the load instruction, the actual bytes they interact with will differ
9195 // due to different ExtractElement indices. To accurately determine the
9196 // byte position of an ExtractElement, we offset the base load ptr with
9197 // the index multiplied by the byte size of each element in the vector.
9198 if (L->getMemoryVT().isVector()) {
9199 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9200 if (LoadWidthInBit % 8 != 0)
9201 return SDValue();
9202 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9203 Ptr.addToOffset(ByteOffsetFromVector);
9204 }
9205
9206 if (!Base)
9207 Base = Ptr;
9208
9209 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9210 return SDValue();
9211
9212 // Calculate the offset of the current byte from the base address
9213 ByteOffsetFromBase += MemoryByteOffset(*P);
9214 ByteOffsets[i] = ByteOffsetFromBase;
9215
9216 // Remember the first byte load
9217 if (ByteOffsetFromBase < FirstOffset) {
9218 FirstByteProvider = P;
9219 FirstOffset = ByteOffsetFromBase;
9220 }
9221
9222 Loads.insert(L);
9223 }
9224
9225 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9226 "memory, so there must be at least one load which produces the value");
9227 assert(Base && "Base address of the accessed memory location must be set");
9228 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9229
9230 bool NeedsZext = ZeroExtendedBytes > 0;
9231
9232 EVT MemVT =
9233 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9234
9235 if (!MemVT.isSimple())
9236 return SDValue();
9237
9238 // Before legalize we can introduce too wide illegal loads which will be later
9239 // split into legal sized loads. This enables us to combine i64 load by i8
9240 // patterns to a couple of i32 loads on 32 bit targets.
9241 if (LegalOperations &&
9242 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9243 MemVT))
9244 return SDValue();
9245
9246 // Check if the bytes of the OR we are looking at match with either big or
9247 // little endian value load
9248 std::optional<bool> IsBigEndian = isBigEndian(
9249 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9250 if (!IsBigEndian)
9251 return SDValue();
9252
9253 assert(FirstByteProvider && "must be set");
9254
9255 // Ensure that the first byte is loaded from zero offset of the first load.
9256 // So the combined value can be loaded from the first load address.
9257 if (MemoryByteOffset(*FirstByteProvider) != 0)
9258 return SDValue();
9259 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9260
9261 // The node we are looking at matches with the pattern, check if we can
9262 // replace it with a single (possibly zero-extended) load and bswap + shift if
9263 // needed.
9264
9265 // If the load needs byte swap check if the target supports it
9266 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9267
9268 // Before legalize we can introduce illegal bswaps which will be later
9269 // converted to an explicit bswap sequence. This way we end up with a single
9270 // load and byte shuffling instead of several loads and byte shuffling.
9271 // We do not introduce illegal bswaps when zero-extending as this tends to
9272 // introduce too many arithmetic instructions.
9273 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9274 !TLI.isOperationLegal(ISD::BSWAP, VT))
9275 return SDValue();
9276
9277 // If we need to bswap and zero extend, we have to insert a shift. Check that
9278 // it is legal.
9279 if (NeedsBswap && NeedsZext && LegalOperations &&
9280 !TLI.isOperationLegal(ISD::SHL, VT))
9281 return SDValue();
9282
9283 // Check that a load of the wide type is both allowed and fast on the target
9284 unsigned Fast = 0;
9285 bool Allowed =
9286 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9287 *FirstLoad->getMemOperand(), &Fast);
9288 if (!Allowed || !Fast)
9289 return SDValue();
9290
9291 SDValue NewLoad =
9292 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9293 Chain, FirstLoad->getBasePtr(),
9294 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9295
9296 // Transfer chain users from old loads to the new load.
9297 for (LoadSDNode *L : Loads)
9298 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9299
9300 if (!NeedsBswap)
9301 return NewLoad;
9302
9303 SDValue ShiftedLoad =
9304 NeedsZext
9305 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9306 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9307 SDLoc(N), LegalOperations))
9308 : NewLoad;
9309 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9310}
9311
9312// If the target has andn, bsl, or a similar bit-select instruction,
9313// we want to unfold masked merge, with canonical pattern of:
9314// | A | |B|
9315// ((x ^ y) & m) ^ y
9316// | D |
9317// Into:
9318// (x & m) | (y & ~m)
9319// If y is a constant, m is not a 'not', and the 'andn' does not work with
9320// immediates, we unfold into a different pattern:
9321// ~(~x & m) & (m | y)
9322// If x is a constant, m is a 'not', and the 'andn' does not work with
9323// immediates, we unfold into a different pattern:
9324// (x | ~m) & ~(~m & ~y)
9325// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9326// the very least that breaks andnpd / andnps patterns, and because those
9327// patterns are simplified in IR and shouldn't be created in the DAG
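// In the unfolded form each result bit selects the bit of x where m is 1 and
// the bit of y where m is 0, which maps directly onto andn/bsl-style
// bit-select instructions.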
9328SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9329 assert(N->getOpcode() == ISD::XOR);
9330
9331 // Don't touch 'not' (i.e. where y = -1).
9332 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9333 return SDValue();
9334
9335 EVT VT = N->getValueType(0);
9336
9337 // There are 3 commutable operators in the pattern,
9338 // so we have to deal with 8 possible variants of the basic pattern.
9339 SDValue X, Y, M;
9340 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9341 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9342 return false;
9343 SDValue Xor = And.getOperand(XorIdx);
9344 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9345 return false;
9346 SDValue Xor0 = Xor.getOperand(0);
9347 SDValue Xor1 = Xor.getOperand(1);
9348 // Don't touch 'not' (i.e. where y = -1).
9349 if (isAllOnesOrAllOnesSplat(Xor1))
9350 return false;
9351 if (Other == Xor0)
9352 std::swap(Xor0, Xor1);
9353 if (Other != Xor1)
9354 return false;
9355 X = Xor0;
9356 Y = Xor1;
9357 M = And.getOperand(XorIdx ? 0 : 1);
9358 return true;
9359 };
9360
9361 SDValue N0 = N->getOperand(0);
9362 SDValue N1 = N->getOperand(1);
9363 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9364 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9365 return SDValue();
9366
9367 // Don't do anything if the mask is constant. This should not be reachable.
9368 // InstCombine should have already unfolded this pattern, and DAGCombiner
9369 // probably shouldn't produce it either.
9370 if (isa<ConstantSDNode>(M.getNode()))
9371 return SDValue();
9372
9373 // We can transform if the target has AndNot
9374 if (!TLI.hasAndNot(M))
9375 return SDValue();
9376
9377 SDLoc DL(N);
9378
9379 // If Y is a constant, check that 'andn' works with immediates, unless M is
9380 // a bitwise not that would already allow ANDN to be used.
9381 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9382 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9383 // If not, we need to do a bit more work to make sure andn is still used.
9384 SDValue NotX = DAG.getNOT(DL, X, VT);
9385 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9386 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9387 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9388 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9389 }
9390
9391 // If X is a constant and M is a bitwise not, check that 'andn' works with
9392 // immediates.
9393 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9394 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9395 // If not, we need to do a bit more work to make sure andn is still used.
9396 SDValue NotM = M.getOperand(0);
9397 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9398 SDValue NotY = DAG.getNOT(DL, Y, VT);
9399 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9400 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9401 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9402 }
9403
9404 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9405 SDValue NotM = DAG.getNOT(DL, M, VT);
9406 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9407
9408 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9409}
9410
9411SDValue DAGCombiner::visitXOR(SDNode *N) {
9412 SDValue N0 = N->getOperand(0);
9413 SDValue N1 = N->getOperand(1);
9414 EVT VT = N0.getValueType();
9415 SDLoc DL(N);
9416
9417 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9418 if (N0.isUndef() && N1.isUndef())
9419 return DAG.getConstant(0, DL, VT);
9420
9421 // fold (xor x, undef) -> undef
9422 if (N0.isUndef())
9423 return N0;
9424 if (N1.isUndef())
9425 return N1;
9426
9427 // fold (xor c1, c2) -> c1^c2
9428 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9429 return C;
9430
9431 // canonicalize constant to RHS
9432 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
9433 !DAG.isConstantIntBuildVectorOrConstantInt(N0))
9434 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9435
9436 // fold vector ops
9437 if (VT.isVector()) {
9438 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9439 return FoldedVOp;
9440
9441 // fold (xor x, 0) -> x, vector edition
9442 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9443 return N0;
9444 }
9445
9446 // fold (xor x, 0) -> x
9447 if (isNullConstant(N1))
9448 return N0;
9449
9450 if (SDValue NewSel = foldBinOpIntoSelect(N))
9451 return NewSel;
9452
9453 // reassociate xor
9454 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9455 return RXOR;
9456
9457 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9458 if (SDValue SD =
9459 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9460 return SD;
9461
9462 // fold (a^b) -> (a|b) iff a and b share no bits.
9463 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9464 DAG.haveNoCommonBitsSet(N0, N1)) {
9465 SDNodeFlags Flags;
9466 Flags.setDisjoint(true);
9467 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9468 }
9469
9470 // look for 'add-like' folds:
9471 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9472 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9473 isMinSignedConstant(N1))
9474 if (SDValue Combined = visitADDLike(N))
9475 return Combined;
9476
9477 // fold !(x cc y) -> (x !cc y)
9478 unsigned N0Opcode = N0.getOpcode();
9479 SDValue LHS, RHS, CC;
9480 if (TLI.isConstTrueVal(N1) &&
9481 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9482 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9483 LHS.getValueType());
9484 if (!LegalOperations ||
9485 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9486 switch (N0Opcode) {
9487 default:
9488 llvm_unreachable("Unhandled SetCC Equivalent!");
9489 case ISD::SETCC:
9490 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9491 case ISD::SELECT_CC:
9492 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9493 N0.getOperand(3), NotCC);
9494 case ISD::STRICT_FSETCC:
9495 case ISD::STRICT_FSETCCS: {
9496 if (N0.hasOneUse()) {
9497 // FIXME Can we handle multiple uses? Could we token factor the chain
9498 // results from the new/old setcc?
9499 SDValue SetCC =
9500 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9501 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9502 CombineTo(N, SetCC);
9503 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9504 recursivelyDeleteUnusedNodes(N0.getNode());
9505 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9506 }
9507 break;
9508 }
9509 }
9510 }
9511 }
9512
9513 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9514 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9515 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9516 SDValue V = N0.getOperand(0);
9517 SDLoc DL0(N0);
9518 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9519 DAG.getConstant(1, DL0, V.getValueType()));
9520 AddToWorklist(V.getNode());
9521 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9522 }
9523
9524 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9525 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9526 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9527 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9528 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9529 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9530 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9531 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9532 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9533 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9534 }
9535 }
9536 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9537 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9538 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9539 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9540 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9541 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9542 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9543 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9544 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9545 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9546 }
9547 }
9548
9549 // fold (not (neg x)) -> (add X, -1)
9550 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9551 // Y is a constant or the subtract has a single use.
9552 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9553 isNullConstant(N0.getOperand(0))) {
9554 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9555 DAG.getAllOnesConstant(DL, VT));
9556 }
9557
9558 // fold (not (add X, -1)) -> (neg X)
9559 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9560 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9561 return DAG.getNegative(N0.getOperand(0), DL, VT);
9562 }
9563
9564 // fold (xor (and x, y), y) -> (and (not x), y)
9565 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9566 SDValue X = N0.getOperand(0);
9567 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9568 AddToWorklist(NotX.getNode());
9569 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9570 }
9571
9572 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9573 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9574 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9575 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9576 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9577 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9578 SDValue S0 = S.getOperand(0);
9579 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9580 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9581 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9582 return DAG.getNode(ISD::ABS, DL, VT, S0);
9583 }
9584 }
9585
9586 // fold (xor x, x) -> 0
9587 if (N0 == N1)
9588 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9589
9590 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9591 // Here is a concrete example of this equivalence:
9592 // i16 x == 14
9593 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9594 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9595 //
9596 // =>
9597 //
9598 // i16 ~1 == 0b1111111111111110
9599 // i16 rol(~1, 14) == 0b1011111111111111
9600 //
9601 // Some additional tips to help conceptualize this transform:
9602 // - Try to see the operation as placing a single zero in a value of all ones.
9603 // - There exists no value for x which would allow the result to contain zero.
9604 // - Values of x larger than the bitwidth are undefined and do not require a
9605 // consistent result.
9606 // - Pushing the zero left requires shifting one-bits in from the right.
9607 // A rotate left of ~1 is a nice way of achieving the desired result.
9608 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9609 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9610 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9611 N0.getOperand(1));
9612 }
9613
9614 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9615 if (N0Opcode == N1.getOpcode())
9616 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9617 return V;
9618
9619 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9620 return R;
9621 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9622 return R;
9623 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9624 return R;
9625
9626 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9627 if (SDValue MM = unfoldMaskedMerge(N))
9628 return MM;
9629
9630 // Simplify the expression using non-local knowledge.
9631 if (SimplifyDemandedBits(SDValue(N, 0)))
9632 return SDValue(N, 0);
9633
9634 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9635 return Combined;
9636
9637 return SDValue();
9638}
9639
9640/// If we have a shift-by-constant of a bitwise logic op that itself has a
9641/// shift-by-constant operand with identical opcode, we may be able to convert
9642/// that into 2 independent shifts followed by the logic op. This is a
9643/// throughput improvement.
9644 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9645 // Match a one-use bitwise logic op.
9646 SDValue LogicOp = Shift->getOperand(0);
9647 if (!LogicOp.hasOneUse())
9648 return SDValue();
9649
9650 unsigned LogicOpcode = LogicOp.getOpcode();
9651 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9652 LogicOpcode != ISD::XOR)
9653 return SDValue();
9654
9655 // Find a matching one-use shift by constant.
9656 unsigned ShiftOpcode = Shift->getOpcode();
9657 SDValue C1 = Shift->getOperand(1);
9658 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9659 assert(C1Node && "Expected a shift with constant operand");
9660 const APInt &C1Val = C1Node->getAPIntValue();
9661 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9662 const APInt *&ShiftAmtVal) {
9663 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9664 return false;
9665
9666 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9667 if (!ShiftCNode)
9668 return false;
9669
9670 // Capture the shifted operand and shift amount value.
9671 ShiftOp = V.getOperand(0);
9672 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9673
9674 // Shift amount types do not have to match their operand type, so check that
9675 // the constants are the same width.
9676 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9677 return false;
9678
9679 // The fold is not valid if the sum of the shift values doesn't fit in the
9680 // given shift amount type.
9681 bool Overflow = false;
9682 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9683 if (Overflow)
9684 return false;
9685
9686 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9687 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9688 return false;
9689
9690 return true;
9691 };
9692
9693 // Logic ops are commutative, so check each operand for a match.
9694 SDValue X, Y;
9695 const APInt *C0Val;
9696 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9697 Y = LogicOp.getOperand(1);
9698 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9699 Y = LogicOp.getOperand(0);
9700 else
9701 return SDValue();
9702
9703 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
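// Illustrative instance (arbitrary constants): srl (or (srl X, 2), Y), 3
// becomes or (srl X, 5), (srl Y, 3), trading a dependent shift chain for two
// independent shifts feeding the logic op.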
9704 SDLoc DL(Shift);
9705 EVT VT = Shift->getValueType(0);
9706 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9707 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9708 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9709 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9710 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9711 LogicOp->getFlags());
9712}
9713
9714/// Handle transforms common to the three shifts, when the shift amount is a
9715/// constant.
9716/// We are looking for: (shift being one of shl/sra/srl)
9717/// shift (binop X, C0), C1
9718/// And want to transform into:
9719/// binop (shift X, C1), (shift C0, C1)
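// For illustration (arbitrary constants): shl (or X, 0xF0), 8 on i32 becomes
// or (shl X, 8), 0xF000, since the shift distributes over the bitwise op and
// the constant operand can be shifted at compile time.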
9720SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9721 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9722
9723 // Do not turn a 'not' into a regular xor.
9724 if (isBitwiseNot(N->getOperand(0)))
9725 return SDValue();
9726
9727 // The inner binop must be one-use, since we want to replace it.
9728 SDValue LHS = N->getOperand(0);
9729 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9730 return SDValue();
9731
9732 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9733 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9734 return R;
9735
9736 // We want to pull some binops through shifts, so that we have (and (shift))
9737 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9738 // thing happens with address calculations, so it's important to canonicalize
9739 // it.
9740 switch (LHS.getOpcode()) {
9741 default:
9742 return SDValue();
9743 case ISD::OR:
9744 case ISD::XOR:
9745 case ISD::AND:
9746 break;
9747 case ISD::ADD:
9748 if (N->getOpcode() != ISD::SHL)
9749 return SDValue(); // only shl(add) not sr[al](add).
9750 break;
9751 }
9752
9753 // FIXME: disable this unless the input to the binop is a shift by a constant
9754 // or is a copy/select. Enable this in other cases once we figure out when it
9755 // is exactly profitable.
9756 SDValue BinOpLHSVal = LHS.getOperand(0);
9757 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9758 BinOpLHSVal.getOpcode() == ISD::SRA ||
9759 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9760 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9761 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9762 BinOpLHSVal.getOpcode() == ISD::SELECT;
9763
9764 if (!IsShiftByConstant && !IsCopyOrSelect)
9765 return SDValue();
9766
9767 if (IsCopyOrSelect && N->hasOneUse())
9768 return SDValue();
9769
9770 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9771 SDLoc DL(N);
9772 EVT VT = N->getValueType(0);
9773 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9774 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9775 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9776 N->getOperand(1));
9777 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9778 }
9779
9780 return SDValue();
9781}
9782
9783SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9784 assert(N->getOpcode() == ISD::TRUNCATE);
9785 assert(N->getOperand(0).getOpcode() == ISD::AND);
9786
9787 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
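// For illustration (hypothetical types and constant): truncating i64 -> i32 of
// (and X, 0xFF) becomes (and (trunc X), 0xFF); the mask is narrowed along with
// the value so the AND can execute in the narrower type.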
9788 EVT TruncVT = N->getValueType(0);
9789 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9790 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9791 SDValue N01 = N->getOperand(0).getOperand(1);
9792 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9793 SDLoc DL(N);
9794 SDValue N00 = N->getOperand(0).getOperand(0);
9795 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9796 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9797 AddToWorklist(Trunc00.getNode());
9798 AddToWorklist(Trunc01.getNode());
9799 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9800 }
9801 }
9802
9803 return SDValue();
9804}
9805
9806SDValue DAGCombiner::visitRotate(SDNode *N) {
9807 SDLoc dl(N);
9808 SDValue N0 = N->getOperand(0);
9809 SDValue N1 = N->getOperand(1);
9810 EVT VT = N->getValueType(0);
9811 unsigned Bitsize = VT.getScalarSizeInBits();
9812
9813 // fold (rot x, 0) -> x
9814 if (isNullOrNullSplat(N1))
9815 return N0;
9816
9817 // fold (rot x, c) -> x iff (c % BitSize) == 0
9818 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9819 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9820 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9821 return N0;
9822 }
9823
9824 // fold (rot x, c) -> (rot x, c % BitSize)
9825 bool OutOfRange = false;
9826 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9827 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9828 return true;
9829 };
9830 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9831 EVT AmtVT = N1.getValueType();
9832 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9833 if (SDValue Amt =
9834 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9835 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9836 }
9837
9838 // rot i16 X, 8 --> bswap X
9839 auto *RotAmtC = isConstOrConstSplat(N1);
9840 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9841 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9842 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9843
9844 // Simplify the operands using demanded-bits information.
9845 if (SimplifyDemandedBits(SDValue(N, 0)))
9846 return SDValue(N, 0);
9847
9848 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9849 if (N1.getOpcode() == ISD::TRUNCATE &&
9850 N1.getOperand(0).getOpcode() == ISD::AND) {
9851 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9852 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9853 }
9854
9855 unsigned NextOp = N0.getOpcode();
9856
9857 // fold (rot* (rot* x, c2), c1)
9858 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
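// For illustration on i8 (arbitrary amounts): rotl (rotr x, 3), 10 combines as
// ((10 % 8) - (3 % 8) + 8) % 8 == 7, i.e. rotl x, 7 (a rotr by 3 is a rotl by
// 5, plus the outer rotl of 10 % 8 == 2, giving 7 in total).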
9859 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9860 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9861 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9862 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9863 EVT ShiftVT = C1->getValueType(0);
9864 bool SameSide = (N->getOpcode() == NextOp);
9865 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9866 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9867 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9868 {N1, BitsizeC});
9869 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9870 {N0.getOperand(1), BitsizeC});
9871 if (Norm1 && Norm2)
9872 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9873 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9874 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9875 {CombinedShift, BitsizeC});
9876 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9877 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9878 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9879 CombinedShiftNorm);
9880 }
9881 }
9882 }
9883 return SDValue();
9884}
9885
9886SDValue DAGCombiner::visitSHL(SDNode *N) {
9887 SDValue N0 = N->getOperand(0);
9888 SDValue N1 = N->getOperand(1);
9889 if (SDValue V = DAG.simplifyShift(N0, N1))
9890 return V;
9891
9892 SDLoc DL(N);
9893 EVT VT = N0.getValueType();
9894 EVT ShiftVT = N1.getValueType();
9895 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9896
9897 // fold (shl c1, c2) -> c1<<c2
9898 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9899 return C;
9900
9901 // fold vector ops
9902 if (VT.isVector()) {
9903 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9904 return FoldedVOp;
9905
9906 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9907 // If setcc produces all-one true value then:
9908 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9909 if (N1CV && N1CV->isConstant()) {
9910 if (N0.getOpcode() == ISD::AND) {
9911 SDValue N00 = N0->getOperand(0);
9912 SDValue N01 = N0->getOperand(1);
9913 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9914
9915 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9916 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9917 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9918 if (SDValue C =
9919 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9920 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9921 }
9922 }
9923 }
9924 }
9925
9926 if (SDValue NewSel = foldBinOpIntoSelect(N))
9927 return NewSel;
9928
9929 // if (shl x, c) is known to be zero, return 0
9930 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9931 return DAG.getConstant(0, DL, VT);
9932
9933 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9934 if (N1.getOpcode() == ISD::TRUNCATE &&
9935 N1.getOperand(0).getOpcode() == ISD::AND) {
9936 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9937 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9938 }
9939
9940 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9941 if (N0.getOpcode() == ISD::SHL) {
9942 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9943 ConstantSDNode *RHS) {
9944 APInt c1 = LHS->getAPIntValue();
9945 APInt c2 = RHS->getAPIntValue();
9946 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9947 return (c1 + c2).uge(OpSizeInBits);
9948 };
9949 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9950 return DAG.getConstant(0, DL, VT);
9951
9952 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9953 ConstantSDNode *RHS) {
9954 APInt c1 = LHS->getAPIntValue();
9955 APInt c2 = RHS->getAPIntValue();
9956 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9957 return (c1 + c2).ult(OpSizeInBits);
9958 };
9959 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9960 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9961 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9962 }
9963 }
9964
9965 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9966 // For this to be valid, the second form must not preserve any of the bits
9967 // that are shifted out by the inner shift in the first form. This means
9968 // the outer shift size must be >= the number of bits added by the ext.
9969 // As a corollary, we don't care what kind of ext it is.
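// For illustration (assumed i8 -> i32 extension, arbitrary amounts): for
// shl (zext (shl x, 3) to i32), 26, the ext adds 24 bits, 26 >= 24 and
// 3 + 26 = 29 < 32, so this becomes shl (zext x to i32), 29; had c1 + c2
// reached 32, the whole expression would instead fold to the constant 0.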
9970 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9971 N0.getOpcode() == ISD::ANY_EXTEND ||
9972 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9973 N0.getOperand(0).getOpcode() == ISD::SHL) {
9974 SDValue N0Op0 = N0.getOperand(0);
9975 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9976 EVT InnerVT = N0Op0.getValueType();
9977 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9978
9979 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9980 ConstantSDNode *RHS) {
9981 APInt c1 = LHS->getAPIntValue();
9982 APInt c2 = RHS->getAPIntValue();
9983 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9984 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9985 (c1 + c2).uge(OpSizeInBits);
9986 };
9987 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9988 /*AllowUndefs*/ false,
9989 /*AllowTypeMismatch*/ true))
9990 return DAG.getConstant(0, DL, VT);
9991
9992 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9993 ConstantSDNode *RHS) {
9994 APInt c1 = LHS->getAPIntValue();
9995 APInt c2 = RHS->getAPIntValue();
9996 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9997 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9998 (c1 + c2).ult(OpSizeInBits);
9999 };
10000 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10001 /*AllowUndefs*/ false,
10002 /*AllowTypeMismatch*/ true)) {
10003 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10004 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10005 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10006 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10007 }
10008 }
10009
10010 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10011 // Only fold this if the inner zext has no other uses to avoid increasing
10012 // the total number of instructions.
10013 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10014 N0.getOperand(0).getOpcode() == ISD::SRL) {
10015 SDValue N0Op0 = N0.getOperand(0);
10016 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10017
10018 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10019 APInt c1 = LHS->getAPIntValue();
10020 APInt c2 = RHS->getAPIntValue();
10021 zeroExtendToMatch(c1, c2);
10022 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10023 };
10024 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10025 /*AllowUndefs*/ false,
10026 /*AllowTypeMismatch*/ true)) {
10027 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10028 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10029 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10030 AddToWorklist(NewSHL.getNode());
10031 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10032 }
10033 }
10034
10035 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10036 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10037 ConstantSDNode *RHS) {
10038 const APInt &LHSC = LHS->getAPIntValue();
10039 const APInt &RHSC = RHS->getAPIntValue();
10040 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10041 LHSC.getZExtValue() <= RHSC.getZExtValue();
10042 };
10043
10044 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10045 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10046 if (N0->getFlags().hasExact()) {
10047 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10048 /*AllowUndefs*/ false,
10049 /*AllowTypeMismatch*/ true)) {
10050 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10051 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10052 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10053 }
10054 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10055 /*AllowUndefs*/ false,
10056 /*AllowTypeMismatch*/ true)) {
10057 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10058 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10059 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10060 }
10061 }
10062
10063 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10064 // (and (srl x, (sub c1, c2)), MASK)
10065 // Only fold this if the inner shift has no other uses -- if it does,
10066 // folding this will increase the total number of instructions.
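// For illustration on i8 (arbitrary amounts): shl (srl x, 5), 2 keeps only
// bits 7..5 of x and leaves them at positions 4..2, which is the same as
// and (srl x, 3), 0b00011100, matching the second form above.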
10067 if (N0.getOpcode() == ISD::SRL &&
10068 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10069 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10070 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10071 /*AllowUndefs*/ false,
10072 /*AllowTypeMismatch*/ true)) {
10073 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10074 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10075 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10076 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10077 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10078 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10079 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10080 }
10081 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10082 /*AllowUndefs*/ false,
10083 /*AllowTypeMismatch*/ true)) {
10084 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10085 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10086 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10087 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10088 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10089 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10090 }
10091 }
10092 }
10093
10094 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10095 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10096 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10097 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10098 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10099 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10100 }
10101
10102 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10103 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10104 // Variant of version done on multiply, except mul by a power of 2 is turned
10105 // into a shift.
10106 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10107 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10108 SDValue N01 = N0.getOperand(1);
10109 if (SDValue Shl1 =
10110 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10111 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10112 AddToWorklist(Shl0.getNode());
10113 SDNodeFlags Flags;
10114 // Preserve the disjoint flag for Or.
10115 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10116 Flags.setDisjoint(true);
10117 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10118 }
10119 }
10120
10121 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10122 // TODO: Add zext/add_nuw variant with suitable test coverage
10123 // TODO: Should we limit this with isLegalAddImmediate?
10124 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10125 N0.getOperand(0).getOpcode() == ISD::ADD &&
10126 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10127 N0.getOperand(0)->hasOneUse() &&
10128 TLI.isDesirableToCommuteWithShift(N, Level)) {
10129 SDValue Add = N0.getOperand(0);
10130 SDLoc DL(N0);
10131 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10132 {Add.getOperand(1)})) {
10133 if (SDValue ShlC =
10134 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10135 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10136 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10137 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10138 }
10139 }
10140 }
10141
10142 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10143 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10144 SDValue N01 = N0.getOperand(1);
10145 if (SDValue Shl =
10146 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10147 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10148 }
10149 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10149
10151 if (N1C && !N1C->isOpaque())
10152 if (SDValue NewSHL = visitShiftByConstant(N))
10153 return NewSHL;
10154
10155 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10156 // target.
10157 if (((N1.getOpcode() == ISD::CTTZ &&
10158 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10159 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10160 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10161 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10162 SDValue Y = N1.getOperand(0);
10163 SDLoc DL(N);
10164 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10165 SDValue And =
10166 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10167 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10168 }
10169
10170 if (SimplifyDemandedBits(SDValue(N, 0)))
10171 return SDValue(N, 0);
10172
10173 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10174 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10175 const APInt &C0 = N0.getConstantOperandAPInt(0);
10176 const APInt &C1 = N1C->getAPIntValue();
10177 return DAG.getVScale(DL, VT, C0 << C1);
10178 }
10179
10180 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10181 APInt ShlVal;
10182 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10183 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10184 const APInt &C0 = N0.getConstantOperandAPInt(0);
10185 if (ShlVal.ult(C0.getBitWidth())) {
10186 APInt NewStep = C0 << ShlVal;
10187 return DAG.getStepVector(DL, VT, NewStep);
10188 }
10189 }
10190
10191 return SDValue();
10192}
10193
10194// Transform a right shift of a multiply into a multiply-high.
10195// Examples:
10196 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10197 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10198 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10199 const TargetLowering &TLI) {
10200 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10201 "SRL or SRA node is required here!");
10202
10203 // Check the shift amount. Proceed with the transformation if the shift
10204 // amount is constant.
10205 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10206 if (!ShiftAmtSrc)
10207 return SDValue();
10208
10209 // The operation feeding into the shift must be a multiply.
10210 SDValue ShiftOperand = N->getOperand(0);
10211 if (ShiftOperand.getOpcode() != ISD::MUL)
10212 return SDValue();
10213
10214 // Both operands must be equivalent extend nodes.
10215 SDValue LeftOp = ShiftOperand.getOperand(0);
10216 SDValue RightOp = ShiftOperand.getOperand(1);
10217
10218 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10219 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10220
10221 if (!IsSignExt && !IsZeroExt)
10222 return SDValue();
10223
10224 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10225 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10226
10227 // return true if U may use the lower bits of its operands
10228 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10229 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10230 return true;
10231 }
10232 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10233 if (!UShiftAmtSrc) {
10234 return true;
10235 }
10236 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10237 return UShiftAmt < NarrowVTSize;
10238 };
10239
10240 // If the lower part of the MUL is also used and MUL_LOHI is supported,
10241 // do not introduce the MULH in favor of MUL_LOHI.
10242 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10243 if (!ShiftOperand.hasOneUse() &&
10244 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10245 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10246 return SDValue();
10247 }
10248
10249 SDValue MulhRightOp;
10250 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10251 unsigned ActiveBits = IsSignExt
10252 ? Constant->getAPIntValue().getSignificantBits()
10253 : Constant->getAPIntValue().getActiveBits();
10254 if (ActiveBits > NarrowVTSize)
10255 return SDValue();
10256 MulhRightOp = DAG.getConstant(
10257 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10258 NarrowVT);
10259 } else {
10260 if (LeftOp.getOpcode() != RightOp.getOpcode())
10261 return SDValue();
10262 // Check that the two extend nodes are the same type.
10263 if (NarrowVT != RightOp.getOperand(0).getValueType())
10264 return SDValue();
10265 MulhRightOp = RightOp.getOperand(0);
10266 }
10267
10268 EVT WideVT = LeftOp.getValueType();
10269 // Proceed with the transformation if the wide types match.
10270 assert((WideVT == RightOp.getValueType()) &&
10271 "Cannot have a multiply node with two different operand types.");
10272
10273 // Proceed with the transformation if the wide type is twice as large
10274 // as the narrow type.
10275 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10276 return SDValue();
10277
10278 // Check the shift amount with the narrow type size.
10279 // Proceed with the transformation if the shift amount is the width
10280 // of the narrow type.
10281 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10282 if (ShiftAmt != NarrowVTSize)
10283 return SDValue();
10284
10285 // If the operation feeding into the MUL is a sign extend (sext),
10286 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10287 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10288
10289 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10290 // or if it is a vector type then we could transform to an acceptable type and
10291 // rely on legalization to split/combine the result.
10292 if (NarrowVT.isVector()) {
10293 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10294 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10295 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10296 return SDValue();
10297 } else {
10298 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10299 return SDValue();
10300 }
10301
10302 SDValue Result =
10303 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10304 bool IsSigned = N->getOpcode() == ISD::SRA;
10305 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10306}
10307
10308// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10309 // This helper function accepts an SDNode with opcode ISD::BSWAP or ISD::BITREVERSE.
10310 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10311 unsigned Opcode = N->getOpcode();
10312 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10313 return SDValue();
10314
10315 SDValue N0 = N->getOperand(0);
10316 EVT VT = N->getValueType(0);
10317 SDLoc DL(N);
10318 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10319 SDValue OldLHS = N0.getOperand(0);
10320 SDValue OldRHS = N0.getOperand(1);
10321
10322 // If both operands are bswap/bitreverse, ignore the multiuse
10323 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10324 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10325 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10326 OldRHS.getOperand(0));
10327 }
10328
10329 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10330 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10331 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10332 NewBitReorder);
10333 }
10334
10335 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10336 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10337 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10338 OldRHS.getOperand(0));
10339 }
10340 }
10341 return SDValue();
10342}
10343
10344SDValue DAGCombiner::visitSRA(SDNode *N) {
10345 SDValue N0 = N->getOperand(0);
10346 SDValue N1 = N->getOperand(1);
10347 if (SDValue V = DAG.simplifyShift(N0, N1))
10348 return V;
10349
10350 SDLoc DL(N);
10351 EVT VT = N0.getValueType();
10352 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10353
10354 // fold (sra c1, c2) -> c1 >>s c2
10355 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10356 return C;
10357
10358 // Arithmetic shifting an all-sign-bit value is a no-op.
10359 // fold (sra 0, x) -> 0
10360 // fold (sra -1, x) -> -1
10361 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10362 return N0;
10363
10364 // fold vector ops
10365 if (VT.isVector())
10366 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10367 return FoldedVOp;
10368
10369 if (SDValue NewSel = foldBinOpIntoSelect(N))
10370 return NewSel;
10371
10372 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10373
10374 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10375 // clamp (add c1, c2) to max shift.
10376 if (N0.getOpcode() == ISD::SRA) {
10377 EVT ShiftVT = N1.getValueType();
10378 EVT ShiftSVT = ShiftVT.getScalarType();
10379 SmallVector<SDValue, 16> ShiftValues;
10380
10381 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10382 APInt c1 = LHS->getAPIntValue();
10383 APInt c2 = RHS->getAPIntValue();
10384 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10385 APInt Sum = c1 + c2;
10386 unsigned ShiftSum =
10387 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10388 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10389 return true;
10390 };
10391 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10392 SDValue ShiftValue;
10393 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10394 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10395 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10396 assert(ShiftValues.size() == 1 &&
10397 "Expected matchBinaryPredicate to return one element for "
10398 "SPLAT_VECTORs");
10399 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10400 } else
10401 ShiftValue = ShiftValues[0];
10402 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10403 }
10404 }
10405
10406 // fold (sra (shl X, m), (sub result_size, n))
10407 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10408 // result_size - n != m.
10409 // If truncate is free for the target, sext(shl) is likely to result in better
10410 // code.
10411 if (N0.getOpcode() == ISD::SHL && N1C) {
10412 // Get the two constants of the shifts, CN0 = m, CN = n.
10413 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10414 if (N01C) {
10415 LLVMContext &Ctx = *DAG.getContext();
10416 // Determine what the truncate's result bitsize and type would be.
10417 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10418
10419 if (VT.isVector())
10420 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10421
10422 // Determine the residual right-shift amount.
10423 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10424
10425 // If the shift is not a no-op (in which case this should be just a sign
10426 // extend already), the truncated-to type is legal, sign_extend is legal
10427 // on that type, and the truncate to that type is both legal and free,
10428 // perform the transform.
10429 if ((ShiftAmt > 0) &&
10430 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10431 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10432 TLI.isTruncateFree(VT, TruncVT)) {
10433 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10434 getShiftAmountTy(N0.getOperand(0).getValueType()));
10435 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10436 N0.getOperand(0), Amt);
10437 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10438 Shift);
10439 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10440 N->getValueType(0), Trunc);
10441 }
10442 }
10443 }
10444
10445 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10446 // sra (add (shl X, N1C), AddC), N1C -->
10447 // sext (add (trunc X to (width - N1C)), AddC')
10448 // sra (sub AddC, (shl X, N1C)), N1C -->
10449 // sext (sub AddC1',(trunc X to (width - N1C)))
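// For illustration (arbitrary constants, i32 with N1C = 16): for
// sra (add (shl X, 16), 0x50000), 16 the constant contributes 5 to the high
// half, so the result is sext i16 -> i32 of (add (trunc X to i16), 5).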
10450 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10451 N0.hasOneUse()) {
10452 bool IsAdd = N0.getOpcode() == ISD::ADD;
10453 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10454 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10455 Shl.hasOneUse()) {
10456 // TODO: AddC does not need to be a splat.
10457 if (ConstantSDNode *AddC =
10458 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10459 // Determine what the truncate's type would be and ask the target if
10460 // that is a free operation.
10461 LLVMContext &Ctx = *DAG.getContext();
10462 unsigned ShiftAmt = N1C->getZExtValue();
10463 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10464 if (VT.isVector())
10465 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10466
10467 // TODO: The simple type check probably belongs in the default hook
10468 // implementation and/or target-specific overrides (because
10469 // non-simple types likely require masking when legalized), but
10470 // that restriction may conflict with other transforms.
10471 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10472 TLI.isTruncateFree(VT, TruncVT)) {
10473 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10474 SDValue ShiftC =
10475 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10476 TruncVT.getScalarSizeInBits()),
10477 DL, TruncVT);
10478 SDValue Add;
10479 if (IsAdd)
10480 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10481 else
10482 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10483 return DAG.getSExtOrTrunc(Add, DL, VT);
10484 }
10485 }
10486 }
10487 }
10488
10489 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10490 if (N1.getOpcode() == ISD::TRUNCATE &&
10491 N1.getOperand(0).getOpcode() == ISD::AND) {
10492 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10493 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10494 }
10495
10496 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10497 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10498 // if c1 is equal to the number of bits the trunc removes
10499 // TODO - support non-uniform vector shift amounts.
10500 if (N0.getOpcode() == ISD::TRUNCATE &&
10501 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10502 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10503 N0.getOperand(0).hasOneUse() &&
10504 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10505 SDValue N0Op0 = N0.getOperand(0);
10506 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10507 EVT LargeVT = N0Op0.getValueType();
10508 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10509 if (LargeShift->getAPIntValue() == TruncBits) {
10510 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10511 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10512 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10513 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10514 SDValue SRA =
10515 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10516 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10517 }
10518 }
10519 }
10520
10521 // Simplify, based on bits shifted out of the LHS.
10522 if (SimplifyDemandedBits(SDValue(N, 0)))
10523 return SDValue(N, 0);
10524
10525 // If the sign bit is known to be zero, switch this to a SRL.
10526 if (DAG.SignBitIsZero(N0))
10527 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10528
10529 if (N1C && !N1C->isOpaque())
10530 if (SDValue NewSRA = visitShiftByConstant(N))
10531 return NewSRA;
10532
10533 // Try to transform this shift into a multiply-high if
10534 // it matches the appropriate pattern detected in combineShiftToMULH.
10535 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10536 return MULH;
10537
10538 // Attempt to convert a sra of a load into a narrower sign-extending load.
10539 if (SDValue NarrowLoad = reduceLoadWidth(N))
10540 return NarrowLoad;
10541
10542 return SDValue();
10543}
10544
10545SDValue DAGCombiner::visitSRL(SDNode *N) {
10546 SDValue N0 = N->getOperand(0);
10547 SDValue N1 = N->getOperand(1);
10548 if (SDValue V = DAG.simplifyShift(N0, N1))
10549 return V;
10550
10551 SDLoc DL(N);
10552 EVT VT = N0.getValueType();
10553 EVT ShiftVT = N1.getValueType();
10554 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10555
10556 // fold (srl c1, c2) -> c1 >>u c2
10557 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10558 return C;
10559
10560 // fold vector ops
10561 if (VT.isVector())
10562 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10563 return FoldedVOp;
10564
10565 if (SDValue NewSel = foldBinOpIntoSelect(N))
10566 return NewSel;
10567
10568 // if (srl x, c) is known to be zero, return 0
10569 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10570 if (N1C &&
10571 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10572 return DAG.getConstant(0, DL, VT);
10573
10574 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10575 if (N0.getOpcode() == ISD::SRL) {
10576 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10577 ConstantSDNode *RHS) {
10578 APInt c1 = LHS->getAPIntValue();
10579 APInt c2 = RHS->getAPIntValue();
10580 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10581 return (c1 + c2).uge(OpSizeInBits);
10582 };
10583 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10584 return DAG.getConstant(0, DL, VT);
10585
10586 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10587 ConstantSDNode *RHS) {
10588 APInt c1 = LHS->getAPIntValue();
10589 APInt c2 = RHS->getAPIntValue();
10590 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10591 return (c1 + c2).ult(OpSizeInBits);
10592 };
10593 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10594 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10595 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10596 }
10597 }
10598
10599 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10600 N0.getOperand(0).getOpcode() == ISD::SRL) {
10601 SDValue InnerShift = N0.getOperand(0);
10602 // TODO - support non-uniform vector shift amounts.
10603 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10604 uint64_t c1 = N001C->getZExtValue();
10605 uint64_t c2 = N1C->getZExtValue();
10606 EVT InnerShiftVT = InnerShift.getValueType();
10607 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10608 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10609 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10610 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10611 if (c1 + OpSizeInBits == InnerShiftSize) {
10612 if (c1 + c2 >= InnerShiftSize)
10613 return DAG.getConstant(0, DL, VT);
10614 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10615 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10616 InnerShift.getOperand(0), NewShiftAmt);
10617 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10618 }
10619 // In the more general case, we can clear the high bits after the shift:
10620 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
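// For illustration (assumed i64 inner type, i32 result, c1 = 16, c2 = 8): the
// combined shift amount is 24 and 32 - 8 = 24 low bits survive, so this
// becomes trunc (and (srl x, 24), 0xFFFFFF) to i32.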
10621 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10622 c1 + c2 < InnerShiftSize) {
10623 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10624 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10625 InnerShift.getOperand(0), NewShiftAmt);
10626 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10627 OpSizeInBits - c2),
10628 DL, InnerShiftVT);
10629 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10630 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10631 }
10632 }
10633 }
10634
10635 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10636 // (and (srl x, (sub c2, c1)), MASK)
10637 if (N0.getOpcode() == ISD::SHL &&
10638 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10639 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10640 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10641 ConstantSDNode *RHS) {
10642 const APInt &LHSC = LHS->getAPIntValue();
10643 const APInt &RHSC = RHS->getAPIntValue();
10644 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10645 LHSC.getZExtValue() <= RHSC.getZExtValue();
10646 };
10647 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10648 /*AllowUndefs*/ false,
10649 /*AllowTypeMismatch*/ true)) {
10650 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10651 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10652 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10653 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10654 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10655 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10656 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10657 }
10658 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10659 /*AllowUndefs*/ false,
10660 /*AllowTypeMismatch*/ true)) {
10661 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10662 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10663 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10664 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10665 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10666 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10667 }
10668 }
10669
10670 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10671 // TODO - support non-uniform vector shift amounts.
10672 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10673 // Shifting in all undef bits?
10674 EVT SmallVT = N0.getOperand(0).getValueType();
10675 unsigned BitSize = SmallVT.getScalarSizeInBits();
10676 if (N1C->getAPIntValue().uge(BitSize))
10677 return DAG.getUNDEF(VT);
10678
10679 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10680 uint64_t ShiftAmt = N1C->getZExtValue();
10681 SDLoc DL0(N0);
10682 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10683 N0.getOperand(0),
10684 DAG.getConstant(ShiftAmt, DL0,
10685 getShiftAmountTy(SmallVT)));
10686 AddToWorklist(SmallShift.getNode());
10687 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10688 return DAG.getNode(ISD::AND, DL, VT,
10689 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10690 DAG.getConstant(Mask, DL, VT));
10691 }
10692 }
10693
10694 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10695 // bit, which is unmodified by sra.
10696 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10697 if (N0.getOpcode() == ISD::SRA)
10698 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10699 }
10700
10701 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10702 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10703 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10704 isPowerOf2_32(OpSizeInBits) &&
10705 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10706 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10707
10708 // If any of the input bits are KnownOne, then the input couldn't be all
10709 // zeros, thus the result of the srl will always be zero.
10710 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10711
10712 // If all of the bits input to the ctlz node are known to be zero, then
10713 // the result of the ctlz is "32" and the result of the shift is one.
10714 APInt UnknownBits = ~Known.Zero;
10715 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10716
10717 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10718 if (UnknownBits.isPowerOf2()) {
10719 // Okay, we know that only the single bit specified by UnknownBits
10720 // could be set on input to the CTLZ node. If this bit is set, the SRL
10721 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10722 // to an SRL/XOR pair, which is likely to simplify more.
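// For illustration (i32, assuming only bit 4 of the input can be nonzero):
// x == 16 gives ctlz == 27 and 27 >> 5 == 0, while x == 0 gives ctlz == 32 and
// 32 >> 5 == 1; the same results come from xor (srl x, 4), 1.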
10723 unsigned ShAmt = UnknownBits.countr_zero();
10724 SDValue Op = N0.getOperand(0);
10725
10726 if (ShAmt) {
10727 SDLoc DL(N0);
10728 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10729 DAG.getConstant(ShAmt, DL,
10730 getShiftAmountTy(Op.getValueType())));
10731 AddToWorklist(Op.getNode());
10732 }
10733 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10734 }
10735 }
10736
10737 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10738 if (N1.getOpcode() == ISD::TRUNCATE &&
10739 N1.getOperand(0).getOpcode() == ISD::AND) {
10740 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10741 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10742 }
10743
10744 // fold operands of srl based on knowledge that the low bits are not
10745 // demanded.
10746 if (SimplifyDemandedBits(SDValue(N, 0)))
10747 return SDValue(N, 0);
10748
10749 if (N1C && !N1C->isOpaque())
10750 if (SDValue NewSRL = visitShiftByConstant(N))
10751 return NewSRL;
10752
10753 // Attempt to convert a srl of a load into a narrower zero-extending load.
10754 if (SDValue NarrowLoad = reduceLoadWidth(N))
10755 return NarrowLoad;
10756
10757 // Here is a common situation. We want to optimize:
10758 //
10759 // %a = ...
10760 // %b = and i32 %a, 2
10761 // %c = srl i32 %b, 1
10762 // brcond i32 %c ...
10763 //
10764 // into
10765 //
10766 // %a = ...
10767 // %b = and %a, 2
10768 // %c = setcc eq %b, 0
10769 // brcond %c ...
10770 //
10771 // However, after the source operand of the SRL is optimized into an AND, the
10772 // SRL itself may not be optimized further. Look for it and add the BRCOND into
10773 // the worklist.
10774 //
10775 // This also tends to happen for binary operations when SimplifyDemandedBits
10776 // is involved.
10777 //
10778 // FIXME: This is unnecessary if we process the DAG in topological order,
10779 // which we plan to do. This workaround can be removed once the DAG is
10780 // processed in topological order.
10781 if (N->hasOneUse()) {
10782 SDNode *Use = *N->use_begin();
10783
10784 // Look past the truncate.
10785 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10786 Use = *Use->use_begin();
10787
10788 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10789 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10790 AddToWorklist(Use);
10791 }
10792
10793 // Try to transform this shift into a multiply-high if
10794 // it matches the appropriate pattern detected in combineShiftToMULH.
10795 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10796 return MULH;
10797
10798 return SDValue();
10799}
10800
10801SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10802 EVT VT = N->getValueType(0);
10803 SDValue N0 = N->getOperand(0);
10804 SDValue N1 = N->getOperand(1);
10805 SDValue N2 = N->getOperand(2);
10806 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10807 unsigned BitWidth = VT.getScalarSizeInBits();
10808 SDLoc DL(N);
10809
10810 // fold (fshl N0, N1, 0) -> N0
10811 // fold (fshr N0, N1, 0) -> N1
10813 if (DAG.MaskedValueIsZero(
10814 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10815 return IsFSHL ? N0 : N1;
10816
10817 auto IsUndefOrZero = [](SDValue V) {
10818 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10819 };
10820
10821 // TODO - support non-uniform vector shift amounts.
10822 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10823 EVT ShAmtTy = N2.getValueType();
10824
10825 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10826 if (Cst->getAPIntValue().uge(BitWidth)) {
10827 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10828 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10829 DAG.getConstant(RotAmt, DL, ShAmtTy));
10830 }
10831
10832 unsigned ShAmt = Cst->getZExtValue();
10833 if (ShAmt == 0)
10834 return IsFSHL ? N0 : N1;
10835
10836 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10837 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10838 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10839 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
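// For example, with i8 operands and C == 3:
// fshl(0, y, 3) == (0 << 3) | (y u>> 5) == lshr(y, 5), and
// fshr(x, 0, 3) == (x << 5) | (0 u>> 3) == shl(x, 5).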
10840 if (IsUndefOrZero(N0))
10841 return DAG.getNode(
10842 ISD::SRL, DL, VT, N1,
10843 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10844 if (IsUndefOrZero(N1))
10845 return DAG.getNode(
10846 ISD::SHL, DL, VT, N0,
10847 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10848
10849 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10850 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10851 // TODO - bigendian support once we have test coverage.
10852 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10853 // TODO - permit LHS EXTLOAD if extensions are shifted out.
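// For example, with two consecutive little-endian i32 loads ld0 from p and ld1
// from p+4, fshl(ld1, ld0, 8) produces bytes p[3..6], i.e. a single i32 load
// from p+3 (PtrOff == (32 - 8) / 8 == 3).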
10854 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10855 !DAG.getDataLayout().isBigEndian()) {
10856 auto *LHS = dyn_cast<LoadSDNode>(N0);
10857 auto *RHS = dyn_cast<LoadSDNode>(N1);
10858 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10859 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10860 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10861 ISD::isNON_EXTLoad(LHS)) {
10862 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10863 SDLoc DL(RHS);
10864 uint64_t PtrOff =
10865 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10866 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10867 unsigned Fast = 0;
10868 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10869 RHS->getAddressSpace(), NewAlign,
10870 RHS->getMemOperand()->getFlags(), &Fast) &&
10871 Fast) {
10872 SDValue NewPtr = DAG.getMemBasePlusOffset(
10873 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10874 AddToWorklist(NewPtr.getNode());
10875 SDValue Load = DAG.getLoad(
10876 VT, DL, RHS->getChain(), NewPtr,
10877 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10878 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10879 // Replace the old load's chain with the new load's chain.
10880 WorklistRemover DeadNodes(*this);
10881 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10882 return Load;
10883 }
10884 }
10885 }
10886 }
10887 }
10888
10889 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10890 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10891 // iff we know the shift amount is in range.
10892 // TODO: when is it worth doing SUB(BW, N2) as well?
10893 if (isPowerOf2_32(BitWidth)) {
10894 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10895 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10896 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
10897 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10898 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
10899 }
10900
10901 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10902 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10903 // TODO: Investigate flipping this rotate if only one is legal.
10904 // If funnel shift is legal as well we might be better off avoiding
10905 // non-constant (BW - N2).
10906 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10907 if (N0 == N1 && hasOperation(RotOpc, VT))
10908 return DAG.getNode(RotOpc, DL, VT, N0, N2);
10909
10910 // Simplify, based on bits shifted out of N0/N1.
10911 if (SimplifyDemandedBits(SDValue(N, 0)))
10912 return SDValue(N, 0);
10913
10914 return SDValue();
10915}
10916
10917SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10918 SDValue N0 = N->getOperand(0);
10919 SDValue N1 = N->getOperand(1);
10920 if (SDValue V = DAG.simplifyShift(N0, N1))
10921 return V;
10922
10923 SDLoc DL(N);
10924 EVT VT = N0.getValueType();
10925
10926 // fold (*shlsat c1, c2) -> c1<<c2
10927 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10928 return C;
10929
10930 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10931
10932 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10933 // fold (sshlsat x, c) -> (shl x, c)
10934 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10935 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10936 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10937
10938 // fold (ushlsat x, c) -> (shl x, c)
10939 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10940 N1C->getAPIntValue().ule(
10941 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10942 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10943 }
10944
10945 return SDValue();
10946}
10947
10948 // Given an ABS node, detect the following patterns:
10949 // (ABS (SUB (EXTEND a), (EXTEND b))).
10950 // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10951 // Generates a UABD/SABD instruction.
10952SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10953 EVT SrcVT = N->getValueType(0);
10954
10955 if (N->getOpcode() == ISD::TRUNCATE)
10956 N = N->getOperand(0).getNode();
10957
10958 if (N->getOpcode() != ISD::ABS)
10959 return SDValue();
10960
10961 EVT VT = N->getValueType(0);
10962 SDValue AbsOp1 = N->getOperand(0);
10963 SDValue Op0, Op1;
10964
10965 if (AbsOp1.getOpcode() != ISD::SUB)
10966 return SDValue();
10967
10968 Op0 = AbsOp1.getOperand(0);
10969 Op1 = AbsOp1.getOperand(1);
10970
10971 unsigned Opc0 = Op0.getOpcode();
10972
10973 // Check if the operands of the sub are (zero|sign)-extended.
10974 // TODO: Should we use ValueTracking instead?
10975 if (Opc0 != Op1.getOpcode() ||
10976 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10977 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10978 // fold (abs (sub nsw x, y)) -> abds(x, y)
10979 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10980 TLI.preferABDSToABSWithNSW(VT)) {
10981 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10982 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10983 }
10984 return SDValue();
10985 }
10986
10987 EVT VT0, VT1;
10988 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10989 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10990 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10991 } else {
10992 VT0 = Op0.getOperand(0).getValueType();
10993 VT1 = Op1.getOperand(0).getValueType();
10994 }
10995 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10996
10997 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10998 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10999 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11000 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11001 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
11002 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11003 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11004 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11005 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11006 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11007 }
11008
11009 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11010 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11011 if (hasOperation(ABDOpcode, VT)) {
11012 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11013 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11014 }
11015
11016 return SDValue();
11017}
11018
11019SDValue DAGCombiner::visitABS(SDNode *N) {
11020 SDValue N0 = N->getOperand(0);
11021 EVT VT = N->getValueType(0);
11022 SDLoc DL(N);
11023
11024 // fold (abs c1) -> c2
11025 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11026 return C;
11027 // fold (abs (abs x)) -> (abs x)
11028 if (N0.getOpcode() == ISD::ABS)
11029 return N0;
11030 // fold (abs x) -> x iff not-negative
11031 if (DAG.SignBitIsZero(N0))
11032 return N0;
11033
11034 if (SDValue ABD = foldABSToABD(N, DL))
11035 return ABD;
11036
11037 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11038 // iff zero_extend/truncate are free.
11039 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11040 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11041 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11042 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11043 hasOperation(ISD::ABS, ExtVT)) {
11044 return DAG.getNode(
11045 ISD::ZERO_EXTEND, DL, VT,
11046 DAG.getNode(ISD::ABS, DL, ExtVT,
11047 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11048 }
11049 }
11050
11051 return SDValue();
11052}
11053
11054SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11055 SDValue N0 = N->getOperand(0);
11056 EVT VT = N->getValueType(0);
11057 SDLoc DL(N);
11058
11059 // fold (bswap c1) -> c2
11060 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11061 return C;
11062 // fold (bswap (bswap x)) -> x
11063 if (N0.getOpcode() == ISD::BSWAP)
11064 return N0.getOperand(0);
11065
11066 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11067 // isn't supported, it will be expanded to bswap followed by a manual reversal
11068 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11069 // the two bswaps if the bitreverse gets expanded.
11070 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11071 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11072 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11073 }
11074
11075 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11076 // iff c >= bw/2 (i.e. lower half is known zero)
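// For example, with i32 and c == 16: the low half of (x << 16) is known zero,
// so bswap(x << 16) == zext(bswap16(trunc(x))), with no extra shift needed
// because c - bw/2 == 0.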
11077 unsigned BW = VT.getScalarSizeInBits();
11078 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11079 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11080 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11081 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11082 ShAmt->getZExtValue() >= (BW / 2) &&
11083 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11084 TLI.isTruncateFree(VT, HalfVT) &&
11085 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11086 SDValue Res = N0.getOperand(0);
11087 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11088 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11089 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
11090 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11091 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11092 return DAG.getZExtOrTrunc(Res, DL, VT);
11093 }
11094 }
11095
11096 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11097 // inverse-shift-of-bswap:
11098 // bswap (X u<< C) --> (bswap X) u>> C
11099 // bswap (X u>> C) --> (bswap X) u<< C
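// For example, with i32 and C == 8: if x has bytes [b3 b2 b1 b0], then
// bswap(x << 8) == [00 b0 b1 b2] == (bswap x) u>> 8.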
11100 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11101 N0.hasOneUse()) {
11102 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11103 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11104 ShAmt->getZExtValue() % 8 == 0) {
11105 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11106 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11107 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11108 }
11109 }
11110
11111 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11112 return V;
11113
11114 return SDValue();
11115}
11116
11117SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11118 SDValue N0 = N->getOperand(0);
11119 EVT VT = N->getValueType(0);
11120 SDLoc DL(N);
11121
11122 // fold (bitreverse c1) -> c2
11123 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11124 return C;
11125
11126 // fold (bitreverse (bitreverse x)) -> x
11127 if (N0.getOpcode() == ISD::BITREVERSE)
11128 return N0.getOperand(0);
11129
11130 SDValue X, Y;
11131
11132 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11133 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11134 sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11135 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11136
11137 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11138 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11139 sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11140 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11141
11142 return SDValue();
11143}
11144
11145SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11146 SDValue N0 = N->getOperand(0);
11147 EVT VT = N->getValueType(0);
11148 SDLoc DL(N);
11149
11150 // fold (ctlz c1) -> c2
11151 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11152 return C;
11153
11154 // If the value is known never to be zero, switch to the undef version.
11155 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11156 if (DAG.isKnownNeverZero(N0))
11157 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11158
11159 return SDValue();
11160}
11161
11162SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11163 SDValue N0 = N->getOperand(0);
11164 EVT VT = N->getValueType(0);
11165 SDLoc DL(N);
11166
11167 // fold (ctlz_zero_undef c1) -> c2
11168 if (SDValue C =
11169 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11170 return C;
11171 return SDValue();
11172}
11173
11174SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11175 SDValue N0 = N->getOperand(0);
11176 EVT VT = N->getValueType(0);
11177 SDLoc DL(N);
11178
11179 // fold (cttz c1) -> c2
11180 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11181 return C;
11182
11183 // If the value is known never to be zero, switch to the undef version.
11184 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11185 if (DAG.isKnownNeverZero(N0))
11186 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11187
11188 return SDValue();
11189}
11190
11191SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11192 SDValue N0 = N->getOperand(0);
11193 EVT VT = N->getValueType(0);
11194 SDLoc DL(N);
11195
11196 // fold (cttz_zero_undef c1) -> c2
11197 if (SDValue C =
11198 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11199 return C;
11200 return SDValue();
11201}
11202
11203SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11204 SDValue N0 = N->getOperand(0);
11205 EVT VT = N->getValueType(0);
11206 unsigned NumBits = VT.getScalarSizeInBits();
11207 SDLoc DL(N);
11208
11209 // fold (ctpop c1) -> c2
11210 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11211 return C;
11212
11213 // If the source is being shifted, but the shift doesn't affect any active
11214 // bits, then we can call CTPOP on the shift source directly.
11215 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11216 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11217 const APInt &Amt = AmtC->getAPIntValue();
11218 if (Amt.ult(NumBits)) {
11219 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11220 if ((N0.getOpcode() == ISD::SRL &&
11221 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11222 (N0.getOpcode() == ISD::SHL &&
11223 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11224 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11225 }
11226 }
11227 }
11228 }
11229
11230 // If the upper bits are known to be zero, then see if it's profitable to
11231 // only count the lower bits.
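// For example, if the upper 32 bits of an i64 value are known to be zero,
// counting only the low 32 bits with an i32 ctpop gives the same result.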
11232 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11233 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11234 if (hasOperation(ISD::CTPOP, HalfVT) &&
11235 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11236 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11237 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11238 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11239 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11240 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11241 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11242 }
11243 }
11244 }
11245
11246 return SDValue();
11247}
11248
11249 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11250 SDValue RHS, const SDNodeFlags Flags,
11251 const TargetLowering &TLI) {
11252 EVT VT = LHS.getValueType();
11253 if (!VT.isFloatingPoint())
11254 return false;
11255
11256 const TargetOptions &Options = DAG.getTarget().Options;
11257
11258 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11259 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11260 (Flags.hasNoNaNs() ||
11261 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11262}
11263
11264 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11265 SDValue RHS, SDValue True, SDValue False,
11266 ISD::CondCode CC,
11267 const TargetLowering &TLI,
11268 SelectionDAG &DAG) {
11269 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11270 switch (CC) {
11271 case ISD::SETOLT:
11272 case ISD::SETOLE:
11273 case ISD::SETLT:
11274 case ISD::SETLE:
11275 case ISD::SETULT:
11276 case ISD::SETULE: {
11277 // Since the operands are already known never to be NaN here, either fminnum or
11278 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is expanded in
11279 // terms of it.
11280 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11281 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11282 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11283
11284 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11285 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11286 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11287 return SDValue();
11288 }
11289 case ISD::SETOGT:
11290 case ISD::SETOGE:
11291 case ISD::SETGT:
11292 case ISD::SETGE:
11293 case ISD::SETUGT:
11294 case ISD::SETUGE: {
11295 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11296 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11297 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11298
11299 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11300 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11301 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11302 return SDValue();
11303 }
11304 default:
11305 return SDValue();
11306 }
11307}
11308
11309/// Generate Min/Max node
11310SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11311 SDValue RHS, SDValue True,
11312 SDValue False, ISD::CondCode CC) {
11313 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11314 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11315
11316 // If we can't directly match this, try to see if we can pull an fneg out of
11317 // the select.
11318 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11319 True, DAG, LegalOperations, ForCodeSize);
11320 if (!NegTrue)
11321 return SDValue();
11322
11323 HandleSDNode NegTrueHandle(NegTrue);
11324
11325 // Try to unfold an fneg from the select if we are comparing the negated
11326 // constant.
11327 //
11328 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11329 //
11330 // TODO: Handle fabs
11331 if (LHS == NegTrue) {
11332 // If we can't directly match this, try to see if we can pull an fneg out of
11333 // the select.
11334 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11335 RHS, DAG, LegalOperations, ForCodeSize);
11336 if (NegRHS) {
11337 HandleSDNode NegRHSHandle(NegRHS);
11338 if (NegRHS == False) {
11339 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11340 False, CC, TLI, DAG);
11341 if (Combined)
11342 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11343 }
11344 }
11345 }
11346
11347 return SDValue();
11348}
11349
11350/// If a (v)select has a condition value that is a sign-bit test, try to smear
11351/// the condition operand sign-bit across the value width and use it as a mask.
11352 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11353 SelectionDAG &DAG) {
11354 SDValue Cond = N->getOperand(0);
11355 SDValue C1 = N->getOperand(1);
11356 SDValue C2 = N->getOperand(2);
11357 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11358 return SDValue();
11359
11360 EVT VT = N->getValueType(0);
11361 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11362 VT != Cond.getOperand(0).getValueType())
11363 return SDValue();
11364
11365 // The inverted-condition + commuted-select variants of these patterns are
11366 // canonicalized to these forms in IR.
11367 SDValue X = Cond.getOperand(0);
11368 SDValue CondC = Cond.getOperand(1);
11369 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11370 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11371 isAllOnesOrAllOnesSplat(C2)) {
11372 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11373 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11374 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11375 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11376 }
11377 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11378 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11379 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11380 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11381 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11382 }
11383 return SDValue();
11384}
11385
11386 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11387 const TargetLowering &TLI) {
11388 if (!TLI.convertSelectOfConstantsToMath(VT))
11389 return false;
11390
11391 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11392 return true;
11394 return true;
11395
11396 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11397 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11398 return true;
11399 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11400 return true;
11401
11402 return false;
11403}
11404
11405SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11406 SDValue Cond = N->getOperand(0);
11407 SDValue N1 = N->getOperand(1);
11408 SDValue N2 = N->getOperand(2);
11409 EVT VT = N->getValueType(0);
11410 EVT CondVT = Cond.getValueType();
11411 SDLoc DL(N);
11412
11413 if (!VT.isInteger())
11414 return SDValue();
11415
11416 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11417 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11418 if (!C1 || !C2)
11419 return SDValue();
11420
11421 if (CondVT != MVT::i1 || LegalOperations) {
11422 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11424 // We can't do this reliably if integer-based booleans have different contents
11425 // than floating-point-based booleans. This is because we can't tell whether we
11425 // have an integer-based boolean or a floating-point-based boolean unless we
11426 // can find the SETCC that produced it and inspect its operands. This is
11427 // fairly easy if C is the SETCC node, but it can potentially be
11428 // undiscoverable (or not reasonably discoverable). For example, it could be
11429 // in another basic block or it could require searching a complicated
11430 // expression.
11431 if (CondVT.isInteger() &&
11432 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11433 TargetLowering::ZeroOrOneBooleanContent &&
11434 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11435 TargetLowering::ZeroOrOneBooleanContent &&
11436 C1->isZero() && C2->isOne()) {
11437 SDValue NotCond =
11438 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11439 if (VT.bitsEq(CondVT))
11440 return NotCond;
11441 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11442 }
11443
11444 return SDValue();
11445 }
11446
11447 // Only do this before legalization to avoid conflicting with target-specific
11448 // transforms in the other direction (create a select from a zext/sext). There
11449 // is also a target-independent combine here in DAGCombiner in the other
11450 // direction for (select Cond, -1, 0) when the condition is not i1.
11451 assert(CondVT == MVT::i1 && !LegalOperations);
11452
11453 // select Cond, 1, 0 --> zext (Cond)
11454 if (C1->isOne() && C2->isZero())
11455 return DAG.getZExtOrTrunc(Cond, DL, VT);
11456
11457 // select Cond, -1, 0 --> sext (Cond)
11458 if (C1->isAllOnes() && C2->isZero())
11459 return DAG.getSExtOrTrunc(Cond, DL, VT);
11460
11461 // select Cond, 0, 1 --> zext (!Cond)
11462 if (C1->isZero() && C2->isOne()) {
11463 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11464 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11465 return NotCond;
11466 }
11467
11468 // select Cond, 0, -1 --> sext (!Cond)
11469 if (C1->isZero() && C2->isAllOnes()) {
11470 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11471 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11472 return NotCond;
11473 }
11474
11475 // Use a target hook because some targets may prefer to transform in the
11476 // other direction.
11477 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11478 return SDValue();
11479
11480 // For any constants that differ by 1, we can transform the select into
11481 // an extend and add.
11482 const APInt &C1Val = C1->getAPIntValue();
11483 const APInt &C2Val = C2->getAPIntValue();
11484
11485 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11486 if (C1Val - 1 == C2Val) {
11487 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11488 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11489 }
11490
11491 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11492 if (C1Val + 1 == C2Val) {
11493 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11494 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11495 }
11496
11497 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11498 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11499 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11500 SDValue ShAmtC =
11501 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11502 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11503 }
11504
11505 // select Cond, -1, C --> or (sext Cond), C
11506 if (C1->isAllOnes()) {
11507 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11508 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11509 }
11510
11511 // select Cond, C, -1 --> or (sext (not Cond)), C
11512 if (C2->isAllOnes()) {
11513 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11514 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11515 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11516 }
11517
11518 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11519 return V;
11520
11521 return SDValue();
11522}
11523
11524template <class MatchContextClass>
11525 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11526 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11527 N->getOpcode() == ISD::VP_SELECT) &&
11528 "Expected a (v)(vp.)select");
11529 SDValue Cond = N->getOperand(0);
11530 SDValue T = N->getOperand(1), F = N->getOperand(2);
11531 EVT VT = N->getValueType(0);
11532 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11533 MatchContextClass matcher(DAG, TLI, N);
11534
11535 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11536 return SDValue();
11537
11538 // select Cond, Cond, F --> or Cond, F
11539 // select Cond, 1, F --> or Cond, F
11540 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11541 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11542
11543 // select Cond, T, Cond --> and Cond, T
11544 // select Cond, T, 0 --> and Cond, T
11545 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11546 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11547
11548 // select Cond, T, 1 --> or (not Cond), T
11549 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11550 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11551 DAG.getAllOnesConstant(SDLoc(N), VT));
11552 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11553 }
11554
11555 // select Cond, 0, F --> and (not Cond), F
11556 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11557 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11558 DAG.getAllOnesConstant(SDLoc(N), VT));
11559 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11560 }
11561
11562 return SDValue();
11563}
11564
11565 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11566 SDValue N0 = N->getOperand(0);
11567 SDValue N1 = N->getOperand(1);
11568 SDValue N2 = N->getOperand(2);
11569 EVT VT = N->getValueType(0);
11570 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11571 return SDValue();
11572
11573 SDValue Cond0 = N0.getOperand(0);
11574 SDValue Cond1 = N0.getOperand(1);
11575 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11576 if (VT != Cond0.getValueType())
11577 return SDValue();
11578
11579 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11580 // compare is inverted from that pattern ("Cond0 s> -1").
11581 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11582 ; // This is the pattern we are looking for.
11583 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11584 std::swap(N1, N2);
11585 else
11586 return SDValue();
11587
11588 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11589 if (isNullOrNullSplat(N2)) {
11590 SDLoc DL(N);
11591 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11592 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11593 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11594 }
11595
11596 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11597 if (isAllOnesOrAllOnesSplat(N1)) {
11598 SDLoc DL(N);
11599 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11600 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11601 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11602 }
11603
11604 // If we have to invert the sign bit mask, only do that transform if the
11605 // target has a bitwise 'and not' instruction (the invert is free).
11606 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11607 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11608 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11609 SDLoc DL(N);
11610 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11611 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11612 SDValue Not = DAG.getNOT(DL, Sra, VT);
11613 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11614 }
11615
11616 // TODO: There's another pattern in this family, but it may require
11617 // implementing hasOrNot() to check for profitability:
11618 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11619
11620 return SDValue();
11621}
11622
11623SDValue DAGCombiner::visitSELECT(SDNode *N) {
11624 SDValue N0 = N->getOperand(0);
11625 SDValue N1 = N->getOperand(1);
11626 SDValue N2 = N->getOperand(2);
11627 EVT VT = N->getValueType(0);
11628 EVT VT0 = N0.getValueType();
11629 SDLoc DL(N);
11630 SDNodeFlags Flags = N->getFlags();
11631
11632 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11633 return V;
11634
11635 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11636 return V;
11637
11638 // select (not Cond), N1, N2 -> select Cond, N2, N1
11639 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11640 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11641 SelectOp->setFlags(Flags);
11642 return SelectOp;
11643 }
11644
11645 if (SDValue V = foldSelectOfConstants(N))
11646 return V;
11647
11648 // If we can fold this based on the true/false value, do so.
11649 if (SimplifySelectOps(N, N1, N2))
11650 return SDValue(N, 0); // Don't revisit N.
11651
11652 if (VT0 == MVT::i1) {
11653 // The code in this block deals with the following 2 equivalences:
11654 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11655 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11656 // The target can specify its preferred form with the
11657 // shouldNormalizeToSelectSequence() callback. However we always transform
11658 // to the right anyway if we find the inner select exists in the DAG anyway
11659 // and we always transform to the left side if we know that we can further
11660 // optimize the combination of the conditions.
11661 bool normalizeToSequence =
11662 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11663 // select (and Cond0, Cond1), X, Y
11664 // -> select Cond0, (select Cond1, X, Y), Y
11665 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11666 SDValue Cond0 = N0->getOperand(0);
11667 SDValue Cond1 = N0->getOperand(1);
11668 SDValue InnerSelect =
11669 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11670 if (normalizeToSequence || !InnerSelect.use_empty())
11671 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11672 InnerSelect, N2, Flags);
11673 // Cleanup on failure.
11674 if (InnerSelect.use_empty())
11675 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11676 }
11677 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11678 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11679 SDValue Cond0 = N0->getOperand(0);
11680 SDValue Cond1 = N0->getOperand(1);
11681 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11682 Cond1, N1, N2, Flags);
11683 if (normalizeToSequence || !InnerSelect.use_empty())
11684 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11685 InnerSelect, Flags);
11686 // Cleanup on failure.
11687 if (InnerSelect.use_empty())
11688 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11689 }
11690
11691 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11692 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11693 SDValue N1_0 = N1->getOperand(0);
11694 SDValue N1_1 = N1->getOperand(1);
11695 SDValue N1_2 = N1->getOperand(2);
11696 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11697 // Create the actual and node if we can generate good code for it.
11698 if (!normalizeToSequence) {
11699 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11700 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11701 N2, Flags);
11702 }
11703 // Otherwise see if we can optimize the "and" to a better pattern.
11704 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11705 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11706 N2, Flags);
11707 }
11708 }
11709 }
11710 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11711 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11712 SDValue N2_0 = N2->getOperand(0);
11713 SDValue N2_1 = N2->getOperand(1);
11714 SDValue N2_2 = N2->getOperand(2);
11715 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11716 // Create the actual or node if we can generate good code for it.
11717 if (!normalizeToSequence) {
11718 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11719 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11720 N2_2, Flags);
11721 }
11722 // Otherwise see if we can optimize to a better pattern.
11723 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11724 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11725 N2_2, Flags);
11726 }
11727 }
11728 }
11729
11730 // Fold selects based on a setcc into other things, such as min/max/abs.
11731 if (N0.getOpcode() == ISD::SETCC) {
11732 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11733 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11734
11735 // select (fcmp lt x, y), x, y -> fminnum x, y
11736 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11737 //
11738 // This is OK if we don't care what happens if either operand is a NaN.
11739 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
11740 if (SDValue FMinMax =
11741 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11742 return FMinMax;
11743
11744 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11745 // This is conservatively limited to pre-legal-operations to give targets
11746 // a chance to reverse the transform if they want to do that. Also, it is
11747 // unlikely that the pattern would be formed late, so it's probably not
11748 // worth going through the other checks.
11749 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11750 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11751 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11752 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11753 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11754 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11755 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11756 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11757 //
11758 // The IR equivalent of this transform would have this form:
11759 // %a = add %x, C
11760 // %c = icmp ugt %x, ~C
11761 // %r = select %c, -1, %a
11762 // =>
11763 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11764 // %u0 = extractvalue %u, 0
11765 // %u1 = extractvalue %u, 1
11766 // %r = select %u1, -1, %u0
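// For example, with i8 and C == 16 (so ~C == 239): the select returns -1
// exactly when x u> 239, i.e. exactly when x + 16 would wrap, which is an
// unsigned saturating add of 16.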
11767 SDVTList VTs = DAG.getVTList(VT, VT0);
11768 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11769 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11770 }
11771 }
11772
11773 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11774 (!LegalOperations &&
11775 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11776 // Any flags available in a select/setcc fold will be on the setcc as they
11777 // migrated from fcmp
11778 Flags = N0->getFlags();
11779 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11780 N2, N0.getOperand(2));
11781 SelectNode->setFlags(Flags);
11782 return SelectNode;
11783 }
11784
11785 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11786 return NewSel;
11787 }
11788
11789 if (!VT.isVector())
11790 if (SDValue BinOp = foldSelectOfBinops(N))
11791 return BinOp;
11792
11793 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11794 return R;
11795
11796 return SDValue();
11797}
11798
11799// This function assumes all the vselect's arguments are CONCAT_VECTOR
11800// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11801 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11802 SDLoc DL(N);
11803 SDValue Cond = N->getOperand(0);
11804 SDValue LHS = N->getOperand(1);
11805 SDValue RHS = N->getOperand(2);
11806 EVT VT = N->getValueType(0);
11807 int NumElems = VT.getVectorNumElements();
11808 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11809 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11810 Cond.getOpcode() == ISD::BUILD_VECTOR);
11811
11812 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
11813 // binary ones here.
11814 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11815 return SDValue();
11816
11817 // We're sure we have an even number of elements due to the
11818 // concat_vectors we have as arguments to vselect.
11819 // Skip BV elements until we find one that's not an UNDEF.
11820 // After we find a non-UNDEF element, keep looping until we get to half the
11821 // length of the BV and see if all the non-undef nodes are the same.
11822 ConstantSDNode *BottomHalf = nullptr;
11823 for (int i = 0; i < NumElems / 2; ++i) {
11824 if (Cond->getOperand(i)->isUndef())
11825 continue;
11826
11827 if (BottomHalf == nullptr)
11828 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11829 else if (Cond->getOperand(i).getNode() != BottomHalf)
11830 return SDValue();
11831 }
11832
11833 // Do the same for the second half of the BuildVector
11834 ConstantSDNode *TopHalf = nullptr;
11835 for (int i = NumElems / 2; i < NumElems; ++i) {
11836 if (Cond->getOperand(i)->isUndef())
11837 continue;
11838
11839 if (TopHalf == nullptr)
11840 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11841 else if (Cond->getOperand(i).getNode() != TopHalf)
11842 return SDValue();
11843 }
11844
11845 assert(TopHalf && BottomHalf &&
11846 "One half of the selector was all UNDEFs and the other was all the "
11847 "same value. This should have been addressed before this function.");
11848 return DAG.getNode(
11849 ISD::CONCAT_VECTORS, DL, VT,
11850 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11851 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11852}
11853
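// Try to fold a uniform (splatted) component of a gather/scatter index into the
// scalar base pointer, e.g. Index == splat(S) or Index == add(splat(S), V)
// becomes BasePtr + S with a simplified index.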
11854bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11855 SelectionDAG &DAG, const SDLoc &DL) {
11856
11857 // Only perform the transformation when existing operands can be reused.
11858 if (IndexIsScaled)
11859 return false;
11860
11861 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11862 return false;
11863
11864 EVT VT = BasePtr.getValueType();
11865
11866 if (SDValue SplatVal = DAG.getSplatValue(Index);
11867 SplatVal && !isNullConstant(SplatVal) &&
11868 SplatVal.getValueType() == VT) {
11869 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11870 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11871 return true;
11872 }
11873
11874 if (Index.getOpcode() != ISD::ADD)
11875 return false;
11876
11877 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11878 SplatVal && SplatVal.getValueType() == VT) {
11879 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11880 Index = Index.getOperand(1);
11881 return true;
11882 }
11883 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11884 SplatVal && SplatVal.getValueType() == VT) {
11885 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11886 Index = Index.getOperand(0);
11887 return true;
11888 }
11889 return false;
11890}
11891
11892// Fold sext/zext of index into index type.
11893 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11894 SelectionDAG &DAG) {
11895 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11896
11897 // It's always safe to look through zero extends.
11898 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11899 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11900 IndexType = ISD::UNSIGNED_SCALED;
11901 Index = Index.getOperand(0);
11902 return true;
11903 }
11904 if (ISD::isIndexTypeSigned(IndexType)) {
11905 IndexType = ISD::UNSIGNED_SCALED;
11906 return true;
11907 }
11908 }
11909
11910 // It's only safe to look through sign extends when Index is signed.
11911 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11912 ISD::isIndexTypeSigned(IndexType) &&
11913 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11914 Index = Index.getOperand(0);
11915 return true;
11916 }
11917
11918 return false;
11919}
11920
11921SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11922 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11923 SDValue Mask = MSC->getMask();
11924 SDValue Chain = MSC->getChain();
11925 SDValue Index = MSC->getIndex();
11926 SDValue Scale = MSC->getScale();
11927 SDValue StoreVal = MSC->getValue();
11928 SDValue BasePtr = MSC->getBasePtr();
11929 SDValue VL = MSC->getVectorLength();
11930 ISD::MemIndexType IndexType = MSC->getIndexType();
11931 SDLoc DL(N);
11932
11933 // Zap scatters with a zero mask.
11934 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11935 return Chain;
11936
11937 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11938 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11939 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11940 DL, Ops, MSC->getMemOperand(), IndexType);
11941 }
11942
11943 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11944 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11945 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11946 DL, Ops, MSC->getMemOperand(), IndexType);
11947 }
11948
11949 return SDValue();
11950}
11951
11952SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11953 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11954 SDValue Mask = MSC->getMask();
11955 SDValue Chain = MSC->getChain();
11956 SDValue Index = MSC->getIndex();
11957 SDValue Scale = MSC->getScale();
11958 SDValue StoreVal = MSC->getValue();
11959 SDValue BasePtr = MSC->getBasePtr();
11960 ISD::MemIndexType IndexType = MSC->getIndexType();
11961 SDLoc DL(N);
11962
11963 // Zap scatters with a zero mask.
11964 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11965 return Chain;
11966
11967 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11968 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11969 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11970 DL, Ops, MSC->getMemOperand(), IndexType,
11971 MSC->isTruncatingStore());
11972 }
11973
11974 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11975 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11976 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11977 DL, Ops, MSC->getMemOperand(), IndexType,
11978 MSC->isTruncatingStore());
11979 }
11980
11981 return SDValue();
11982}
11983
11984SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11985 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11986 SDValue Mask = MST->getMask();
11987 SDValue Chain = MST->getChain();
11988 SDValue Value = MST->getValue();
11989 SDValue Ptr = MST->getBasePtr();
11990 SDLoc DL(N);
11991
11992 // Zap masked stores with a zero mask.
11993 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11994 return Chain;
11995
11996 // Remove a masked store if base pointers and masks are equal.
11997 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11998 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11999 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12000 !MST->getBasePtr().isUndef() &&
12001 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12002 MST1->getMemoryVT().getStoreSize()) ||
12003 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12004 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12005 MST->getMemoryVT().getStoreSize())) {
12006 CombineTo(MST1, MST1->getChain());
12007 if (N->getOpcode() != ISD::DELETED_NODE)
12008 AddToWorklist(N);
12009 return SDValue(N, 0);
12010 }
12011 }
12012
12013 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12014 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12015 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12016 !MST->isCompressingStore() && !MST->isTruncatingStore())
12017 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12018 MST->getBasePtr(), MST->getPointerInfo(),
12019 MST->getOriginalAlign(),
12020 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12021
12022 // Try transforming N to an indexed store.
12023 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12024 return SDValue(N, 0);
12025
12026 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12027 Value.getValueType().isInteger() &&
12028 (!isa<ConstantSDNode>(Value) ||
12029 !cast<ConstantSDNode>(Value)->isOpaque())) {
12030 APInt TruncDemandedBits =
12031 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12032 MST->getMemoryVT().getScalarSizeInBits());
12033
12034 // See if we can simplify the operation with
12035 // SimplifyDemandedBits, which only works if the value has a single use.
12036 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12037 // Re-visit the store if anything changed and the store hasn't been merged
12038 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12039 // node back to the worklist if necessary, but we also need to re-visit
12040 // the Store node itself.
12041 if (N->getOpcode() != ISD::DELETED_NODE)
12042 AddToWorklist(N);
12043 return SDValue(N, 0);
12044 }
12045 }
12046
12047 // If this is a TRUNC followed by a masked store, fold this into a masked
12048 // truncating store. We can do this even if this is already a masked
12049 // truncstore.
12050 // TODO: Try combining to a masked compress store if possible.
12051 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12052 MST->isUnindexed() && !MST->isCompressingStore() &&
12053 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12054 MST->getMemoryVT(), LegalOperations)) {
12055 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12056 Value.getOperand(0).getValueType());
12057 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12058 MST->getOffset(), Mask, MST->getMemoryVT(),
12059 MST->getMemOperand(), MST->getAddressingMode(),
12060 /*IsTruncating=*/true);
12061 }
12062
12063 return SDValue();
12064}
12065
12066SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12067 auto *SST = cast<VPStridedStoreSDNode>(N);
12068 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12069 // Combine strided stores with unit-stride to a regular VP store.
12070 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12071 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12072 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12073 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12074 SST->getVectorLength(), SST->getMemoryVT(),
12075 SST->getMemOperand(), SST->getAddressingMode(),
12076 SST->isTruncatingStore(), SST->isCompressingStore());
12077 }
12078 return SDValue();
12079}
12080
12081SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12082 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12083 SDValue Mask = MGT->getMask();
12084 SDValue Chain = MGT->getChain();
12085 SDValue Index = MGT->getIndex();
12086 SDValue Scale = MGT->getScale();
12087 SDValue BasePtr = MGT->getBasePtr();
12088 SDValue VL = MGT->getVectorLength();
12089 ISD::MemIndexType IndexType = MGT->getIndexType();
12090 SDLoc DL(N);
12091
12092 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12093 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12094 return DAG.getGatherVP(
12095 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12096 Ops, MGT->getMemOperand(), IndexType);
12097 }
12098
12099 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12100 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12101 return DAG.getGatherVP(
12102 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12103 Ops, MGT->getMemOperand(), IndexType);
12104 }
12105
12106 return SDValue();
12107}
12108
12109SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12110 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12111 SDValue Mask = MGT->getMask();
12112 SDValue Chain = MGT->getChain();
12113 SDValue Index = MGT->getIndex();
12114 SDValue Scale = MGT->getScale();
12115 SDValue PassThru = MGT->getPassThru();
12116 SDValue BasePtr = MGT->getBasePtr();
12117 ISD::MemIndexType IndexType = MGT->getIndexType();
12118 SDLoc DL(N);
12119
12120 // Zap gathers with a zero mask.
12121 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12122 return CombineTo(N, PassThru, MGT->getChain());
12123
12124 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12125 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12126 return DAG.getMaskedGather(
12127 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12128 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12129 }
12130
12131 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12132 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12133 return DAG.getMaskedGather(
12134 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12135 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12136 }
12137
12138 return SDValue();
12139}
12140
12141SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12142 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12143 SDValue Mask = MLD->getMask();
12144 SDLoc DL(N);
12145
12146 // Zap masked loads with a zero mask.
12147 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12148 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12149
12150 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12151 // FIXME: Can we do this for indexed, expanding, or extending loads?
12152 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12153 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12154 SDValue NewLd = DAG.getLoad(
12155 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12156 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12157 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12158 return CombineTo(N, NewLd, NewLd.getValue(1));
12159 }
12160
12161 // Try transforming N to an indexed load.
12162 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12163 return SDValue(N, 0);
12164
12165 return SDValue();
12166}
12167
12168SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12169 auto *SLD = cast<VPStridedLoadSDNode>(N);
12170 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12171 // Combine strided loads with unit-stride to a regular VP load.
12172 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12173 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12174 SDValue NewLd = DAG.getLoadVP(
12175 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12176 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12177 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12178 SLD->getMemOperand(), SLD->isExpandingLoad());
12179 return CombineTo(N, NewLd, NewLd.getValue(1));
12180 }
12181 return SDValue();
12182}
12183
12184/// A vector select of 2 constant vectors can be simplified to math/logic to
12185/// avoid a variable select instruction and possibly avoid constant loads.
12186SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12187 SDValue Cond = N->getOperand(0);
12188 SDValue N1 = N->getOperand(1);
12189 SDValue N2 = N->getOperand(2);
12190 EVT VT = N->getValueType(0);
12191 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12192 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12193 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12194 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12195 return SDValue();
12196
12197 // Check if we can use the condition value to increment/decrement a single
12198 // constant value. This simplifies a select to an add and removes a constant
12199 // load/materialization from the general case.
12200 bool AllAddOne = true;
12201 bool AllSubOne = true;
12202 unsigned Elts = VT.getVectorNumElements();
12203 for (unsigned i = 0; i != Elts; ++i) {
12204 SDValue N1Elt = N1.getOperand(i);
12205 SDValue N2Elt = N2.getOperand(i);
12206 if (N1Elt.isUndef() || N2Elt.isUndef())
12207 continue;
12208 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12209 AllAddOne = false;
12210 AllSubOne = false;
12211 break;
12212 }
12213
12214 const APInt &C1 = N1Elt->getAsAPIntVal();
12215 const APInt &C2 = N2Elt->getAsAPIntVal();
12216 if (C1 != C2 + 1)
12217 AllAddOne = false;
12218 if (C1 != C2 - 1)
12219 AllSubOne = false;
12220 }
12221
12222 // Further simplifications for the extra-special cases where the constants are
12223 // all 0 or all -1 should be implemented as folds of these patterns.
12224 SDLoc DL(N);
12225 if (AllAddOne || AllSubOne) {
12226 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12227 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12228 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12229 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12230 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12231 }
12232
12233 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12234 APInt Pow2C;
12235 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12236 isNullOrNullSplat(N2)) {
12237 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12238 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12239 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12240 }
12241
12242 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12243 return V;
12244
12245 // The general case for select-of-constants:
12246 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12247 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12248 // leave that to a machine-specific pass.
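// (When Cond is all-ones the sext is -1, so the result is (C1 ^ C2) ^ C2 == C1;
// when Cond is zero the 'and' is 0 and the result is C2.)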
12249 return SDValue();
12250}
12251
12252SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12253 SDValue N0 = N->getOperand(0);
12254 SDValue N1 = N->getOperand(1);
12255 SDValue N2 = N->getOperand(2);
12256
12257 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12258 return V;
12259
12260 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12261 return V;
12262
12263 return SDValue();
12264}
12265
12266SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12267 SDValue N0 = N->getOperand(0);
12268 SDValue N1 = N->getOperand(1);
12269 SDValue N2 = N->getOperand(2);
12270 EVT VT = N->getValueType(0);
12271 SDLoc DL(N);
12272
12273 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12274 return V;
12275
12276 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12277 return V;
12278
12279 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12280 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12281 return DAG.getSelect(DL, VT, F, N2, N1);
12282
12283 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12284 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12285 DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12286 N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12287 TLI.getBooleanContents(N0.getValueType()) ==
12288 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12289 return DAG.getNode(
12290 ISD::ADD, DL, N1.getValueType(), N2,
12291 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12292 }
12293
12294 // Canonicalize integer abs.
12295 // vselect (setg[te] X, 0), X, -X ->
12296 // vselect (setgt X, -1), X, -X ->
12297 // vselect (setl[te] X, 0), -X, X ->
12298 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12299 if (N0.getOpcode() == ISD::SETCC) {
12300 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12301 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12302 bool isAbs = false;
12303 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12304
12305 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12306 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12307 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12309 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12310 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12312
12313 if (isAbs) {
12315 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12316
12317 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12318 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12319 DL, getShiftAmountTy(VT)));
12320 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12321 AddToWorklist(Shift.getNode());
12322 AddToWorklist(Add.getNode());
12323 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12324 }
12325
12326 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12327 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12328 //
12329 // This is OK if we don't care about what happens if either operand is a
12330 // NaN.
12331 //
12332 if (N0.hasOneUse() &&
12333 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12334 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12335 return FMinMax;
12336 }
12337
12338 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12339 return S;
12340 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12341 return S;
12342
12343 // If this select has a condition (setcc) with narrower operands than the
12344 // select, try to widen the compare to match the select width.
12345 // TODO: This should be extended to handle any constant.
12346 // TODO: This could be extended to handle non-loading patterns, but that
12347 // requires thorough testing to avoid regressions.
12348 if (isNullOrNullSplat(RHS)) {
12349 EVT NarrowVT = LHS.getValueType();
12351 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12352 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12353 unsigned WideWidth = WideVT.getScalarSizeInBits();
12354 bool IsSigned = isSignedIntSetCC(CC);
12355 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12356 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12357 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12358 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12359 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12360 // Both compare operands can be widened for free. The LHS can use an
12361 // extended load, and the RHS is a constant:
12362 // vselect (ext (setcc load(X), C)), N1, N2 -->
12363 // vselect (setcc extload(X), C'), N1, N2
12364 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12365 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12366 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12367 EVT WideSetCCVT = getSetCCResultType(WideVT);
12368 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12369 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12370 }
12371 }
12372
12373 // Match VSELECTs with absolute difference patterns.
12374 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12375 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12376 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12377 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12378 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12379 N1.getOperand(0) == N2.getOperand(1) &&
12380 N1.getOperand(1) == N2.getOperand(0)) {
12381 bool IsSigned = isSignedIntSetCC(CC);
12382 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12383 if (hasOperation(ABDOpc, VT)) {
12384 switch (CC) {
12385 case ISD::SETGT:
12386 case ISD::SETGE:
12387 case ISD::SETUGT:
12388 case ISD::SETUGE:
12389 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12390 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12391 break;
12392 case ISD::SETLT:
12393 case ISD::SETLE:
12394 case ISD::SETULT:
12395 case ISD::SETULE:
12396 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12397 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12398 break;
12399 default:
12400 break;
12401 }
12402 }
12403 }
12404
12405 // Match VSELECTs into add with unsigned saturation.
12406 if (hasOperation(ISD::UADDSAT, VT)) {
12407 // Check if one of the arms of the VSELECT is a vector with all bits set.
12408 // If it's on the left side invert the predicate to simplify logic below.
12409 SDValue Other;
12410 ISD::CondCode SatCC = CC;
12412 Other = N2;
12413 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12414 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12415 Other = N1;
12416 }
12417
12418 if (Other && Other.getOpcode() == ISD::ADD) {
12419 SDValue CondLHS = LHS, CondRHS = RHS;
12420 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12421
12422 // Canonicalize condition operands.
12423 if (SatCC == ISD::SETUGE) {
12424 std::swap(CondLHS, CondRHS);
12425 SatCC = ISD::SETULE;
12426 }
12427
12428 // We can test against either of the addition operands.
12429 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12430 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12431 if (SatCC == ISD::SETULE && Other == CondRHS &&
12432 (OpLHS == CondLHS || OpRHS == CondLHS))
12433 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12434
12435 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12436 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12437 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12438 CondLHS == OpLHS) {
12439 // If the RHS is a constant we have to reverse the const
12440 // canonicalization.
12441 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12442 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12443 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12444 };
12445 if (SatCC == ISD::SETULE &&
12446 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12447 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12448 }
12449 }
12450 }
12451
12452 // Match VSELECTs into sub with unsigned saturation.
12453 if (hasOperation(ISD::USUBSAT, VT)) {
12454 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12455 // the left side invert the predicate to simplify logic below.
12456 SDValue Other;
12457 ISD::CondCode SatCC = CC;
12459 Other = N2;
12460 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12462 Other = N1;
12463 }
12464
12465 // zext(x) >= y ? trunc(zext(x) - y) : 0
12466 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12467 // zext(x) > y ? trunc(zext(x) - y) : 0
12468 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12469 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12470 Other.getOperand(0).getOpcode() == ISD::SUB &&
12471 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12472 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12473 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12474 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12475 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12476 DAG, DL))
12477 return R;
12478 }
12479
12480 if (Other && Other.getNumOperands() == 2) {
12481 SDValue CondRHS = RHS;
12482 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12483
12484 if (OpLHS == LHS) {
12485 // Look for a general sub with unsigned saturation first.
12486 // x >= y ? x-y : 0 --> usubsat x, y
12487 // x > y ? x-y : 0 --> usubsat x, y
12488 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12489 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12490 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12491
12492 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12493 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12494 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12495 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12496 // If the RHS is a constant we have to reverse the const
12497 // canonicalization.
12498 // x > C-1 ? x+-C : 0 --> usubsat x, C
12499 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12500 return (!Op && !Cond) ||
12501 (Op && Cond &&
12502 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12503 };
12504 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12505 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12506 /*AllowUndefs*/ true)) {
12507 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12508 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12509 }
12510
12511 // Another special case: If C was a sign bit, the sub has been
12512 // canonicalized into a xor.
12513 // FIXME: Would it be better to use computeKnownBits to
12514 // determine whether it's safe to decanonicalize the xor?
12515 // x s< 0 ? x^C : 0 --> usubsat x, C
12516 APInt SplatValue;
12517 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12518 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12520 SplatValue.isSignMask()) {
12521 // Note that we have to rebuild the RHS constant here to
12522 // ensure we don't rely on particular values of undef lanes.
12523 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12524 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12525 }
12526 }
12527 }
12528 }
12529 }
12530 }
12531 }
12532
12533 if (SimplifySelectOps(N, N1, N2))
12534 return SDValue(N, 0); // Don't revisit N.
12535
12536 // Fold (vselect all_ones, N1, N2) -> N1
12538 return N1;
12539 // Fold (vselect all_zeros, N1, N2) -> N2
12541 return N2;
12542
12543 // The ConvertSelectToConcatVector function is assuming both the above
12544 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12545 // and addressed.
12546 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12549 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12550 return CV;
12551 }
12552
12553 if (SDValue V = foldVSelectOfConstants(N))
12554 return V;
12555
12556 if (hasOperation(ISD::SRA, VT))
12558 return V;
12559
12561 return SDValue(N, 0);
12562
12563 return SDValue();
12564}
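
// Scalar sketches of the per-lane identities matched in visitVSELECT above
// (illustrative only; the names and the 32-bit lane width are assumptions,
// and an arithmetic right shift is assumed for signed values).
static uint32_t absViaShiftSketch(int32_t X) {
  // vselect (setgt X, -1), X, (sub 0, X): Y = sra(X, 31); xor (add (X, Y), Y)
  uint32_t Y = static_cast<uint32_t>(X >> 31); // 0 or all-ones
  return (static_cast<uint32_t>(X) + Y) ^ Y;   // == |X| modulo 2^32
}
static uint32_t uaddsatSketch(uint32_t X, uint32_t Y) {
  // x <= x+y ? x+y : ~0 --> uaddsat x, y
  uint32_t Sum = X + Y;
  return X <= Sum ? Sum : ~0u;
}
static uint32_t usubsatSketch(uint32_t X, uint32_t Y) {
  // x >= y ? x-y : 0 --> usubsat x, y
  return X >= Y ? X - Y : 0u;
}
static uint32_t abduSketch(uint32_t A, uint32_t B) {
  // (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) --> abdu a, b
  return A > B ? A - B : B - A;
}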
12565
12566SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12567 SDValue N0 = N->getOperand(0);
12568 SDValue N1 = N->getOperand(1);
12569 SDValue N2 = N->getOperand(2);
12570 SDValue N3 = N->getOperand(3);
12571 SDValue N4 = N->getOperand(4);
12572 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12573
12574 // fold select_cc lhs, rhs, x, x, cc -> x
12575 if (N2 == N3)
12576 return N2;
12577
12578 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12579 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12580 isNullConstant(N1))
12581 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12582
12583 // Determine if the condition we're dealing with is constant
12584 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12585 CC, SDLoc(N), false)) {
12586 AddToWorklist(SCC.getNode());
12587
12588 // cond always true -> true val
12589 // cond always false -> false val
12590 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12591 return SCCC->isZero() ? N3 : N2;
12592
12593 // When the condition is UNDEF, just return the first operand. This is
12594 // consistent with DAG creation; no setcc node is created in this case.
12595 if (SCC->isUndef())
12596 return N2;
12597
12598 // Fold to a simpler select_cc
12599 if (SCC.getOpcode() == ISD::SETCC) {
12600 SDValue SelectOp = DAG.getNode(
12601 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12602 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12603 SelectOp->setFlags(SCC->getFlags());
12604 return SelectOp;
12605 }
12606 }
12607
12608 // If we can fold this based on the true/false value, do so.
12609 if (SimplifySelectOps(N, N2, N3))
12610 return SDValue(N, 0); // Don't revisit N.
12611
12612 // fold select_cc into other things, such as min/max/abs
12613 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12614}
12615
12616SDValue DAGCombiner::visitSETCC(SDNode *N) {
12617 // setcc is very commonly used as an argument to brcond. This pattern
12618 // also lends itself to numerous combines and, as a result, it is desirable
12619 // to keep the argument to a brcond as a setcc as much as possible.
12620 bool PreferSetCC =
12621 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12622
12623 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12624 EVT VT = N->getValueType(0);
12625 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12626
12627 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12628
12629 if (Combined) {
12630 // If we prefer to have a setcc, and we don't, we'll try our best to
12631 // recreate one using rebuildSetCC.
12632 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12633 SDValue NewSetCC = rebuildSetCC(Combined);
12634
12635 // We don't have anything interesting to combine to.
12636 if (NewSetCC.getNode() == N)
12637 return SDValue();
12638
12639 if (NewSetCC)
12640 return NewSetCC;
12641 }
12642 return Combined;
12643 }
12644
12645 // Optimize
12646 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12647 // or
12648 // 2) (icmp eq/ne X, (rotate X, C1))
12649 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12650 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12651 // Then:
12652 // If C1 is a power of 2, then the rotate and shift+and versions are
12653 // equivalent, so we can interchange them depending on target preference.
12654 // Otherwise, if we have the shift+and version we can interchange srl/shl
12655 // which in turn affects the constant C0. We can use this to get better
12656 // constants, again determined by target preference.
12657 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12658 auto IsAndWithShift = [](SDValue A, SDValue B) {
12659 return A.getOpcode() == ISD::AND &&
12660 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12661 A.getOperand(0) == B.getOperand(0);
12662 };
12663 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12664 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12665 B.getOperand(0) == A;
12666 };
12667 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12668 bool IsRotate = false;
12669
12670 // Find either shift+and or rotate pattern.
12671 if (IsAndWithShift(N0, N1)) {
12672 AndOrOp = N0;
12673 ShiftOrRotate = N1;
12674 } else if (IsAndWithShift(N1, N0)) {
12675 AndOrOp = N1;
12676 ShiftOrRotate = N0;
12677 } else if (IsRotateWithOp(N0, N1)) {
12678 IsRotate = true;
12679 AndOrOp = N0;
12680 ShiftOrRotate = N1;
12681 } else if (IsRotateWithOp(N1, N0)) {
12682 IsRotate = true;
12683 AndOrOp = N1;
12684 ShiftOrRotate = N0;
12685 }
12686
12687 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12688 (IsRotate || AndOrOp.hasOneUse())) {
12689 EVT OpVT = N0.getValueType();
12690 // Get the constant shift/rotate amount and possibly the mask (if it's the
12691 // shift+and variant).
12692 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12693 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12694 /*AllowTrunc*/ false);
12695 if (CNode == nullptr)
12696 return std::nullopt;
12697 return CNode->getAPIntValue();
12698 };
12699 std::optional<APInt> AndCMask =
12700 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12701 std::optional<APInt> ShiftCAmt =
12702 GetAPIntValue(ShiftOrRotate.getOperand(1));
12703 unsigned NumBits = OpVT.getScalarSizeInBits();
12704
12705 // We found constants.
12706 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12707 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12708 // Check that the constants meet the constraints.
12709 bool CanTransform = IsRotate;
12710 if (!CanTransform) {
12711 // Check that the mask and shift complement each other
12712 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12713 // Check that we are comparing all bits
12714 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12715 // Check that the and mask is correct for the shift
12716 CanTransform &=
12717 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12718 }
12719
12720 // See if target prefers another shift/rotate opcode.
12721 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12722 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12723 // Transform is valid and we have a new preference.
12724 if (CanTransform && NewShiftOpc != ShiftOpc) {
12725 SDLoc DL(N);
12726 SDValue NewShiftOrRotate =
12727 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12728 ShiftOrRotate.getOperand(1));
12729 SDValue NewAndOrOp = SDValue();
12730
12731 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12732 APInt NewMask =
12733 NewShiftOpc == ISD::SHL
12734 ? APInt::getHighBitsSet(NumBits,
12735 NumBits - ShiftCAmt->getZExtValue())
12736 : APInt::getLowBitsSet(NumBits,
12737 NumBits - ShiftCAmt->getZExtValue());
12738 NewAndOrOp =
12739 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12740 DAG.getConstant(NewMask, DL, OpVT));
12741 } else {
12742 NewAndOrOp = ShiftOrRotate.getOperand(0);
12743 }
12744
12745 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12746 }
12747 }
12748 }
12749 }
12750 return SDValue();
12751}
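
// Scalar sketch of the equivalence exploited above for the power-of-2 case
// (illustrative only): comparing the two halves of a 64-bit value can be
// phrased either with mask+shift or with a rotate.
static bool halvesEqualViaMaskAndShift(uint64_t X) {
  return (X & 0xffffffffull) == (X >> 32); // (icmp eq (and X, C0), (srl X, C1))
}
static bool halvesEqualViaRotate(uint64_t X) {
  uint64_t Rotated = (X >> 32) | (X << 32); // rot(X, 32)
  return X == Rotated;                      // (icmp eq X, (rotate X, C1))
}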
12752
12753SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12754 SDValue LHS = N->getOperand(0);
12755 SDValue RHS = N->getOperand(1);
12756 SDValue Carry = N->getOperand(2);
12757 SDValue Cond = N->getOperand(3);
12758
12759 // If Carry is false, fold to a regular SETCC.
12760 if (isNullConstant(Carry))
12761 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12762
12763 return SDValue();
12764}
12765
12766 /// Check if N satisfies:
12767 /// N is used once.
12768 /// N is a Load.
12769 /// The load is compatible with ExtOpcode. That is, if the load has an
12770 /// explicit zero/sign extension, ExtOpcode must perform the same
12771 /// extension;
12772 /// otherwise any ExtOpcode is compatible.
12773static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12774 if (!N.hasOneUse())
12775 return false;
12776
12777 if (!isa<LoadSDNode>(N))
12778 return false;
12779
12780 LoadSDNode *Load = cast<LoadSDNode>(N);
12781 ISD::LoadExtType LoadExt = Load->getExtensionType();
12782 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12783 return true;
12784
12785 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12786 // extension.
12787 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12788 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12789 return false;
12790
12791 return true;
12792}
12793
12794/// Fold
12795/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12796/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12797/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12798/// This function is called by the DAGCombiner when visiting sext/zext/aext
12799/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12801 SelectionDAG &DAG,
12802 CombineLevel Level) {
12803 unsigned Opcode = N->getOpcode();
12804 SDValue N0 = N->getOperand(0);
12805 EVT VT = N->getValueType(0);
12806 SDLoc DL(N);
12807
12808 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12809 Opcode == ISD::ANY_EXTEND) &&
12810 "Expected EXTEND dag node in input!");
12811
12812 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12813 !N0.hasOneUse())
12814 return SDValue();
12815
12816 SDValue Op1 = N0->getOperand(1);
12817 SDValue Op2 = N0->getOperand(2);
12818 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12819 return SDValue();
12820
12821 auto ExtLoadOpcode = ISD::EXTLOAD;
12822 if (Opcode == ISD::SIGN_EXTEND)
12823 ExtLoadOpcode = ISD::SEXTLOAD;
12824 else if (Opcode == ISD::ZERO_EXTEND)
12825 ExtLoadOpcode = ISD::ZEXTLOAD;
12826
12827 // An illegal VSELECT may fail to select if it is created after legalization
12828 // (DAG Combine2), so we should conservatively check the OperationAction.
12829 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12830 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12831 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12832 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12833 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12835 return SDValue();
12836
12837 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12838 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12839 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12840}
12841
12842/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12843/// a build_vector of constants.
12844/// This function is called by the DAGCombiner when visiting sext/zext/aext
12845/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12846/// Vector extends are not folded if operations are legal; this is to
12847/// avoid introducing illegal build_vector dag nodes.
12849 const TargetLowering &TLI,
12850 SelectionDAG &DAG, bool LegalTypes) {
12851 unsigned Opcode = N->getOpcode();
12852 SDValue N0 = N->getOperand(0);
12853 EVT VT = N->getValueType(0);
12854
12855 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12856 "Expected EXTEND dag node in input!");
12857
12858 // fold (sext c1) -> c1
12859 // fold (zext c1) -> c1
12860 // fold (aext c1) -> c1
12861 if (isa<ConstantSDNode>(N0))
12862 return DAG.getNode(Opcode, DL, VT, N0);
12863
12864 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12865 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12866 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12867 if (N0->getOpcode() == ISD::SELECT) {
12868 SDValue Op1 = N0->getOperand(1);
12869 SDValue Op2 = N0->getOperand(2);
12870 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12871 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12872 // For any_extend, choose sign extension of the constants to allow a
12873 // possible further transform to sign_extend_inreg, i.e.:
12874 //
12875 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12876 // t2: i64 = any_extend t1
12877 // -->
12878 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12879 // -->
12880 // t4: i64 = sign_extend_inreg t3
12881 unsigned FoldOpc = Opcode;
12882 if (FoldOpc == ISD::ANY_EXTEND)
12883 FoldOpc = ISD::SIGN_EXTEND;
12884 return DAG.getSelect(DL, VT, N0->getOperand(0),
12885 DAG.getNode(FoldOpc, DL, VT, Op1),
12886 DAG.getNode(FoldOpc, DL, VT, Op2));
12887 }
12888 }
12889
12890 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
12891 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
12892 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
12893 EVT SVT = VT.getScalarType();
12894 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12896 return SDValue();
12897
12898 // We can fold this node into a build_vector.
12899 unsigned VTBits = SVT.getSizeInBits();
12900 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12902 unsigned NumElts = VT.getVectorNumElements();
12903
12904 for (unsigned i = 0; i != NumElts; ++i) {
12905 SDValue Op = N0.getOperand(i);
12906 if (Op.isUndef()) {
12907 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12908 Elts.push_back(DAG.getUNDEF(SVT));
12909 else
12910 Elts.push_back(DAG.getConstant(0, DL, SVT));
12911 continue;
12912 }
12913
12914 SDLoc DL(Op);
12915 // Get the constant value and if needed trunc it to the size of the type.
12916 // Nodes like build_vector might have constants wider than the scalar type.
12917 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12918 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12919 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12920 else
12921 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12922 }
12923
12924 return DAG.getBuildVector(VT, DL, Elts);
12925}
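
// Scalar sketch of the per-element constant extension above (illustrative
// only, for 1 <= SrcBits <= 64): a stored constant is first truncated to the
// source scalar width, then sign- or zero-extended to the destination width.
static int64_t extendConstEltSketch(uint64_t Stored, unsigned SrcBits,
                                    bool IsSigned) {
  uint64_t Mask = SrcBits < 64 ? (1ull << SrcBits) - 1 : ~0ull;
  uint64_t Trunc = Stored & Mask;                           // zextOrTrunc
  if (!IsSigned)
    return static_cast<int64_t>(Trunc);                     // zext to 64 bits
  uint64_t SignBit = 1ull << (SrcBits - 1);
  return static_cast<int64_t>((Trunc ^ SignBit) - SignBit); // sext to 64 bits
}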
12926
12927// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
12928// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12929 // transformation. Returns true if the extensions are possible and the
12930 // above-mentioned transformation is profitable.
12932 unsigned ExtOpc,
12933 SmallVectorImpl<SDNode *> &ExtendNodes,
12934 const TargetLowering &TLI) {
12935 bool HasCopyToRegUses = false;
12936 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12937 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12938 ++UI) {
12939 SDNode *User = *UI;
12940 if (User == N)
12941 continue;
12942 if (UI.getUse().getResNo() != N0.getResNo())
12943 continue;
12944 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12945 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12946 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12947 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12948 // Sign bits will be lost after a zext.
12949 return false;
12950 bool Add = false;
12951 for (unsigned i = 0; i != 2; ++i) {
12952 SDValue UseOp = User->getOperand(i);
12953 if (UseOp == N0)
12954 continue;
12955 if (!isa<ConstantSDNode>(UseOp))
12956 return false;
12957 Add = true;
12958 }
12959 if (Add)
12960 ExtendNodes.push_back(User);
12961 continue;
12962 }
12963 // If truncates aren't free and there are users we can't
12964 // extend, it isn't worthwhile.
12965 if (!isTruncFree)
12966 return false;
12967 // Remember if this value is live-out.
12968 if (User->getOpcode() == ISD::CopyToReg)
12969 HasCopyToRegUses = true;
12970 }
12971
12972 if (HasCopyToRegUses) {
12973 bool BothLiveOut = false;
12974 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12975 UI != UE; ++UI) {
12976 SDUse &Use = UI.getUse();
12977 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12978 BothLiveOut = true;
12979 break;
12980 }
12981 }
12982 if (BothLiveOut)
12983 // Both unextended and extended values are live out. There had better be
12984 // a good reason for the transformation.
12985 return !ExtendNodes.empty();
12986 }
12987 return true;
12988}
12989
12990void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12991 SDValue OrigLoad, SDValue ExtLoad,
12992 ISD::NodeType ExtType) {
12993 // Extend SetCC uses if necessary.
12994 SDLoc DL(ExtLoad);
12995 for (SDNode *SetCC : SetCCs) {
12997
12998 for (unsigned j = 0; j != 2; ++j) {
12999 SDValue SOp = SetCC->getOperand(j);
13000 if (SOp == OrigLoad)
13001 Ops.push_back(ExtLoad);
13002 else
13003 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13004 }
13005
13006 Ops.push_back(SetCC->getOperand(2));
13007 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13008 }
13009}
13010
13011// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13012SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13013 SDValue N0 = N->getOperand(0);
13014 EVT DstVT = N->getValueType(0);
13015 EVT SrcVT = N0.getValueType();
13016
13017 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13018 N->getOpcode() == ISD::ZERO_EXTEND) &&
13019 "Unexpected node type (not an extend)!");
13020
13021 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13022 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13023 // (v8i32 (sext (v8i16 (load x))))
13024 // into:
13025 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13026 // (v4i32 (sextload (x + 16)))))
13027 // Where uses of the original load, i.e.:
13028 // (v8i16 (load x))
13029 // are replaced with:
13030 // (v8i16 (truncate
13031 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13032 // (v4i32 (sextload (x + 16)))))))
13033 //
13034 // This combine is only applicable to illegal, but splittable, vectors.
13035 // All legal types, and illegal non-vector types, are handled elsewhere.
13036 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13037 //
13038 if (N0->getOpcode() != ISD::LOAD)
13039 return SDValue();
13040
13041 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13042
13043 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13044 !N0.hasOneUse() || !LN0->isSimple() ||
13045 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13047 return SDValue();
13048
13050 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13051 return SDValue();
13052
13053 ISD::LoadExtType ExtType =
13054 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13055
13056 // Try to split the vector types to get down to legal types.
13057 EVT SplitSrcVT = SrcVT;
13058 EVT SplitDstVT = DstVT;
13059 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13060 SplitSrcVT.getVectorNumElements() > 1) {
13061 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13062 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13063 }
13064
13065 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13066 return SDValue();
13067
13068 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13069
13070 SDLoc DL(N);
13071 const unsigned NumSplits =
13072 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13073 const unsigned Stride = SplitSrcVT.getStoreSize();
13076
13077 SDValue BasePtr = LN0->getBasePtr();
13078 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13079 const unsigned Offset = Idx * Stride;
13080
13081 SDValue SplitLoad =
13082 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13083 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13084 SplitSrcVT, LN0->getOriginalAlign(),
13085 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13086
13087 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13088
13089 Loads.push_back(SplitLoad.getValue(0));
13090 Chains.push_back(SplitLoad.getValue(1));
13091 }
13092
13093 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13094 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13095
13096 // Simplify TF.
13097 AddToWorklist(NewChain.getNode());
13098
13099 CombineTo(N, NewValue);
13100
13101 // Replace uses of the original load (before extension)
13102 // with a truncate of the concatenated sextloaded vectors.
13103 SDValue Trunc =
13104 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13105 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13106 CombineTo(N0.getNode(), Trunc, NewChain);
13107 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13108}
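
// Sketch of the split performed above for a hypothetical v8i16 -> v8i32
// sext-load (illustrative only): one wide extending load is emulated with two
// half-width extending loads whose results are concatenated.
static void splitSExtLoadSketch(const int16_t *X, int32_t Out[8]) {
  for (unsigned Half = 0; Half != 2; ++Half) // two v4i32 "sextloads"
    for (unsigned I = 0; I != 4; ++I)
      Out[Half * 4 + I] = X[Half * 4 + I];   // sign-extend each i16 lane
}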
13109
13110// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13111// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13112SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13113 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13114 EVT VT = N->getValueType(0);
13115 EVT OrigVT = N->getOperand(0).getValueType();
13116 if (TLI.isZExtFree(OrigVT, VT))
13117 return SDValue();
13118
13119 // and/or/xor
13120 SDValue N0 = N->getOperand(0);
13121 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13122 N0.getOperand(1).getOpcode() != ISD::Constant ||
13123 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13124 return SDValue();
13125
13126 // shl/shr
13127 SDValue N1 = N0->getOperand(0);
13128 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13129 N1.getOperand(1).getOpcode() != ISD::Constant ||
13130 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13131 return SDValue();
13132
13133 // load
13134 if (!isa<LoadSDNode>(N1.getOperand(0)))
13135 return SDValue();
13136 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13137 EVT MemVT = Load->getMemoryVT();
13138 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13139 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13140 return SDValue();
13141
13142
13143 // If the shift op is SHL, the logic op must be AND, otherwise the result
13144 // will be wrong.
13145 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13146 return SDValue();
13147
13148 if (!N0.hasOneUse() || !N1.hasOneUse())
13149 return SDValue();
13150
13152 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13153 ISD::ZERO_EXTEND, SetCCs, TLI))
13154 return SDValue();
13155
13156 // Actually do the transformation.
13157 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13158 Load->getChain(), Load->getBasePtr(),
13159 Load->getMemoryVT(), Load->getMemOperand());
13160
13161 SDLoc DL1(N1);
13162 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13163 N1.getOperand(1));
13164
13166 SDLoc DL0(N0);
13167 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13168 DAG.getConstant(Mask, DL0, VT));
13169
13170 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13171 CombineTo(N, And);
13172 if (SDValue(Load, 0).hasOneUse()) {
13173 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13174 } else {
13175 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13176 Load->getValueType(0), ExtLoad);
13177 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13178 }
13179
13180 // N0 is dead at this point.
13181 recursivelyDeleteUnusedNodes(N0.getNode());
13182
13183 return SDValue(N,0); // Return N so it doesn't get rechecked!
13184}
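
// Scalar sketch of why the zext can be hoisted onto the load above
// (illustrative only, assuming ShAmt < 32): for a logical right shift the
// extra high bits are zero either way, and for a left shift the AND mask is
// what keeps the result confined to the original width.
static bool zextLogicShiftLoadSketch(uint32_t Loaded, unsigned ShAmt,
                                     uint32_t Mask) {
  // zext (and (srl (load x), c), m) vs and (srl (zextload x), c), (zext m)
  uint64_t NarrowSrl = static_cast<uint64_t>((Loaded >> ShAmt) & Mask);
  uint64_t WideSrl =
      (static_cast<uint64_t>(Loaded) >> ShAmt) & static_cast<uint64_t>(Mask);
  // zext (and (shl (load x), c), m) vs and (shl (zextload x), c), (zext m)
  uint64_t NarrowShl = static_cast<uint64_t>((Loaded << ShAmt) & Mask);
  uint64_t WideShl =
      (static_cast<uint64_t>(Loaded) << ShAmt) & static_cast<uint64_t>(Mask);
  return NarrowSrl == WideSrl && NarrowShl == WideShl; // both always hold
}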
13185
13186/// If we're narrowing or widening the result of a vector select and the final
13187/// size is the same size as a setcc (compare) feeding the select, then try to
13188/// apply the cast operation to the select's operands because matching vector
13189/// sizes for a select condition and other operands should be more efficient.
13190SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13191 unsigned CastOpcode = Cast->getOpcode();
13192 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13193 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13194 CastOpcode == ISD::FP_ROUND) &&
13195 "Unexpected opcode for vector select narrowing/widening");
13196
13197 // We only do this transform before legal ops because the pattern may be
13198 // obfuscated by target-specific operations after legalization. Do not create
13199 // an illegal select op, however, because that may be difficult to lower.
13200 EVT VT = Cast->getValueType(0);
13201 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13202 return SDValue();
13203
13204 SDValue VSel = Cast->getOperand(0);
13205 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13206 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13207 return SDValue();
13208
13209 // Does the setcc have the same vector size as the casted select?
13210 SDValue SetCC = VSel.getOperand(0);
13211 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13212 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13213 return SDValue();
13214
13215 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13216 SDValue A = VSel.getOperand(1);
13217 SDValue B = VSel.getOperand(2);
13218 SDValue CastA, CastB;
13219 SDLoc DL(Cast);
13220 if (CastOpcode == ISD::FP_ROUND) {
13221 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13222 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13223 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13224 } else {
13225 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13226 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13227 }
13228 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13229}
13230
13231// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13232// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13234 const TargetLowering &TLI, EVT VT,
13235 bool LegalOperations, SDNode *N,
13236 SDValue N0, ISD::LoadExtType ExtLoadType) {
13237 SDNode *N0Node = N0.getNode();
13238 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13239 : ISD::isZEXTLoad(N0Node);
13240 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13241 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13242 return SDValue();
13243
13244 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13245 EVT MemVT = LN0->getMemoryVT();
13246 if ((LegalOperations || !LN0->isSimple() ||
13247 VT.isVector()) &&
13248 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13249 return SDValue();
13250
13251 SDValue ExtLoad =
13252 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13253 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13254 Combiner.CombineTo(N, ExtLoad);
13255 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13256 if (LN0->use_empty())
13257 Combiner.recursivelyDeleteUnusedNodes(LN0);
13258 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13259}
13260
13261// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13262// Only generate vector extloads when 1) they're legal, and 2) they are
13263// deemed desirable by the target. NonNegZExt can be set to true if a zero
13264// extend has the nonneg flag to allow use of sextload if profitable.
13266 const TargetLowering &TLI, EVT VT,
13267 bool LegalOperations, SDNode *N, SDValue N0,
13268 ISD::LoadExtType ExtLoadType,
13269 ISD::NodeType ExtOpc,
13270 bool NonNegZExt = false) {
13272 return {};
13273
13274 // If this is zext nneg, see if it would make sense to treat it as a sext.
13275 if (NonNegZExt) {
13276 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13277 "Unexpected load type or opcode");
13278 for (SDNode *User : N0->uses()) {
13279 if (User->getOpcode() == ISD::SETCC) {
13280 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13282 ExtLoadType = ISD::SEXTLOAD;
13283 ExtOpc = ISD::SIGN_EXTEND;
13284 break;
13285 }
13286 }
13287 }
13288 }
13289
13290 // TODO: isFixedLengthVector() should be removed; any negative effects on
13291 // code generation would then be the result of that target's implementation
13292 // of isVectorLoadExtDesirable().
13293 if ((LegalOperations || VT.isFixedLengthVector() ||
13294 !cast<LoadSDNode>(N0)->isSimple()) &&
13295 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13296 return {};
13297
13298 bool DoXform = true;
13300 if (!N0.hasOneUse())
13301 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13302 if (VT.isVector())
13303 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13304 if (!DoXform)
13305 return {};
13306
13307 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13308 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13309 LN0->getBasePtr(), N0.getValueType(),
13310 LN0->getMemOperand());
13311 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13312 // If the load value is used only by N, replace it via CombineTo N.
13313 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13314 Combiner.CombineTo(N, ExtLoad);
13315 if (NoReplaceTrunc) {
13316 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13317 Combiner.recursivelyDeleteUnusedNodes(LN0);
13318 } else {
13319 SDValue Trunc =
13320 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13321 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13322 }
13323 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13324}
13325
13326static SDValue
13328 bool LegalOperations, SDNode *N, SDValue N0,
13329 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13330 if (!N0.hasOneUse())
13331 return SDValue();
13332
13333 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13334 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13335 return SDValue();
13336
13337 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13338 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13339 return SDValue();
13340
13341 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13342 return SDValue();
13343
13344 SDLoc dl(Ld);
13345 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13346 SDValue NewLoad = DAG.getMaskedLoad(
13347 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13348 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13349 ExtLoadType, Ld->isExpandingLoad());
13350 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13351 return NewLoad;
13352}
13353
13354// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13356 const TargetLowering &TLI, EVT VT,
13357 SDValue N0,
13358 ISD::LoadExtType ExtLoadType) {
13359 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13360 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13361 return {};
13362 EVT MemoryVT = ALoad->getMemoryVT();
13363 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13364 return {};
13365 // Can't fold into ALoad if it is already extending differently.
13366 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13367 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13368 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13369 return {};
13370
13371 EVT OrigVT = ALoad->getValueType(0);
13372 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13373 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13374 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13375 ALoad->getBasePtr(), ALoad->getMemOperand()));
13376 NewALoad->setExtensionType(ExtLoadType);
13378 SDValue(ALoad, 0),
13379 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13380 // Update the chain uses.
13381 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13382 return SDValue(NewALoad, 0);
13383}
13384
13386 bool LegalOperations) {
13387 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13388 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13389
13390 SDValue SetCC = N->getOperand(0);
13391 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13392 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13393 return SDValue();
13394
13395 SDValue X = SetCC.getOperand(0);
13396 SDValue Ones = SetCC.getOperand(1);
13397 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13398 EVT VT = N->getValueType(0);
13399 EVT XVT = X.getValueType();
13400 // setge X, C is canonicalized to setgt, so we do not need to match that
13401 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13402 // not require the 'not' op.
13403 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13404 // Invert and smear/shift the sign bit:
13405 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13406 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13407 SDLoc DL(N);
13408 unsigned ShCt = VT.getSizeInBits() - 1;
13409 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13410 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13411 SDValue NotX = DAG.getNOT(DL, X, VT);
13412 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13413 auto ShiftOpcode =
13414 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13415 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13416 }
13417 }
13418 return SDValue();
13419}
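
// Scalar sketch of the sign-bit smear above (illustrative only, assuming
// 32-bit X and an arithmetic right shift for signed values).
static int32_t sextIsNonNegativeSketch(int32_t X) {
  // sext i1 (setgt X, -1) --> sra (not X), 31 : -1 if X >= 0, else 0.
  return ~X >> 31;
}
static uint32_t zextIsNonNegativeSketch(int32_t X) {
  // zext i1 (setgt X, -1) --> srl (not X), 31 : 1 if X >= 0, else 0.
  return static_cast<uint32_t>(~X) >> 31;
}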
13420
13421SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13422 SDValue N0 = N->getOperand(0);
13423 if (N0.getOpcode() != ISD::SETCC)
13424 return SDValue();
13425
13426 SDValue N00 = N0.getOperand(0);
13427 SDValue N01 = N0.getOperand(1);
13428 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13429 EVT VT = N->getValueType(0);
13430 EVT N00VT = N00.getValueType();
13431 SDLoc DL(N);
13432
13433 // Propagate fast-math-flags.
13434 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13435
13436 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13437 // the same size as the compared operands. Try to optimize sext(setcc())
13438 // if this is the case.
13439 if (VT.isVector() && !LegalOperations &&
13440 TLI.getBooleanContents(N00VT) ==
13442 EVT SVT = getSetCCResultType(N00VT);
13443
13444 // If we already have the desired type, don't change it.
13445 if (SVT != N0.getValueType()) {
13446 // We know that the # elements of the results is the same as the
13447 // # elements of the compare (and the # elements of the compare result
13448 // for that matter). Check to see that they are the same size. If so,
13449 // we know that the element size of the sext'd result matches the
13450 // element size of the compare operands.
13451 if (VT.getSizeInBits() == SVT.getSizeInBits())
13452 return DAG.getSetCC(DL, VT, N00, N01, CC);
13453
13454 // If the desired elements are smaller or larger than the source
13455 // elements, we can use a matching integer vector type and then
13456 // truncate/sign extend.
13457 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13458 if (SVT == MatchingVecType) {
13459 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13460 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13461 }
13462 }
13463
13464 // Try to eliminate the sext of a setcc by zexting the compare operands.
13465 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13467 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13468 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13469 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13470
13471 // We have an unsupported narrow vector compare op that would be legal
13472 // if extended to the destination type. See if the compare operands
13473 // can be freely extended to the destination type.
13474 auto IsFreeToExtend = [&](SDValue V) {
13475 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13476 return true;
13477 // Match a simple, non-extended load that can be converted to a
13478 // legal {z/s}ext-load.
13479 // TODO: Allow widening of an existing {z/s}ext-load?
13480 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13481 ISD::isUNINDEXEDLoad(V.getNode()) &&
13482 cast<LoadSDNode>(V)->isSimple() &&
13483 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13484 return false;
13485
13486 // Non-chain users of this value must either be the setcc in this
13487 // sequence or extends that can be folded into the new {z/s}ext-load.
13488 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13489 UI != UE; ++UI) {
13490 // Skip uses of the chain and the setcc.
13491 SDNode *User = *UI;
13492 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13493 continue;
13494 // Extra users must have exactly the same cast we are about to create.
13495 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13496 // is enhanced similarly.
13497 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13498 return false;
13499 }
13500 return true;
13501 };
13502
13503 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13504 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13505 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13506 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13507 }
13508 }
13509 }
13510
13511 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13512 // Here, T can be 1 or -1, depending on the type of the setcc and
13513 // getBooleanContents().
13514 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13515
13516 // To determine the "true" side of the select, we need to know the high bit
13517 // of the value returned by the setcc if it evaluates to true.
13518 // If the type of the setcc is i1, then the true case of the select is just
13519 // sext(i1 1), that is, -1.
13520 // If the type of the setcc is larger (say, i8) then the value of the high
13521 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13522 // of the appropriate width.
13523 SDValue ExtTrueVal = (SetCCWidth == 1)
13524 ? DAG.getAllOnesConstant(DL, VT)
13525 : DAG.getBoolConstant(true, DL, VT, N00VT);
13526 SDValue Zero = DAG.getConstant(0, DL, VT);
13527 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13528 return SCC;
13529
13530 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13531 EVT SetCCVT = getSetCCResultType(N00VT);
13532 // Don't do this transform for i1 because there's a select transform
13533 // that would reverse it.
13534 // TODO: We should not do this transform at all without a target hook
13535 // because a sext is likely cheaper than a select?
13536 if (SetCCVT.getScalarSizeInBits() != 1 &&
13537 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13538 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13539 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13540 }
13541 }
13542
13543 return SDValue();
13544}
13545
13546SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13547 SDValue N0 = N->getOperand(0);
13548 EVT VT = N->getValueType(0);
13549 SDLoc DL(N);
13550
13551 if (VT.isVector())
13552 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13553 return FoldedVOp;
13554
13555 // sext(undef) = 0 because the top bits will all be the same.
13556 if (N0.isUndef())
13557 return DAG.getConstant(0, DL, VT);
13558
13559 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13560 return Res;
13561
13562 // fold (sext (sext x)) -> (sext x)
13563 // fold (sext (aext x)) -> (sext x)
13564 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13565 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13566
13567 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13568 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13572 N0.getOperand(0));
13573
13574 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13575 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13576 SDValue N00 = N0.getOperand(0);
13577 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13578 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13579 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13580 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13581 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13582 }
13583 }
13584
13585 if (N0.getOpcode() == ISD::TRUNCATE) {
13586 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13587 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13588 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13589 SDNode *oye = N0.getOperand(0).getNode();
13590 if (NarrowLoad.getNode() != N0.getNode()) {
13591 CombineTo(N0.getNode(), NarrowLoad);
13592 // CombineTo deleted the truncate, if needed, but not what's under it.
13593 AddToWorklist(oye);
13594 }
13595 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13596 }
13597
13598 // See if the value being truncated is already sign extended. If so, just
13599 // eliminate the trunc/sext pair.
13600 SDValue Op = N0.getOperand(0);
13601 unsigned OpBits = Op.getScalarValueSizeInBits();
13602 unsigned MidBits = N0.getScalarValueSizeInBits();
13603 unsigned DestBits = VT.getScalarSizeInBits();
13604 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13605
13606 if (OpBits == DestBits) {
13607 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13608 // bits, it is already ready.
13609 if (NumSignBits > DestBits-MidBits)
13610 return Op;
13611 } else if (OpBits < DestBits) {
13612 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13613 // bits, just sext from i32.
13614 if (NumSignBits > OpBits-MidBits)
13615 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13616 } else {
13617 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13618 // bits, just truncate to i32.
13619 if (NumSignBits > OpBits-MidBits)
13620 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13621 }
13622
13623 // fold (sext (truncate x)) -> (sextinreg x).
13624 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13625 N0.getValueType())) {
13626 if (OpBits < DestBits)
13627 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13628 else if (OpBits > DestBits)
13629 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13630 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13631 DAG.getValueType(N0.getValueType()));
13632 }
13633 }
13634
13635 // Try to simplify (sext (load x)).
13636 if (SDValue foldedExt =
13637 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13639 return foldedExt;
13640
13641 if (SDValue foldedExt =
13642 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13644 return foldedExt;
13645
13646 // fold (sext (load x)) to multiple smaller sextloads.
13647 // Only on illegal but splittable vectors.
13648 if (SDValue ExtLoad = CombineExtLoad(N))
13649 return ExtLoad;
13650
13651 // Try to simplify (sext (sextload x)).
13652 if (SDValue foldedExt = tryToFoldExtOfExtload(
13653 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13654 return foldedExt;
13655
13656 // Try to simplify (sext (atomic_load x)).
13657 if (SDValue foldedExt =
13658 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13659 return foldedExt;
13660
13661 // fold (sext (and/or/xor (load x), cst)) ->
13662 // (and/or/xor (sextload x), (sext cst))
13663 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13664 isa<LoadSDNode>(N0.getOperand(0)) &&
13665 N0.getOperand(1).getOpcode() == ISD::Constant &&
13666 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13667 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13668 EVT MemVT = LN00->getMemoryVT();
13669 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13670 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13672 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13673 ISD::SIGN_EXTEND, SetCCs, TLI);
13674 if (DoXform) {
13675 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13676 LN00->getChain(), LN00->getBasePtr(),
13677 LN00->getMemoryVT(),
13678 LN00->getMemOperand());
13680 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13681 ExtLoad, DAG.getConstant(Mask, DL, VT));
13682 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13683 bool NoReplaceTruncAnd = !N0.hasOneUse();
13684 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13685 CombineTo(N, And);
13686 // If N0 has multiple uses, change other uses as well.
13687 if (NoReplaceTruncAnd) {
13688 SDValue TruncAnd =
13690 CombineTo(N0.getNode(), TruncAnd);
13691 }
13692 if (NoReplaceTrunc) {
13693 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13694 } else {
13695 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13696 LN00->getValueType(0), ExtLoad);
13697 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13698 }
13699 return SDValue(N,0); // Return N so it doesn't get rechecked!
13700 }
13701 }
13702 }
13703
13704 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13705 return V;
13706
13707 if (SDValue V = foldSextSetcc(N))
13708 return V;
13709
13710 // fold (sext x) -> (zext x) if the sign bit is known zero.
13711 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13712 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13713 DAG.SignBitIsZero(N0)) {
13715 Flags.setNonNeg(true);
13716 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13717 }
13718
13719 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13720 return NewVSel;
13721
13722 // Eliminate this sign extend by doing a negation in the destination type:
13723 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13724 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13728 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13729 return DAG.getNegative(Zext, DL, VT);
13730 }
13731 // Eliminate this sign extend by doing a decrement in the destination type:
13732 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13733 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13737 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13738 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13739 }
13740
13741 // fold sext (not i1 X) -> add (zext i1 X), -1
13742 // TODO: This could be extended to handle bool vectors.
13743 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13744 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13745 TLI.isOperationLegal(ISD::ADD, VT)))) {
13746 // If we can eliminate the 'not', the sext form should be better
13747 if (SDValue NewXor = visitXOR(N0.getNode())) {
13748 // Returning N0 is a form of in-visit replacement that may have
13749 // invalidated N0.
13750 if (NewXor.getNode() == N0.getNode()) {
13751 // Return SDValue here as the xor should have already been replaced in
13752 // this sext.
13753 return SDValue();
13754 }
13755
13756 // Return a new sext with the new xor.
13757 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13758 }
13759
13760 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13761 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13762 }
13763
13764 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13765 return Res;
13766
13767 return SDValue();
13768}
13769
13770/// Given an extending node with a pop-count operand, if the target does not
13771/// support a pop-count in the narrow source type but does support it in the
13772/// destination type, widen the pop-count to the destination type.
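// For example, on a target where CTPOP is only legal for i32:
//   (i32 (zext (ctpop X:i16))) --> (i32 (ctpop (zext X:i16 to i32)))
// The extra zero bits do not change the population count.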
13773static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13774 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13775 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13776
13777 SDValue CtPop = Extend->getOperand(0);
13778 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13779 return SDValue();
13780
13781 EVT VT = Extend->getValueType(0);
13782 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13785 return SDValue();
13786
13787 // zext (ctpop X) --> ctpop (zext X)
13788 SDLoc DL(Extend);
13789 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13790 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13791}
13792
13793// If we have (zext (abs X)) where X is a type that will be promoted by type
13794// legalization, convert to (abs (sext X)). But don't extend past a legal type.
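// For example, if i8 is promoted to i32 by the target:
//   (zext (abs X:i8) to i64) --> (zext (abs (sext X:i8 to i32)) to i64)
// The abs is widened only to the legal i32, not all the way to i64.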
13795static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13796 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13797
13798 EVT VT = Extend->getValueType(0);
13799 if (VT.isVector())
13800 return SDValue();
13801
13802 SDValue Abs = Extend->getOperand(0);
13803 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13804 return SDValue();
13805
13806 EVT AbsVT = Abs.getValueType();
13807 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13808 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13809 TargetLowering::TypePromoteInteger)
13810 return SDValue();
13811
13812 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13813
13814 SDValue SExt =
13815 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13816 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13817 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13818}
13819
13820SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13821 SDValue N0 = N->getOperand(0);
13822 EVT VT = N->getValueType(0);
13823 SDLoc DL(N);
13824
13825 if (VT.isVector())
13826 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13827 return FoldedVOp;
13828
13829 // zext(undef) = 0
13830 if (N0.isUndef())
13831 return DAG.getConstant(0, DL, VT);
13832
13833 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13834 return Res;
13835
13836 // fold (zext (zext x)) -> (zext x)
13837 // fold (zext (aext x)) -> (zext x)
13838 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13839 SDNodeFlags Flags;
13840 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13841 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13842 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13843 }
13844
13845 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13846 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13850 N0.getOperand(0));
13851
13852 // fold (zext (truncate x)) -> (zext x) or
13853 // (zext (truncate x)) -> (truncate x)
13854 // This is valid when the truncated bits of x are already zero.
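// For example, if the top 16 bits of X:i32 are known zero, then
//   (zext (trunc X to i16) to i32) is simply X.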
13855 SDValue Op;
13856 KnownBits Known;
13857 if (isTruncateOf(DAG, N0, Op, Known)) {
13858 APInt TruncatedBits =
13859 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13860 APInt(Op.getScalarValueSizeInBits(), 0) :
13861 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13862 N0.getScalarValueSizeInBits(),
13863 std::min(Op.getScalarValueSizeInBits(),
13864 VT.getScalarSizeInBits()));
13865 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13866 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13867 DAG.salvageDebugInfo(*N0.getNode());
13868
13869 return ZExtOrTrunc;
13870 }
13871 }
13872
13873 // fold (zext (truncate x)) -> (and x, mask)
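// e.g. (zext (trunc X:i32 to i8) to i32) --> (and X, 255)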
13874 if (N0.getOpcode() == ISD::TRUNCATE) {
13875 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13876 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13877 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13878 SDNode *oye = N0.getOperand(0).getNode();
13879 if (NarrowLoad.getNode() != N0.getNode()) {
13880 CombineTo(N0.getNode(), NarrowLoad);
13881 // CombineTo deleted the truncate, if needed, but not what's under it.
13882 AddToWorklist(oye);
13883 }
13884 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13885 }
13886
13887 EVT SrcVT = N0.getOperand(0).getValueType();
13888 EVT MinVT = N0.getValueType();
13889
13890 if (N->getFlags().hasNonNeg()) {
13891 SDValue Op = N0.getOperand(0);
13892 unsigned OpBits = SrcVT.getScalarSizeInBits();
13893 unsigned MidBits = MinVT.getScalarSizeInBits();
13894 unsigned DestBits = VT.getScalarSizeInBits();
13895 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13896
13897 if (OpBits == DestBits) {
13898 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13899 // bits, it is already ready.
13900 if (NumSignBits > DestBits - MidBits)
13901 return Op;
13902 } else if (OpBits < DestBits) {
13903 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13904 // bits, just sext from i32.
13905 // FIXME: This can probably be ZERO_EXTEND nneg?
13906 if (NumSignBits > OpBits - MidBits)
13907 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13908 } else {
13909 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13910 // bits, just truncate to i32.
13911 if (NumSignBits > OpBits - MidBits)
13912 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13913 }
13914 }
13915
13916 // Try to mask before the extension to avoid having to generate a larger mask,
13917 // possibly over several sub-vectors.
13918 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13919 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13921 SDValue Op = N0.getOperand(0);
13922 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13923 AddToWorklist(Op.getNode());
13924 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13925 // Transfer the debug info; the new node is equivalent to N0.
13926 DAG.transferDbgValues(N0, ZExtOrTrunc);
13927 return ZExtOrTrunc;
13928 }
13929 }
13930
13931 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13932 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13933 AddToWorklist(Op.getNode());
13934 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13935 // We may safely transfer the debug info describing the truncate node over
13936 // to the equivalent and operation.
13937 DAG.transferDbgValues(N0, And);
13938 return And;
13939 }
13940 }
13941
13942 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13943 // if either of the casts is not free.
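// e.g. (zext (and (trunc X:i64 to i32), 7) to i64) --> (and X, 7)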
13944 if (N0.getOpcode() == ISD::AND &&
13945 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13946 N0.getOperand(1).getOpcode() == ISD::Constant &&
13947 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13948 !TLI.isZExtFree(N0.getValueType(), VT))) {
13949 SDValue X = N0.getOperand(0).getOperand(0);
13950 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13951 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13952 return DAG.getNode(ISD::AND, DL, VT,
13953 X, DAG.getConstant(Mask, DL, VT));
13954 }
13955
13956 // Try to simplify (zext (load x)).
13957 if (SDValue foldedExt = tryToFoldExtOfLoad(
13958 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13959 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13960 return foldedExt;
13961
13962 if (SDValue foldedExt =
13963 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13964 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13965 return foldedExt;
13966
13967 // fold (zext (load x)) to multiple smaller zextloads.
13968 // Only on illegal but splittable vectors.
13969 if (SDValue ExtLoad = CombineExtLoad(N))
13970 return ExtLoad;
13971
13972 // Try to simplify (zext (atomic_load x)).
13973 if (SDValue foldedExt =
13974 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13975 return foldedExt;
13976
13977 // fold (zext (and/or/xor (load x), cst)) ->
13978 // (and/or/xor (zextload x), (zext cst))
13979 // Unless (and (load x) cst) will match as a zextload already and has
13980 // additional users, or the zext is already free.
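// e.g. (zext (xor (load x:i8), 1) to i32) --> (xor (zextload x:i8 to i32), 1)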
13981 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13982 isa<LoadSDNode>(N0.getOperand(0)) &&
13983 N0.getOperand(1).getOpcode() == ISD::Constant &&
13984 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13985 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13986 EVT MemVT = LN00->getMemoryVT();
13987 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13988 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13989 bool DoXform = true;
13990 SmallVector<SDNode *, 4> SetCCs;
13991 if (!N0.hasOneUse()) {
13992 if (N0.getOpcode() == ISD::AND) {
13993 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13994 EVT LoadResultTy = AndC->getValueType(0);
13995 EVT ExtVT;
13996 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13997 DoXform = false;
13998 }
13999 }
14000 if (DoXform)
14001 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14002 ISD::ZERO_EXTEND, SetCCs, TLI);
14003 if (DoXform) {
14004 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14005 LN00->getChain(), LN00->getBasePtr(),
14006 LN00->getMemoryVT(),
14007 LN00->getMemOperand());
14008 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14009 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14010 ExtLoad, DAG.getConstant(Mask, DL, VT));
14011 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14012 bool NoReplaceTruncAnd = !N0.hasOneUse();
14013 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14014 CombineTo(N, And);
14015 // If N0 has multiple uses, change other uses as well.
14016 if (NoReplaceTruncAnd) {
14017 SDValue TruncAnd =
14018 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14019 CombineTo(N0.getNode(), TruncAnd);
14020 }
14021 if (NoReplaceTrunc) {
14022 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14023 } else {
14024 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14025 LN00->getValueType(0), ExtLoad);
14026 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14027 }
14028 return SDValue(N,0); // Return N so it doesn't get rechecked!
14029 }
14030 }
14031 }
14032
14033 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14034 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14035 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14036 return ZExtLoad;
14037
14038 // Try to simplify (zext (zextload x)).
14039 if (SDValue foldedExt = tryToFoldExtOfExtload(
14040 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14041 return foldedExt;
14042
14043 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14044 return V;
14045
14046 if (N0.getOpcode() == ISD::SETCC) {
14047 // Propagate fast-math-flags.
14048 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14049
14050 // Only do this before legalize for now.
14051 if (!LegalOperations && VT.isVector() &&
14052 N0.getValueType().getVectorElementType() == MVT::i1) {
14053 EVT N00VT = N0.getOperand(0).getValueType();
14054 if (getSetCCResultType(N00VT) == N0.getValueType())
14055 return SDValue();
14056
14057 // We know that the # elements of the results is the same as the #
14058 // elements of the compare (and the # elements of the compare result for
14059 // that matter). Check to see that they are the same size. If so, we know
14060 // that the element size of the sext'd result matches the element size of
14061 // the compare operands.
14062 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14063 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14064 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14065 N0.getOperand(1), N0.getOperand(2));
14066 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14067 }
14068
14069 // If the desired elements are smaller or larger than the source
14070 // elements we can use a matching integer vector type and then
14071 // truncate/any extend followed by zext_in_reg.
14072 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14073 SDValue VsetCC =
14074 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14075 N0.getOperand(1), N0.getOperand(2));
14076 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14077 N0.getValueType());
14078 }
14079
14080 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14081 EVT N0VT = N0.getValueType();
14082 EVT N00VT = N0.getOperand(0).getValueType();
14083 if (SDValue SCC = SimplifySelectCC(
14084 DL, N0.getOperand(0), N0.getOperand(1),
14085 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14086 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14087 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14088 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14089 }
14090
14091 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14092 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14093 !TLI.isZExtFree(N0, VT)) {
14094 SDValue ShVal = N0.getOperand(0);
14095 SDValue ShAmt = N0.getOperand(1);
14096 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14097 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14098 if (N0.getOpcode() == ISD::SHL) {
14099 // If the original shl may be shifting out bits, do not perform this
14100 // transformation.
14101 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14102 ShVal.getOperand(0).getValueSizeInBits();
14103 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14104 // If the shift is too large, then see if we can deduce that the
14105 // shift is safe anyway.
14106 // Create a mask that has ones for the bits being shifted out.
14107 APInt ShiftOutMask =
14108 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
14109 ShAmtC->getAPIntValue().getZExtValue());
14110
14111 // Check if the bits being shifted out are known to be zero.
14112 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
14113 return SDValue();
14114 }
14115 }
14116
14117 // Ensure that the shift amount is wide enough for the shifted value.
14118 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14119 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14120
14121 return DAG.getNode(N0.getOpcode(), DL, VT,
14122 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14123 }
14124 }
14125 }
14126
14127 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14128 return NewVSel;
14129
14130 if (SDValue NewCtPop = widenCtPop(N, DAG))
14131 return NewCtPop;
14132
14133 if (SDValue V = widenAbs(N, DAG))
14134 return V;
14135
14136 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14137 return Res;
14138
14139 // CSE zext nneg with sext if the zext is not free.
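// With the nneg flag the zext produces the same value as a sign extend of the
// same operand, so if an equivalent sign_extend node already exists, reuse it.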
14140 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14141 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14142 if (CSENode)
14143 return SDValue(CSENode, 0);
14144 }
14145
14146 return SDValue();
14147}
14148
14149SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14150 SDValue N0 = N->getOperand(0);
14151 EVT VT = N->getValueType(0);
14152 SDLoc DL(N);
14153
14154 // aext(undef) = undef
14155 if (N0.isUndef())
14156 return DAG.getUNDEF(VT);
14157
14158 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14159 return Res;
14160
14161 // fold (aext (aext x)) -> (aext x)
14162 // fold (aext (zext x)) -> (zext x)
14163 // fold (aext (sext x)) -> (sext x)
14164 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14165 N0.getOpcode() == ISD::SIGN_EXTEND) {
14166 SDNodeFlags Flags;
14167 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14168 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14169 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14170 }
14171
14172 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14173 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14174 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14178 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14179
14180 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14181 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14182 if (N0.getOpcode() == ISD::TRUNCATE) {
14183 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14184 SDNode *oye = N0.getOperand(0).getNode();
14185 if (NarrowLoad.getNode() != N0.getNode()) {
14186 CombineTo(N0.getNode(), NarrowLoad);
14187 // CombineTo deleted the truncate, if needed, but not what's under it.
14188 AddToWorklist(oye);
14189 }
14190 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14191 }
14192 }
14193
14194 // fold (aext (truncate x))
14195 if (N0.getOpcode() == ISD::TRUNCATE)
14196 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14197
14198 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14199 // if the trunc is not free.
14200 if (N0.getOpcode() == ISD::AND &&
14201 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14202 N0.getOperand(1).getOpcode() == ISD::Constant &&
14203 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14204 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14205 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14206 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14207 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14208 }
14209
14210 // fold (aext (load x)) -> (aext (truncate (extload x)))
14211 // None of the supported targets knows how to perform load and any_ext
14212 // on vectors in one instruction, so attempt to fold to zext instead.
14213 if (VT.isVector()) {
14214 // Try to simplify (zext (load x)).
14215 if (SDValue foldedExt =
14216 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14217 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14218 return foldedExt;
14219 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14221 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14222 bool DoXform = true;
14224 if (!N0.hasOneUse())
14225 DoXform =
14226 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14227 if (DoXform) {
14228 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14229 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14230 LN0->getBasePtr(), N0.getValueType(),
14231 LN0->getMemOperand());
14232 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14233 // If the load value is used only by N, replace it via CombineTo N.
14234 bool NoReplaceTrunc = N0.hasOneUse();
14235 CombineTo(N, ExtLoad);
14236 if (NoReplaceTrunc) {
14237 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14238 recursivelyDeleteUnusedNodes(LN0);
14239 } else {
14240 SDValue Trunc =
14241 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14242 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14243 }
14244 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14245 }
14246 }
14247
14248 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14249 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14250 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14251 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14252 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14253 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14254 ISD::LoadExtType ExtType = LN0->getExtensionType();
14255 EVT MemVT = LN0->getMemoryVT();
14256 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14257 SDValue ExtLoad =
14258 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14259 MemVT, LN0->getMemOperand());
14260 CombineTo(N, ExtLoad);
14261 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14262 recursivelyDeleteUnusedNodes(LN0);
14263 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14264 }
14265 }
14266
14267 if (N0.getOpcode() == ISD::SETCC) {
14268 // Propagate fast-math-flags.
14269 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14270
14271 // For vectors:
14272 // aext(setcc) -> vsetcc
14273 // aext(setcc) -> truncate(vsetcc)
14274 // aext(setcc) -> aext(vsetcc)
14275 // Only do this before legalize for now.
14276 if (VT.isVector() && !LegalOperations) {
14277 EVT N00VT = N0.getOperand(0).getValueType();
14278 if (getSetCCResultType(N00VT) == N0.getValueType())
14279 return SDValue();
14280
14281 // We know that the # elements of the results is the same as the
14282 // # elements of the compare (and the # elements of the compare result
14283 // for that matter). Check to see that they are the same size. If so,
14284 // we know that the element size of the sext'd result matches the
14285 // element size of the compare operands.
14286 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14287 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14288 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14289
14290 // If the desired elements are smaller or larger than the source
14291 // elements we can use a matching integer vector type and then
14292 // truncate/any extend
14293 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14294 SDValue VsetCC = DAG.getSetCC(
14295 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14296 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14297 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14298 }
14299
14300 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14301 if (SDValue SCC = SimplifySelectCC(
14302 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14303 DAG.getConstant(0, DL, VT),
14304 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14305 return SCC;
14306 }
14307
14308 if (SDValue NewCtPop = widenCtPop(N, DAG))
14309 return NewCtPop;
14310
14311 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14312 return Res;
14313
14314 return SDValue();
14315}
14316
14317SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14318 unsigned Opcode = N->getOpcode();
14319 SDValue N0 = N->getOperand(0);
14320 SDValue N1 = N->getOperand(1);
14321 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14322
14323 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14324 if (N0.getOpcode() == Opcode &&
14325 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14326 return N0;
14327
14328 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14329 N0.getOperand(0).getOpcode() == Opcode) {
14330 // We have an assert, truncate, assert sandwich. Make one stronger assert
14331 // by applying the smallest asserted type to the larger source value.
14332 // This eliminates the later assert:
14333 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14334 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14335 SDLoc DL(N);
14336 SDValue BigA = N0.getOperand(0);
14337 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14338 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14339 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14340 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14341 BigA.getOperand(0), MinAssertVTVal);
14342 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14343 }
14344
14345 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14346 // than X, just move the AssertZext in front of the truncate and drop the
14347 // AssertSext.
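// e.g. (AssertZext (trunc (AssertSext X, i32) to i16), i8)
//        --> (trunc (AssertZext X, i8) to i16)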
14348 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14350 Opcode == ISD::AssertZext) {
14351 SDValue BigA = N0.getOperand(0);
14352 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14353 if (AssertVT.bitsLT(BigA_AssertVT)) {
14354 SDLoc DL(N);
14355 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14356 BigA.getOperand(0), N1);
14357 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14358 }
14359 }
14360
14361 return SDValue();
14362}
14363
14364SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14365 SDLoc DL(N);
14366
14367 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14368 SDValue N0 = N->getOperand(0);
14369
14370 // Fold (assertalign (assertalign x, AL0), AL1) ->
14371 // (assertalign x, max(AL0, AL1))
14372 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14373 return DAG.getAssertAlign(DL, N0.getOperand(0),
14374 std::max(AL, AAN->getAlign()));
14375
14376 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14377 // this assert down to source operands so that those arithmetic ops could be
14378 // exposed to the DAG combining.
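// e.g. (assertalign (add X, 32), 16) --> (add (assertalign X, 16), 32),
// since the constant operand is already known to be 16-byte aligned.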
14379 switch (N0.getOpcode()) {
14380 default:
14381 break;
14382 case ISD::ADD:
14383 case ISD::SUB: {
14384 unsigned AlignShift = Log2(AL);
14385 SDValue LHS = N0.getOperand(0);
14386 SDValue RHS = N0.getOperand(1);
14387 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14388 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14389 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14390 if (LHSAlignShift < AlignShift)
14391 LHS = DAG.getAssertAlign(DL, LHS, AL);
14392 if (RHSAlignShift < AlignShift)
14393 RHS = DAG.getAssertAlign(DL, RHS, AL);
14394 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14395 }
14396 break;
14397 }
14398 }
14399
14400 return SDValue();
14401}
14402
14403/// If the result of a load is shifted/masked/truncated to an effectively
14404/// narrower type, try to transform the load to a narrower type and/or
14405/// use an extending load.
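// For example, on a little-endian target
//   (i32 (trunc (srl (i64 (load p)), 32)))
// can typically be rewritten as a plain i32 load from p+4, subject to the
// legality checks below.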
14406SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14407 unsigned Opc = N->getOpcode();
14408
14409 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14410 SDValue N0 = N->getOperand(0);
14411 EVT VT = N->getValueType(0);
14412 EVT ExtVT = VT;
14413
14414 // This transformation isn't valid for vector loads.
14415 if (VT.isVector())
14416 return SDValue();
14417
14418 // The ShAmt variable is used to indicate that we've consumed a right
14419 // shift. I.e. we want to narrow the width of the load by skipping to load the
14420 // ShAmt least significant bits.
14421 unsigned ShAmt = 0;
14422 // A special case is when the least significant bits from the load are masked
14423 // away, but using an AND rather than a right shift. ShiftedOffset is used to
14424 // indicate that the narrowed load should be left-shifted ShiftedOffset bits to
14425 // get the result.
14426 unsigned ShiftedOffset = 0;
14427 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14428 // extended to VT.
14429 if (Opc == ISD::SIGN_EXTEND_INREG) {
14430 ExtType = ISD::SEXTLOAD;
14431 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14432 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14433 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14434 // value, or it may be shifting a higher subword, half or byte into the
14435 // lowest bits.
14436
14437 // Only handle shift with constant shift amount, and the shiftee must be a
14438 // load.
14439 auto *LN = dyn_cast<LoadSDNode>(N0);
14440 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14441 if (!N1C || !LN)
14442 return SDValue();
14443 // If the shift amount is larger than the memory type then we're not
14444 // accessing any of the loaded bytes.
14445 ShAmt = N1C->getZExtValue();
14446 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14447 if (MemoryWidth <= ShAmt)
14448 return SDValue();
14449 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14450 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14451 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14452 // If original load is a SEXTLOAD then we can't simply replace it by a
14453 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14454 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14455 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14456 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14457 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14458 LN->getExtensionType() != ExtType)
14459 return SDValue();
14460 } else if (Opc == ISD::AND) {
14461 // An AND with a constant mask is the same as a truncate + zero-extend.
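// e.g. (and (load x:i32), 0xffff) acts like (zext (trunc (load x) to i16) to i32),
// so the load can be narrowed to a zextload from i16.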
14462 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14463 if (!AndC)
14464 return SDValue();
14465
14466 const APInt &Mask = AndC->getAPIntValue();
14467 unsigned ActiveBits = 0;
14468 if (Mask.isMask()) {
14469 ActiveBits = Mask.countr_one();
14470 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14471 ShiftedOffset = ShAmt;
14472 } else {
14473 return SDValue();
14474 }
14475
14476 ExtType = ISD::ZEXTLOAD;
14477 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14478 }
14479
14480 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14481 // a right shift. Here we redo some of those checks, to possibly adjust the
14482 // ExtVT even further based on "a masking AND". We could also end up here for
14483 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14484 // need to be done here as well.
14485 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14486 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14487 // Bail out when the SRL has more than one use. This is done for historical
14488 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14489 // check below? And maybe it could be non-profitable to do the transform in
14490 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14491 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
14492 if (!SRL.hasOneUse())
14493 return SDValue();
14494
14495 // Only handle shift with constant shift amount, and the shiftee must be a
14496 // load.
14497 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14498 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14499 if (!SRL1C || !LN)
14500 return SDValue();
14501
14502 // If the shift amount is larger than the input type then we're not
14503 // accessing any of the loaded bytes. If the load was a zextload/extload
14504 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14505 ShAmt = SRL1C->getZExtValue();
14506 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14507 if (ShAmt >= MemoryWidth)
14508 return SDValue();
14509
14510 // Because a SRL must be assumed to *need* to zero-extend the high bits
14511 // (as opposed to anyext the high bits), we can't combine the zextload
14512 // lowering of SRL and an sextload.
14513 if (LN->getExtensionType() == ISD::SEXTLOAD)
14514 return SDValue();
14515
14516 // Avoid reading outside the memory accessed by the original load (could
14517 // happen if we only adjust the load base pointer by ShAmt). Instead we
14518 // try to narrow the load even further. The typical scenario here is:
14519 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14520 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14521 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14522 // Don't replace sextload by zextload.
14523 if (ExtType == ISD::SEXTLOAD)
14524 return SDValue();
14525 // Narrow the load.
14526 ExtType = ISD::ZEXTLOAD;
14527 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14528 }
14529
14530 // If the SRL is only used by a masking AND, we may be able to adjust
14531 // the ExtVT to make the AND redundant.
14532 SDNode *Mask = *(SRL->use_begin());
14533 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14534 isa<ConstantSDNode>(Mask->getOperand(1))) {
14535 unsigned Offset, ActiveBits;
14536 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14537 if (ShiftMask.isMask()) {
14538 EVT MaskedVT =
14539 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14540 // If the mask is smaller, recompute the type.
14541 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14542 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14543 ExtVT = MaskedVT;
14544 } else if (ExtType == ISD::ZEXTLOAD &&
14545 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14546 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14547 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14548 // If the mask is shifted we can use a narrower load and a shl to insert
14549 // the trailing zeros.
14550 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14551 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14552 ExtVT = MaskedVT;
14553 ShAmt = Offset + ShAmt;
14554 ShiftedOffset = Offset;
14555 }
14556 }
14557 }
14558
14559 N0 = SRL.getOperand(0);
14560 }
14561
14562 // If the load is shifted left (and the result isn't shifted back right), we
14563 // can fold a truncate through the shift. The typical scenario is that N
14564 // points at a TRUNCATE here so the attempted fold is:
14565 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14566 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14567 unsigned ShLeftAmt = 0;
14568 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14569 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14570 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14571 ShLeftAmt = N01->getZExtValue();
14572 N0 = N0.getOperand(0);
14573 }
14574 }
14575
14576 // If we haven't found a load, we can't narrow it.
14577 if (!isa<LoadSDNode>(N0))
14578 return SDValue();
14579
14580 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14581 // Reducing the width of a volatile load is illegal. For atomics, we may be
14582 // able to reduce the width provided we never widen again. (see D66309)
14583 if (!LN0->isSimple() ||
14584 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14585 return SDValue();
14586
14587 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14588 unsigned LVTStoreBits =
14589 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14590 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14591 return LVTStoreBits - EVTStoreBits - ShAmt;
14592 };
14593
14594 // We need to adjust the pointer to the load by ShAmt bits in order to load
14595 // the correct bytes.
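// e.g. loading only the i32 held in bits 32..63 of an i64 uses a byte offset of
// 4 on little-endian targets; big-endian targets use the mirrored offset
// computed by AdjustBigEndianShift above.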
14596 unsigned PtrAdjustmentInBits =
14597 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14598
14599 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14600 SDLoc DL(LN0);
14601 // The original load itself didn't wrap, so an offset within it doesn't.
14602 SDNodeFlags Flags;
14603 Flags.setNoUnsignedWrap(true);
14604 SDValue NewPtr = DAG.getMemBasePlusOffset(
14605 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14606 AddToWorklist(NewPtr.getNode());
14607
14608 SDValue Load;
14609 if (ExtType == ISD::NON_EXTLOAD)
14610 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14611 LN0->getPointerInfo().getWithOffset(PtrOff),
14612 LN0->getOriginalAlign(),
14613 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14614 else
14615 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14616 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14617 LN0->getOriginalAlign(),
14618 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14619
14620 // Replace the old load's chain with the new load's chain.
14621 WorklistRemover DeadNodes(*this);
14622 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14623
14624 // Shift the result left, if we've swallowed a left shift.
14625 SDValue Result = Load;
14626 if (ShLeftAmt != 0) {
14627 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14628 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14629 ShImmTy = VT;
14630 // If the shift amount is as large as the result size (but, presumably,
14631 // no larger than the source) then the useful bits of the result are
14632 // zero; we can't simply return the shortened shift, because the result
14633 // of that operation is undefined.
14634 if (ShLeftAmt >= VT.getScalarSizeInBits())
14635 Result = DAG.getConstant(0, DL, VT);
14636 else
14637 Result = DAG.getNode(ISD::SHL, DL, VT,
14638 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14639 }
14640
14641 if (ShiftedOffset != 0) {
14642 // We're using a shifted mask, so the load now has an offset. This means
14643 // that data has been loaded into lower bytes than it would have been
14644 // before, so we need to shl the loaded data into the correct position in the
14645 // register.
14646 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14647 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14648 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14649 }
14650
14651 // Return the new loaded value.
14652 return Result;
14653}
14654
14655SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14656 SDValue N0 = N->getOperand(0);
14657 SDValue N1 = N->getOperand(1);
14658 EVT VT = N->getValueType(0);
14659 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14660 unsigned VTBits = VT.getScalarSizeInBits();
14661 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14662
14663 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14664 if (N0.isUndef())
14665 return DAG.getConstant(0, SDLoc(N), VT);
14666
14667 // fold (sext_in_reg c1) -> c1
14669 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14670
14671 // If the input is already sign extended, just drop the extension.
14672 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14673 return N0;
14674
14675 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14676 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14677 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14678 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14679 N1);
14680
14681 // fold (sext_in_reg (sext x)) -> (sext x)
14682 // fold (sext_in_reg (aext x)) -> (sext x)
14683 // if x is small enough or if we know that x has more than 1 sign bit and the
14684 // sign_extend_inreg is extending from one of them.
14685 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14686 SDValue N00 = N0.getOperand(0);
14687 unsigned N00Bits = N00.getScalarValueSizeInBits();
14688 if ((N00Bits <= ExtVTBits ||
14689 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14690 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14691 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14692 }
14693
14694 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14695 // if x is small enough or if we know that x has more than 1 sign bit and the
14696 // sign_extend_inreg is extending from one of them.
14698 SDValue N00 = N0.getOperand(0);
14699 unsigned N00Bits = N00.getScalarValueSizeInBits();
14700 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14701 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14702 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14703 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14704 if ((N00Bits == ExtVTBits ||
14705 (!IsZext && (N00Bits < ExtVTBits ||
14706 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14707 (!LegalOperations ||
14709 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14710 }
14711
14712 // fold (sext_in_reg (zext x)) -> (sext x)
14713 // iff we are extending the source sign bit.
14714 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14715 SDValue N00 = N0.getOperand(0);
14716 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14717 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14718 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14719 }
14720
14721 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14722 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14723 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14724
14725 // fold operands of sext_in_reg based on knowledge that the top bits are not
14726 // demanded.
14727 if (SimplifyDemandedBits(SDValue(N, 0)))
14728 return SDValue(N, 0);
14729
14730 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14731 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14732 if (SDValue NarrowLoad = reduceLoadWidth(N))
14733 return NarrowLoad;
14734
14735 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14736 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14737 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14738 if (N0.getOpcode() == ISD::SRL) {
14739 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14740 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14741 // We can turn this into an SRA iff the input to the SRL is already sign
14742 // extended enough.
14743 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14744 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14745 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14746 N0.getOperand(1));
14747 }
14748 }
14749
14750 // fold (sext_inreg (extload x)) -> (sextload x)
14751 // If sextload is not supported by target, we can only do the combine when
14752 // load has one use. Doing otherwise can block folding the extload with other
14753 // extends that the target does support.
14754 if (ISD::isEXTLoad(N0.getNode()) &&
14756 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14757 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14758 N0.hasOneUse()) ||
14759 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14760 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14761 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14762 LN0->getChain(),
14763 LN0->getBasePtr(), ExtVT,
14764 LN0->getMemOperand());
14765 CombineTo(N, ExtLoad);
14766 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14767 AddToWorklist(ExtLoad.getNode());
14768 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14769 }
14770
14771 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14773 N0.hasOneUse() &&
14774 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14775 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14776 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14777 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14778 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14779 LN0->getChain(),
14780 LN0->getBasePtr(), ExtVT,
14781 LN0->getMemOperand());
14782 CombineTo(N, ExtLoad);
14783 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14784 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14785 }
14786
14787 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14788 // ignore it if the masked load is already sign extended
14789 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14790 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14791 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14792 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14793 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14794 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14795 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14796 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14797 CombineTo(N, ExtMaskedLoad);
14798 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14799 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14800 }
14801 }
14802
14803 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14804 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14805 if (SDValue(GN0, 0).hasOneUse() &&
14806 ExtVT == GN0->getMemoryVT() &&
14808 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14809 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14810
14811 SDValue ExtLoad = DAG.getMaskedGather(
14812 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14813 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14814
14815 CombineTo(N, ExtLoad);
14816 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14817 AddToWorklist(ExtLoad.getNode());
14818 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14819 }
14820 }
14821
14822 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14823 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14824 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14825 N0.getOperand(1), false))
14826 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14827 }
14828
14829 // Fold (iM_signext_inreg
14830 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14831 // from iN)
14832 // -> (extract_subvector (signext iN_v to iM))
14833 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14835 SDValue InnerExt = N0.getOperand(0);
14836 EVT InnerExtVT = InnerExt->getValueType(0);
14837 SDValue Extendee = InnerExt->getOperand(0);
14838
14839 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14840 (!LegalOperations ||
14841 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14842 SDValue SignExtExtendee =
14843 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14844 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14845 N0.getOperand(1));
14846 }
14847 }
14848
14849 return SDValue();
14850}
14851
14853 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14854 bool LegalOperations) {
14855 unsigned InregOpcode = N->getOpcode();
14856 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14857
14858 SDValue Src = N->getOperand(0);
14859 EVT VT = N->getValueType(0);
14860 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14861 Src.getValueType().getVectorElementType(),
14863
14864 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14865 "Expected EXTEND_VECTOR_INREG dag node in input!");
14866
14867 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14868 // FIXME: one-use check may be overly restrictive
14869 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14870 return SDValue();
14871
14872 // Profitability check: we must be extending exactly one of its operands.
14873 // FIXME: this is probably overly restrictive.
14874 Src = Src.getOperand(0);
14875 if (Src.getValueType() != SrcVT)
14876 return SDValue();
14877
14878 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14879 return SDValue();
14880
14881 return DAG.getNode(Opcode, DL, VT, Src);
14882}
14883
14884SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14885 SDValue N0 = N->getOperand(0);
14886 EVT VT = N->getValueType(0);
14887 SDLoc DL(N);
14888
14889 if (N0.isUndef()) {
14890 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14891 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14892 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14893 ? DAG.getUNDEF(VT)
14894 : DAG.getConstant(0, DL, VT);
14895 }
14896
14897 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14898 return Res;
14899
14901 return SDValue(N, 0);
14902
14904 LegalOperations))
14905 return R;
14906
14907 return SDValue();
14908}
14909
14910SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14911 SDValue N0 = N->getOperand(0);
14912 EVT VT = N->getValueType(0);
14913 EVT SrcVT = N0.getValueType();
14914 bool isLE = DAG.getDataLayout().isLittleEndian();
14915 SDLoc DL(N);
14916
14917 // trunc(undef) = undef
14918 if (N0.isUndef())
14919 return DAG.getUNDEF(VT);
14920
14921 // fold (truncate (truncate x)) -> (truncate x)
14922 if (N0.getOpcode() == ISD::TRUNCATE)
14923 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14924
14925 // fold (truncate c1) -> c1
14926 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14927 return C;
14928
14929 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14930 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14931 N0.getOpcode() == ISD::SIGN_EXTEND ||
14932 N0.getOpcode() == ISD::ANY_EXTEND) {
14933 // if the source is smaller than the dest, we still need an extend.
14934 if (N0.getOperand(0).getValueType().bitsLT(VT))
14935 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14936 // if the source is larger than the dest, then we just need the truncate.
14937 if (N0.getOperand(0).getValueType().bitsGT(VT))
14938 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14939 // if the source and dest are the same type, we can drop both the extend
14940 // and the truncate.
14941 return N0.getOperand(0);
14942 }
14943
14944 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14945 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14946 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14947 N0.hasOneUse()) {
14948 SDValue X = N0.getOperand(0);
14949 SDValue ExtVal = N0.getOperand(1);
14950 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14951 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14952 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14953 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14954 }
14955 }
14956
14957 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14958 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14959 return SDValue();
14960
14961 // Fold extract-and-trunc into a narrow extract. For example:
14962 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14963 // i32 y = TRUNCATE(i64 x)
14964 // -- becomes --
14965 // v16i8 b = BITCAST (v2i64 val)
14966 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14967 //
14968 // Note: We only run this optimization after type legalization (which often
14969 // creates this pattern) and before operation legalization after which
14970 // we need to be more careful about the vector instructions that we generate.
14971 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14972 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14973 EVT VecTy = N0.getOperand(0).getValueType();
14974 EVT ExTy = N0.getValueType();
14975 EVT TrTy = N->getValueType(0);
14976
14977 auto EltCnt = VecTy.getVectorElementCount();
14978 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14979 auto NewEltCnt = EltCnt * SizeRatio;
14980
14981 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14982 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14983
14984 SDValue EltNo = N0->getOperand(1);
14985 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14986 int Elt = EltNo->getAsZExtVal();
14987 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14988 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14989 DAG.getBitcast(NVT, N0.getOperand(0)),
14991 }
14992 }
14993
14994 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14995 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14996 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14997 TLI.isTruncateFree(SrcVT, VT)) {
14998 SDLoc SL(N0);
14999 SDValue Cond = N0.getOperand(0);
15000 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15001 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15002 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15003 }
15004 }
15005
15006 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15007 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15008 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15009 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15010 SDValue Amt = N0.getOperand(1);
15011 KnownBits Known = DAG.computeKnownBits(Amt);
15012 unsigned Size = VT.getScalarSizeInBits();
15013 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15014 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15015 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15016 if (AmtVT != Amt.getValueType()) {
15017 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15018 AddToWorklist(Amt.getNode());
15019 }
15020 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15021 }
15022 }
15023
15024 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15025 return V;
15026
15027 if (SDValue ABD = foldABSToABD(N, DL))
15028 return ABD;
15029
15030 // Attempt to pre-truncate BUILD_VECTOR sources.
15031 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15032 N0.hasOneUse() &&
15033 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15034 // Avoid creating illegal types if running after type legalizer.
15035 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15036 EVT SVT = VT.getScalarType();
15037 SmallVector<SDValue, 8> TruncOps;
15038 for (const SDValue &Op : N0->op_values()) {
15039 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15040 TruncOps.push_back(TruncOp);
15041 }
15042 return DAG.getBuildVector(VT, DL, TruncOps);
15043 }
15044
15045 // trunc (splat_vector x) -> splat_vector (trunc x)
15046 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15047 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15048 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15049 EVT SVT = VT.getScalarType();
15050 return DAG.getSplatVector(
15051 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15052 }
15053
15054 // Fold a series of buildvector, bitcast, and truncate if possible.
15055 // For example fold
15056 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15057 // (2xi32 (buildvector x, y)).
15058 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15059 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15061 N0.getOperand(0).hasOneUse()) {
15062 SDValue BuildVect = N0.getOperand(0);
15063 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15064 EVT TruncVecEltTy = VT.getVectorElementType();
15065
15066 // Check that the element types match.
15067 if (BuildVectEltTy == TruncVecEltTy) {
15068 // Now we only need to compute the offset of the truncated elements.
15069 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15070 unsigned TruncVecNumElts = VT.getVectorNumElements();
15071 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15072
15073 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15074 "Invalid number of elements");
15075
15076 SmallVector<SDValue, 8> Opnds;
15077 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15078 Opnds.push_back(BuildVect.getOperand(i));
15079
15080 return DAG.getBuildVector(VT, DL, Opnds);
15081 }
15082 }
15083
15084 // fold (truncate (load x)) -> (smaller load x)
15085 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15086 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15087 if (SDValue Reduced = reduceLoadWidth(N))
15088 return Reduced;
15089
15090 // Handle the case where the truncated result is at least as wide as the
15091 // loaded type.
15092 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15093 auto *LN0 = cast<LoadSDNode>(N0);
15094 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15095 SDValue NewLoad = DAG.getExtLoad(
15096 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15097 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15098 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15099 return NewLoad;
15100 }
15101 }
15102 }
15103
15104 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15105 // where ... are all 'undef'.
15106 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15107 SmallVector<EVT, 8> VTs;
15108 SDValue V;
15109 unsigned Idx = 0;
15110 unsigned NumDefs = 0;
15111
15112 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15113 SDValue X = N0.getOperand(i);
15114 if (!X.isUndef()) {
15115 V = X;
15116 Idx = i;
15117 NumDefs++;
15118 }
15119 // Stop if more than one members are non-undef.
15120 if (NumDefs > 1)
15121 break;
15122
15125 X.getValueType().getVectorElementCount()));
15126 }
15127
15128 if (NumDefs == 0)
15129 return DAG.getUNDEF(VT);
15130
15131 if (NumDefs == 1) {
15132 assert(V.getNode() && "The single defined operand is empty!");
15133 SmallVector<SDValue, 8> Opnds;
15134 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15135 if (i != Idx) {
15136 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15137 continue;
15138 }
15139 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15140 AddToWorklist(NV.getNode());
15141 Opnds.push_back(NV);
15142 }
15143 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15144 }
15145 }
15146
15147 // Fold truncate of a bitcast of a vector to an extract of the low vector
15148 // element.
15149 //
15150 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
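 // The element index depends on endianness: on little-endian targets the
 // low-order bits of the scalar live in element 0, on big-endian targets they
 // live in the last element.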
15151 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15152 SDValue VecSrc = N0.getOperand(0);
15153 EVT VecSrcVT = VecSrc.getValueType();
15154 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15155 (!LegalOperations ||
15156 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15157 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15158 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15159 DAG.getVectorIdxConstant(Idx, DL));
15160 }
15161 }
15162
15163 // Simplify the operands using demanded-bits information.
15164 if (SimplifyDemandedBits(SDValue(N, 0)))
15165 return SDValue(N, 0);
15166
15167 // fold (truncate (extract_subvector(ext x))) ->
15168 // (extract_subvector x)
15169 // TODO: This can be generalized to cover cases where the truncate and extract
15170 // do not fully cancel each other out.
15171 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15172 SDValue N00 = N0.getOperand(0);
15173 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15174 N00.getOpcode() == ISD::ZERO_EXTEND ||
15175 N00.getOpcode() == ISD::ANY_EXTEND) {
15176 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15177 VT.getVectorElementType())
15178 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15179 N00.getOperand(0), N0.getOperand(1));
15180 }
15181 }
15182
15183 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15184 return NewVSel;
15185
15186 // Narrow a suitable binary operation with a non-opaque constant operand by
15187 // moving it ahead of the truncate. This is limited to pre-legalization
15188 // because targets may prefer a wider type during later combines and invert
15189 // this transform.
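 // For example, (i8 (trunc (i32 add X, C))) becomes (i8 add (trunc X), (trunc C)):
 // the low 8 bits of the sum depend only on the low 8 bits of the operands.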
15190 switch (N0.getOpcode()) {
15191 case ISD::ADD:
15192 case ISD::SUB:
15193 case ISD::MUL:
15194 case ISD::AND:
15195 case ISD::OR:
15196 case ISD::XOR:
15197 if (!LegalOperations && N0.hasOneUse() &&
15198 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15199 isConstantOrConstantVector(N0.getOperand(1), true))) {
15200 // TODO: We already restricted this to pre-legalization, but for vectors
15201 // we are extra cautious to not create an unsupported operation.
15202 // Target-specific changes are likely needed to avoid regressions here.
15203 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15204 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15205 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15206 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15207 }
15208 }
15209 break;
15210 case ISD::ADDE:
15211 case ISD::UADDO_CARRY:
15212 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15213 // (trunc uaddo_carry(X, Y, Carry)) ->
15214 // (uaddo_carry trunc(X), trunc(Y), Carry)
15215 // This applies when the carry output of the adde/uaddo_carry is not used.
15216 // We only do this for uaddo_carry before operation legalization.
15217 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15218 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15219 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15220 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15221 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15222 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15223 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15224 }
15225 break;
15226 case ISD::USUBSAT:
15227 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15228 // enough to know that the upper bits are zero, we must also ensure that we
15229 // don't introduce an extra truncate.
15230 if (!LegalOperations && N0.hasOneUse() &&
15231 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15232 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15233 VT.getScalarSizeInBits() &&
15234 hasOperation(N0.getOpcode(), VT)) {
15235 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15236 DAG, DL);
15237 }
15238 break;
15239 }
15240
15241 return SDValue();
15242}
15243
15244static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15245 SDValue Elt = N->getOperand(i);
15246 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15247 return Elt.getNode();
15248 return Elt.getOperand(Elt.getResNo()).getNode();
15249}
15250
15251/// build_pair (load, load) -> load
15252/// if load locations are consecutive.
15253SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15254 assert(N->getOpcode() == ISD::BUILD_PAIR);
15255
15256 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15257 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15258
15259 // A BUILD_PAIR always has the least significant part in elt 0 and the
15260 // most significant part in elt 1, so when combining into one large load we
15261 // need to consider the endianness.
15262 if (DAG.getDataLayout().isBigEndian())
15263 std::swap(LD1, LD2);
15264
15265 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15266 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15267 LD1->getAddressSpace() != LD2->getAddressSpace())
15268 return SDValue();
15269
15270 unsigned LD1Fast = 0;
15271 EVT LD1VT = LD1->getValueType(0);
15272 unsigned LD1Bytes = LD1VT.getStoreSize();
15273 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15274 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15275 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15276 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15277 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15278 LD1->getPointerInfo(), LD1->getAlign());
15279
15280 return SDValue();
15281}
15282
15283static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15284 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15285 // and Lo parts; on big-endian machines it doesn't.
15286 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15287}
15288
15289SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15290 const TargetLowering &TLI) {
15291 // If this is not a bitcast to an FP type or if the target doesn't have
15292 // IEEE754-compliant FP logic, we're done.
15293 EVT VT = N->getValueType(0);
15294 SDValue N0 = N->getOperand(0);
15295 EVT SourceVT = N0.getValueType();
15296
15297 if (!VT.isFloatingPoint())
15298 return SDValue();
15299
15300 // TODO: Handle cases where the integer constant is a different scalar
15301 // bitwidth to the FP.
15302 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15303 return SDValue();
15304
15305 unsigned FPOpcode;
15306 APInt SignMask;
15307 switch (N0.getOpcode()) {
15308 case ISD::AND:
15309 FPOpcode = ISD::FABS;
15310 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15311 break;
15312 case ISD::XOR:
15313 FPOpcode = ISD::FNEG;
15314 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15315 break;
15316 case ISD::OR:
15317 FPOpcode = ISD::FABS;
15318 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15319 break;
15320 default:
15321 return SDValue();
15322 }
15323
15324 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15325 return SDValue();
15326
15327 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15328 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15329 // removing this would require more changes.
15330 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15331 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15332 return true;
15333
15334 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15335 };
15336
15337 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15338 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15339 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15340 // fneg (fabs X)
15341 SDValue LogicOp0 = N0.getOperand(0);
15342 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15343 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15344 IsBitCastOrFree(LogicOp0, VT)) {
15345 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15346 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15347 NumFPLogicOpsConv++;
15348 if (N0.getOpcode() == ISD::OR)
15349 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15350 return FPOp;
15351 }
15352
15353 return SDValue();
15354}
15355
15356SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15357 SDValue N0 = N->getOperand(0);
15358 EVT VT = N->getValueType(0);
15359
15360 if (N0.isUndef())
15361 return DAG.getUNDEF(VT);
15362
15363 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15364 // Only do this before legalize types, unless both types are integer and the
15365 // scalar type is legal. Only do this before legalize ops, since the target
15366 // may be depending on the bitcast.
15367 // First check to see if this is all constant.
15368 // TODO: Support FP bitcasts after legalize types.
15369 if (VT.isVector() &&
15370 (!LegalTypes ||
15371 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15372 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15373 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15374 cast<BuildVectorSDNode>(N0)->isConstant())
15375 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15376 VT.getVectorElementType());
15377
15378 // If the input is a constant, let getNode fold it.
15379 if (isIntOrFPConstant(N0)) {
15380 // If we can't allow illegal operations, we need to check that this is just
15381 // an fp -> int or int -> fp conversion and that the resulting operation
15382 // will be legal.
15383 if (!LegalOperations ||
15384 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15385 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15386 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15387 TLI.isOperationLegal(ISD::Constant, VT))) {
15388 SDValue C = DAG.getBitcast(VT, N0);
15389 if (C.getNode() != N)
15390 return C;
15391 }
15392 }
15393
15394 // (conv (conv x, t1), t2) -> (conv x, t2)
15395 if (N0.getOpcode() == ISD::BITCAST)
15396 return DAG.getBitcast(VT, N0.getOperand(0));
15397
15398 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15399 // iff the current bitwise logicop type isn't legal
15400 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15401 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15402 auto IsFreeBitcast = [VT](SDValue V) {
15403 return (V.getOpcode() == ISD::BITCAST &&
15404 V.getOperand(0).getValueType() == VT) ||
15405 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15406 V->hasOneUse());
15407 };
15408 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15409 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15410 DAG.getBitcast(VT, N0.getOperand(0)),
15411 DAG.getBitcast(VT, N0.getOperand(1)));
15412 }
15413
15414 // fold (conv (load x)) -> (load (conv*)x)
15415 // If the resultant load doesn't need a higher alignment than the original!
15416 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15417 // Do not remove the cast if the types differ in endian layout.
15418 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15419 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15420 // If the load is volatile, we only want to change the load type if the
15421 // resulting load is legal. Otherwise we might increase the number of
15422 // memory accesses. We don't care if the original type was legal or not
15423 // as we assume software couldn't rely on the number of accesses of an
15424 // illegal type.
15425 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15426 TLI.isOperationLegal(ISD::LOAD, VT))) {
15427 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15428
15429 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15430 *LN0->getMemOperand())) {
15431 SDValue Load =
15432 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15433 LN0->getMemOperand());
15434 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15435 return Load;
15436 }
15437 }
15438
15439 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15440 return V;
15441
15442 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15443 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15444 //
15445 // For ppc_fp128:
15446 // fold (bitcast (fneg x)) ->
15447 // flipbit = signbit
15448 // (xor (bitcast x) (build_pair flipbit, flipbit))
15449 //
15450 // fold (bitcast (fabs x)) ->
15451 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15452 // (xor (bitcast x) (build_pair flipbit, flipbit))
15453 // This often reduces constant pool loads.
15454 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15455 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15456 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15457 !N0.getValueType().isVector()) {
15458 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15459 AddToWorklist(NewConv.getNode());
15460
15461 SDLoc DL(N);
15462 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15463 assert(VT.getSizeInBits() == 128);
15464 SDValue SignBit = DAG.getConstant(
15465 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15466 SDValue FlipBit;
15467 if (N0.getOpcode() == ISD::FNEG) {
15468 FlipBit = SignBit;
15469 AddToWorklist(FlipBit.getNode());
15470 } else {
15471 assert(N0.getOpcode() == ISD::FABS);
15472 SDValue Hi =
15473 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15474 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15475 SDLoc(NewConv)));
15476 AddToWorklist(Hi.getNode());
15477 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15478 AddToWorklist(FlipBit.getNode());
15479 }
15480 SDValue FlipBits =
15481 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15482 AddToWorklist(FlipBits.getNode());
15483 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15484 }
15485 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15486 if (N0.getOpcode() == ISD::FNEG)
15487 return DAG.getNode(ISD::XOR, DL, VT,
15488 NewConv, DAG.getConstant(SignBit, DL, VT));
15489 assert(N0.getOpcode() == ISD::FABS);
15490 return DAG.getNode(ISD::AND, DL, VT,
15491 NewConv, DAG.getConstant(~SignBit, DL, VT));
15492 }
15493
15494 // fold (bitconvert (fcopysign cst, x)) ->
15495 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15496 // Note that we don't handle (copysign x, cst) because this can always be
15497 // folded to an fneg or fabs.
15498 //
15499 // For ppc_fp128:
15500 // fold (bitcast (fcopysign cst, x)) ->
15501 // flipbit = (and (extract_element
15502 // (xor (bitcast cst), (bitcast x)), 0),
15503 // signbit)
15504 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15505 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15506 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15507 !VT.isVector()) {
15508 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15509 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15510 if (isTypeLegal(IntXVT)) {
15511 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15512 AddToWorklist(X.getNode());
15513
15514 // If X has a different width than the result/lhs, sext it or truncate it.
15515 unsigned VTWidth = VT.getSizeInBits();
15516 if (OrigXWidth < VTWidth) {
15517 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15518 AddToWorklist(X.getNode());
15519 } else if (OrigXWidth > VTWidth) {
15520 // To get the sign bit in the right place, we have to shift it right
15521 // before truncating.
15522 SDLoc DL(X);
15523 X = DAG.getNode(ISD::SRL, DL,
15524 X.getValueType(), X,
15525 DAG.getConstant(OrigXWidth-VTWidth, DL,
15526 X.getValueType()));
15527 AddToWorklist(X.getNode());
15528 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15529 AddToWorklist(X.getNode());
15530 }
15531
15532 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15533 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15534 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15535 AddToWorklist(Cst.getNode());
15536 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15537 AddToWorklist(X.getNode());
15538 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15539 AddToWorklist(XorResult.getNode());
15540 SDValue XorResult64 = DAG.getNode(
15541 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15542 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15543 SDLoc(XorResult)));
15544 AddToWorklist(XorResult64.getNode());
15545 SDValue FlipBit =
15546 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15547 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15548 AddToWorklist(FlipBit.getNode());
15549 SDValue FlipBits =
15550 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15551 AddToWorklist(FlipBits.getNode());
15552 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15553 }
15554 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15555 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15556 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15557 AddToWorklist(X.getNode());
15558
15559 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15560 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15561 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15562 AddToWorklist(Cst.getNode());
15563
15564 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15565 }
15566 }
15567
15568 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15569 if (N0.getOpcode() == ISD::BUILD_PAIR)
15570 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15571 return CombineLD;
15572
15573 // Remove double bitcasts from shuffles - this is often a legacy of
15574 // XformToShuffleWithZero being used to combine bitmaskings (of
15575 // float vectors bitcast to integer vectors) into shuffles.
15576 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
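 // For example, (v4i32 bitcast (v2i64 shuffle (bitcast s0), (bitcast s1), <1,0>))
 // with s0 and s1 of type v4i32 becomes (v4i32 shuffle s0, s1, <2,3,0,1>): each
 // wide mask element is expanded by MaskScale = 2 into a pair of narrow elements.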
15577 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15578 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15579 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15580 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15581 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15582
15583 // If operands are a bitcast, peek through if it casts the original VT.
15584 // If operands are a constant, just bitcast back to original VT.
15585 auto PeekThroughBitcast = [&](SDValue Op) {
15586 if (Op.getOpcode() == ISD::BITCAST &&
15587 Op.getOperand(0).getValueType() == VT)
15588 return SDValue(Op.getOperand(0));
15589 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15590 return DAG.getBitcast(VT, Op);
15591 return SDValue();
15592 };
15593
15594 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15595 // the result type of this bitcast. This would eliminate at least one
15596 // bitcast. See the transform in InstCombine.
15597 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15598 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15599 if (!(SV0 && SV1))
15600 return SDValue();
15601
15602 int MaskScale =
15603 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15604 SmallVector<int, 8> NewMask;
15605 for (int M : SVN->getMask())
15606 for (int i = 0; i != MaskScale; ++i)
15607 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15608
15609 SDValue LegalShuffle =
15610 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15611 if (LegalShuffle)
15612 return LegalShuffle;
15613 }
15614
15615 return SDValue();
15616}
15617
15618SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15619 EVT VT = N->getValueType(0);
15620 return CombineConsecutiveLoads(N, VT);
15621}
15622
15623SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15624 SDValue N0 = N->getOperand(0);
15625
15626 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15627 return N0;
15628
15629 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15630 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15631 // example https://reviews.llvm.org/D136529#4120959.
15632 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15633 return SDValue();
15634
15635 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15636 // Try to push freeze through instructions that propagate but don't produce
15637 // poison as far as possible. If the operand of the freeze satisfies three
15638 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all but
15639 // one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR or
15640 // similar), then push the freeze through to the non-guaranteed operands.
15641 // NOTE: we will strip poison-generating flags, so ignore them here.
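 // For example, freeze (add X, Y) where Y is already known non-poison can become
 // (add (freeze X), Y): once its nuw/nsw flags are dropped, the add cannot
 // introduce new poison of its own.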
15642 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15643 /*ConsiderFlags*/ false) ||
15644 N0->getNumValues() != 1 || !N0->hasOneUse())
15645 return SDValue();
15646
15647 bool AllowMultipleMaybePoisonOperands =
15648 N0.getOpcode() == ISD::BUILD_VECTOR ||
15649 N0.getOpcode() == ISD::BUILD_PAIR ||
15650 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15651 N0.getOpcode() == ISD::CONCAT_VECTORS;
15652
15653 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15654 // ones" or "constant" into something that depends on FrozenUndef. We can
15655 // instead pick undef values to keep those properties, while at the same time
15656 // folding away the freeze.
15657 // If we implement a more general solution for folding away freeze(undef) in
15658 // the future, then this special handling can be removed.
15659 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15660 SDLoc DL(N0);
15661 EVT VT = N0.getValueType();
15662 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15663 return DAG.getAllOnesConstant(DL, VT);
15664 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15665 SmallVector<SDValue, 8> NewVecC;
15666 for (const SDValue &Op : N0->op_values())
15667 NewVecC.push_back(
15668 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15669 return DAG.getBuildVector(VT, DL, NewVecC);
15670 }
15671 }
15672
15673 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15674 for (SDValue Op : N0->ops()) {
15675 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15676 /*Depth*/ 1))
15677 continue;
15678 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15679 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15680 if (!HadMaybePoisonOperands)
15681 continue;
15682 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15683 // Multiple maybe-poison ops when not allowed - bail out.
15684 return SDValue();
15685 }
15686 }
15687 // NOTE: the whole op may still not be guaranteed to be free of undef or
15688 // poison, because it could create undef or poison due to its poison-generating flags.
15689 // So not finding any maybe-poison operands is fine.
15690
15691 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15692 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15693 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15694 continue;
15695 // First, freeze each offending operand.
15696 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15697 // Then, change all other uses of unfrozen operand to use frozen operand.
15698 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15699 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15700 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15701 // But, that also updated the use in the freeze we just created, thus
15702 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15703 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15704 MaybePoisonOperand);
15705 }
15706 }
15707
15708 // This node has been merged with another.
15709 if (N->getOpcode() == ISD::DELETED_NODE)
15710 return SDValue(N, 0);
15711
15712 // The whole node may have been updated, so the value we were holding
15713 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15714 N0 = N->getOperand(0);
15715
15716 // Finally, recreate the node; its operands were updated to use
15717 // frozen operands, so we just need to use its "original" operands.
15718 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15719 // Special-handle ISD::UNDEF: each one of them can be its own thing.
15720 for (SDValue &Op : Ops) {
15721 if (Op.getOpcode() == ISD::UNDEF)
15722 Op = DAG.getFreeze(Op);
15723 }
15724
15725 SDValue R;
15726 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15727 // Special case handling for ShuffleVectorSDNode nodes.
15728 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15729 SVN->getMask());
15730 } else {
15731 // NOTE: this strips poison generating flags.
15732 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15733 }
15734 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15735 "Can't create node that may be undef/poison!");
15736 return R;
15737}
15738
15739/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15740/// operands. DstEltVT indicates the destination element value type.
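/// For example, folding a bitcast of (v2i32 BUILD_VECTOR 0x3f800000, 0x40000000)
/// with DstEltVT == f32 produces (v2f32 BUILD_VECTOR 1.0f, 2.0f).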
15741SDValue DAGCombiner::
15742ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15743 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15744
15745 // If this is already the right type, we're done.
15746 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15747
15748 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15749 unsigned DstBitSize = DstEltVT.getSizeInBits();
15750
15751 // If this is a conversion of N elements of one type to N elements of another
15752 // type, convert each element. This handles FP<->INT cases.
15753 if (SrcBitSize == DstBitSize) {
15754 SmallVector<SDValue, 8> Ops;
15755 for (SDValue Op : BV->op_values()) {
15756 // If the vector element type is not legal, the BUILD_VECTOR operands
15757 // are promoted and implicitly truncated. Make that explicit here.
15758 if (Op.getValueType() != SrcEltVT)
15759 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15760 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15761 AddToWorklist(Ops.back().getNode());
15762 }
15763 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15764 BV->getValueType(0).getVectorNumElements());
15765 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15766 }
15767
15768 // Otherwise, we're growing or shrinking the elements. To avoid having to
15769 // handle annoying details of growing/shrinking FP values, we convert them to
15770 // int first.
15771 if (SrcEltVT.isFloatingPoint()) {
15772 // Convert the input float vector to an int vector where the elements are the
15773 // same size.
15774 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15775 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15776 SrcEltVT = IntVT;
15777 }
15778
15779 // Now we know the input is an integer vector. If the output is a FP type,
15780 // convert to integer first, then to FP of the right size.
15781 if (DstEltVT.isFloatingPoint()) {
15782 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15783 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15784
15785 // Next, convert to FP elements of the same size.
15786 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15787 }
15788
15789 // Okay, we know the src/dst types are both integers of differing types.
15790 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15791
15792 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15793 // BuildVectorSDNode?
15794 auto *BVN = cast<BuildVectorSDNode>(BV);
15795
15796 // Extract the constant raw bit data.
15797 BitVector UndefElements;
15798 SmallVector<APInt> RawBits;
15799 bool IsLE = DAG.getDataLayout().isLittleEndian();
15800 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15801 return SDValue();
15802
15803 SDLoc DL(BV);
15804 SmallVector<SDValue, 8> Ops;
15805 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15806 if (UndefElements[I])
15807 Ops.push_back(DAG.getUNDEF(DstEltVT));
15808 else
15809 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15810 }
15811
15812 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15813 return DAG.getBuildVector(VT, DL, Ops);
15814}
15815
15816// Returns true if floating point contraction is allowed on the FMUL-SDValue
15817// `N`
15818 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15819 assert(N.getOpcode() == ISD::FMUL);
15820
15821 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15822 N->getFlags().hasAllowContract();
15823}
15824
15825// Returns true if `N` can assume no infinities involved in its computation.
15826 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15827 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15828}
15829
15830/// Try to perform FMA combining on a given FADD node.
15831template <class MatchContextClass>
15832SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15833 SDValue N0 = N->getOperand(0);
15834 SDValue N1 = N->getOperand(1);
15835 EVT VT = N->getValueType(0);
15836 SDLoc SL(N);
15837 MatchContextClass matcher(DAG, TLI, N);
15838 const TargetOptions &Options = DAG.getTarget().Options;
15839
15840 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15841
15842 // Floating-point multiply-add with intermediate rounding.
15843 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15844 // FIXME: Add VP_FMAD opcode.
15845 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15846
15847 // Floating-point multiply-add without intermediate rounding.
15848 bool HasFMA =
15849 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15850 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15851
15852 // No valid opcode, do not combine.
15853 if (!HasFMAD && !HasFMA)
15854 return SDValue();
15855
15856 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15857 Options.UnsafeFPMath || HasFMAD);
15858 // If the addition is not contractable, do not combine.
15859 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15860 return SDValue();
15861
15862 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15863 // beneficial. It does not reduce latency. It increases register pressure. It
15864 // replaces an fadd with an fma which is a more complex instruction, so is
15865 // likely to have a larger encoding, use more functional units, etc.
15866 if (N0 == N1)
15867 return SDValue();
15868
15869 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15870 return SDValue();
15871
15872 // Always prefer FMAD to FMA for precision.
15873 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15874 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15875
15876 auto isFusedOp = [&](SDValue N) {
15877 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15878 };
15879
15880 // Is the node an FMUL and contractable either due to global flags or
15881 // SDNodeFlags.
15882 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15883 if (!matcher.match(N, ISD::FMUL))
15884 return false;
15885 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15886 };
15887 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15888 // prefer to fold the multiply with fewer uses.
15889 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
15890 if (N0->use_size() > N1->use_size())
15891 std::swap(N0, N1);
15892 }
15893
15894 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15895 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15896 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15897 N0.getOperand(1), N1);
15898 }
15899
15900 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15901 // Note: Commutes FADD operands.
15902 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15903 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15904 N1.getOperand(1), N0);
15905 }
15906
15907 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15908 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15909 // This also works with nested fma instructions:
15910 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15911 // fma A, B, (fma C, D, fma (E, F, G))
15912 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15913 // fma A, B, (fma C, D, fma (E, F, G)).
15914 // This requires reassociation because it changes the order of operations.
15915 bool CanReassociate =
15916 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15917 if (CanReassociate) {
15918 SDValue FMA, E;
15919 if (isFusedOp(N0) && N0.hasOneUse()) {
15920 FMA = N0;
15921 E = N1;
15922 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15923 FMA = N1;
15924 E = N0;
15925 }
15926
15927 SDValue TmpFMA = FMA;
15928 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15929 SDValue FMul = TmpFMA->getOperand(2);
15930 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15931 SDValue C = FMul.getOperand(0);
15932 SDValue D = FMul.getOperand(1);
15933 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15934 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15935 // Replacing the inner FMul could cause the outer FMA to be simplified
15936 // away.
15937 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15938 }
15939
15940 TmpFMA = TmpFMA->getOperand(2);
15941 }
15942 }
15943
15944 // Look through FP_EXTEND nodes to do more combining.
15945
15946 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15947 if (matcher.match(N0, ISD::FP_EXTEND)) {
15948 SDValue N00 = N0.getOperand(0);
15949 if (isContractableFMUL(N00) &&
15950 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15951 N00.getValueType())) {
15952 return matcher.getNode(
15953 PreferredFusedOpcode, SL, VT,
15954 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15955 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15956 }
15957 }
15958
15959 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15960 // Note: Commutes FADD operands.
15961 if (matcher.match(N1, ISD::FP_EXTEND)) {
15962 SDValue N10 = N1.getOperand(0);
15963 if (isContractableFMUL(N10) &&
15964 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15965 N10.getValueType())) {
15966 return matcher.getNode(
15967 PreferredFusedOpcode, SL, VT,
15968 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15969 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15970 }
15971 }
15972
15973 // More folding opportunities when target permits.
15974 if (Aggressive) {
15975 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15976 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15977 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15978 SDValue Z) {
15979 return matcher.getNode(
15980 PreferredFusedOpcode, SL, VT, X, Y,
15981 matcher.getNode(PreferredFusedOpcode, SL, VT,
15982 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15983 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15984 };
15985 if (isFusedOp(N0)) {
15986 SDValue N02 = N0.getOperand(2);
15987 if (matcher.match(N02, ISD::FP_EXTEND)) {
15988 SDValue N020 = N02.getOperand(0);
15989 if (isContractableFMUL(N020) &&
15990 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15991 N020.getValueType())) {
15992 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15993 N020.getOperand(0), N020.getOperand(1),
15994 N1);
15995 }
15996 }
15997 }
15998
15999 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16000 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16001 // FIXME: This turns two single-precision and one double-precision
16002 // operation into two double-precision operations, which might not be
16003 // interesting for all targets, especially GPUs.
16004 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16005 SDValue Z) {
16006 return matcher.getNode(
16007 PreferredFusedOpcode, SL, VT,
16008 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16009 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16010 matcher.getNode(PreferredFusedOpcode, SL, VT,
16011 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16012 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16013 };
16014 if (N0.getOpcode() == ISD::FP_EXTEND) {
16015 SDValue N00 = N0.getOperand(0);
16016 if (isFusedOp(N00)) {
16017 SDValue N002 = N00.getOperand(2);
16018 if (isContractableFMUL(N002) &&
16019 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16020 N00.getValueType())) {
16021 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16022 N002.getOperand(0), N002.getOperand(1),
16023 N1);
16024 }
16025 }
16026 }
16027
16028 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16029 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16030 if (isFusedOp(N1)) {
16031 SDValue N12 = N1.getOperand(2);
16032 if (N12.getOpcode() == ISD::FP_EXTEND) {
16033 SDValue N120 = N12.getOperand(0);
16034 if (isContractableFMUL(N120) &&
16035 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16036 N120.getValueType())) {
16037 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16038 N120.getOperand(0), N120.getOperand(1),
16039 N0);
16040 }
16041 }
16042 }
16043
16044 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16045 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16046 // FIXME: This turns two single-precision and one double-precision
16047 // operation into two double-precision operations, which might not be
16048 // interesting for all targets, especially GPUs.
16049 if (N1.getOpcode() == ISD::FP_EXTEND) {
16050 SDValue N10 = N1.getOperand(0);
16051 if (isFusedOp(N10)) {
16052 SDValue N102 = N10.getOperand(2);
16053 if (isContractableFMUL(N102) &&
16054 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16055 N10.getValueType())) {
16056 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16057 N102.getOperand(0), N102.getOperand(1),
16058 N0);
16059 }
16060 }
16061 }
16062 }
16063
16064 return SDValue();
16065}
16066
16067/// Try to perform FMA combining on a given FSUB node.
16068template <class MatchContextClass>
16069SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16070 SDValue N0 = N->getOperand(0);
16071 SDValue N1 = N->getOperand(1);
16072 EVT VT = N->getValueType(0);
16073 SDLoc SL(N);
16074 MatchContextClass matcher(DAG, TLI, N);
16075 const TargetOptions &Options = DAG.getTarget().Options;
16076
16077 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16078
16079 // Floating-point multiply-add with intermediate rounding.
16080 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16081 // FIXME: Add VP_FMAD opcode.
16082 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16083
16084 // Floating-point multiply-add without intermediate rounding.
16085 bool HasFMA =
16086 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16087 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
16088
16089 // No valid opcode, do not combine.
16090 if (!HasFMAD && !HasFMA)
16091 return SDValue();
16092
16093 const SDNodeFlags Flags = N->getFlags();
16094 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16095 Options.UnsafeFPMath || HasFMAD);
16096
16097 // If the subtraction is not contractable, do not combine.
16098 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16099 return SDValue();
16100
16101 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16102 return SDValue();
16103
16104 // Always prefer FMAD to FMA for precision.
16105 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16106 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16107 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16108
16109 // Is the node an FMUL and contractable either due to global flags or
16110 // SDNodeFlags.
16111 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16112 if (!matcher.match(N, ISD::FMUL))
16113 return false;
16114 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16115 };
16116
16117 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16118 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16119 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16120 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16121 XY.getOperand(1),
16122 matcher.getNode(ISD::FNEG, SL, VT, Z));
16123 }
16124 return SDValue();
16125 };
16126
16127 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16128 // Note: Commutes FSUB operands.
16129 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16130 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16131 return matcher.getNode(
16132 PreferredFusedOpcode, SL, VT,
16133 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16134 YZ.getOperand(1), X);
16135 }
16136 return SDValue();
16137 };
16138
16139 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16140 // prefer to fold the multiply with fewer uses.
16141 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16142 (N0->use_size() > N1->use_size())) {
16143 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16144 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16145 return V;
16146 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16147 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16148 return V;
16149 } else {
16150 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16151 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16152 return V;
16153 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16154 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16155 return V;
16156 }
16157
16158 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16159 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16160 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16161 SDValue N00 = N0.getOperand(0).getOperand(0);
16162 SDValue N01 = N0.getOperand(0).getOperand(1);
16163 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16164 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16165 matcher.getNode(ISD::FNEG, SL, VT, N1));
16166 }
16167
16168 // Look through FP_EXTEND nodes to do more combining.
16169
16170 // fold (fsub (fpext (fmul x, y)), z)
16171 // -> (fma (fpext x), (fpext y), (fneg z))
16172 if (matcher.match(N0, ISD::FP_EXTEND)) {
16173 SDValue N00 = N0.getOperand(0);
16174 if (isContractableFMUL(N00) &&
16175 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16176 N00.getValueType())) {
16177 return matcher.getNode(
16178 PreferredFusedOpcode, SL, VT,
16179 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16180 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16181 matcher.getNode(ISD::FNEG, SL, VT, N1));
16182 }
16183 }
16184
16185 // fold (fsub x, (fpext (fmul y, z)))
16186 // -> (fma (fneg (fpext y)), (fpext z), x)
16187 // Note: Commutes FSUB operands.
16188 if (matcher.match(N1, ISD::FP_EXTEND)) {
16189 SDValue N10 = N1.getOperand(0);
16190 if (isContractableFMUL(N10) &&
16191 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16192 N10.getValueType())) {
16193 return matcher.getNode(
16194 PreferredFusedOpcode, SL, VT,
16195 matcher.getNode(
16196 ISD::FNEG, SL, VT,
16197 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16198 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16199 }
16200 }
16201
16202 // fold (fsub (fpext (fneg (fmul, x, y))), z)
16203 // -> (fneg (fma (fpext x), (fpext y), z))
16204 // Note: This could be removed with appropriate canonicalization of the
16205 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16206 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16207 // from implementing the canonicalization in visitFSUB.
16208 if (matcher.match(N0, ISD::FP_EXTEND)) {
16209 SDValue N00 = N0.getOperand(0);
16210 if (matcher.match(N00, ISD::FNEG)) {
16211 SDValue N000 = N00.getOperand(0);
16212 if (isContractableFMUL(N000) &&
16213 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16214 N00.getValueType())) {
16215 return matcher.getNode(
16216 ISD::FNEG, SL, VT,
16217 matcher.getNode(
16218 PreferredFusedOpcode, SL, VT,
16219 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16220 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16221 N1));
16222 }
16223 }
16224 }
16225
16226 // fold (fsub (fneg (fpext (fmul, x, y))), z)
16227 // -> (fneg (fma (fpext x)), (fpext y), z)
16228 // Note: This could be removed with appropriate canonicalization of the
16229 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16230 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16231 // from implementing the canonicalization in visitFSUB.
16232 if (matcher.match(N0, ISD::FNEG)) {
16233 SDValue N00 = N0.getOperand(0);
16234 if (matcher.match(N00, ISD::FP_EXTEND)) {
16235 SDValue N000 = N00.getOperand(0);
16236 if (isContractableFMUL(N000) &&
16237 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16238 N000.getValueType())) {
16239 return matcher.getNode(
16240 ISD::FNEG, SL, VT,
16241 matcher.getNode(
16242 PreferredFusedOpcode, SL, VT,
16243 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16244 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16245 N1));
16246 }
16247 }
16248 }
16249
16250 auto isReassociable = [&Options](SDNode *N) {
16251 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16252 };
16253
16254 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16255 &isReassociable](SDValue N) {
16256 return isContractableFMUL(N) && isReassociable(N.getNode());
16257 };
16258
16259 auto isFusedOp = [&](SDValue N) {
16260 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16261 };
16262
16263 // More folding opportunities when target permits.
16264 if (Aggressive && isReassociable(N)) {
16265 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16266 // fold (fsub (fma x, y, (fmul u, v)), z)
16267 // -> (fma x, y (fma u, v, (fneg z)))
16268 if (CanFuse && isFusedOp(N0) &&
16269 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16270 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16271 return matcher.getNode(
16272 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16273 matcher.getNode(PreferredFusedOpcode, SL, VT,
16274 N0.getOperand(2).getOperand(0),
16275 N0.getOperand(2).getOperand(1),
16276 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16277 }
16278
16279 // fold (fsub x, (fma y, z, (fmul u, v)))
16280 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16281 if (CanFuse && isFusedOp(N1) &&
16282 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16283 N1->hasOneUse() && NoSignedZero) {
16284 SDValue N20 = N1.getOperand(2).getOperand(0);
16285 SDValue N21 = N1.getOperand(2).getOperand(1);
16286 return matcher.getNode(
16287 PreferredFusedOpcode, SL, VT,
16288 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16289 N1.getOperand(1),
16290 matcher.getNode(PreferredFusedOpcode, SL, VT,
16291 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16292 }
16293
16294 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16295 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16296 if (isFusedOp(N0) && N0->hasOneUse()) {
16297 SDValue N02 = N0.getOperand(2);
16298 if (matcher.match(N02, ISD::FP_EXTEND)) {
16299 SDValue N020 = N02.getOperand(0);
16300 if (isContractableAndReassociableFMUL(N020) &&
16301 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16302 N020.getValueType())) {
16303 return matcher.getNode(
16304 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16305 matcher.getNode(
16306 PreferredFusedOpcode, SL, VT,
16307 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16308 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16309 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16310 }
16311 }
16312 }
16313
16314 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16315 // -> (fma (fpext x), (fpext y),
16316 // (fma (fpext u), (fpext v), (fneg z)))
16317 // FIXME: This turns two single-precision and one double-precision
16318 // operation into two double-precision operations, which might not be
16319 // interesting for all targets, especially GPUs.
16320 if (matcher.match(N0, ISD::FP_EXTEND)) {
16321 SDValue N00 = N0.getOperand(0);
16322 if (isFusedOp(N00)) {
16323 SDValue N002 = N00.getOperand(2);
16324 if (isContractableAndReassociableFMUL(N002) &&
16325 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16326 N00.getValueType())) {
16327 return matcher.getNode(
16328 PreferredFusedOpcode, SL, VT,
16329 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16330 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16331 matcher.getNode(
16332 PreferredFusedOpcode, SL, VT,
16333 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16334 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16335 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16336 }
16337 }
16338 }
16339
16340 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16341 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16342 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16343 N1->hasOneUse()) {
16344 SDValue N120 = N1.getOperand(2).getOperand(0);
16345 if (isContractableAndReassociableFMUL(N120) &&
16346 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16347 N120.getValueType())) {
16348 SDValue N1200 = N120.getOperand(0);
16349 SDValue N1201 = N120.getOperand(1);
16350 return matcher.getNode(
16351 PreferredFusedOpcode, SL, VT,
16352 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16353 N1.getOperand(1),
16354 matcher.getNode(
16355 PreferredFusedOpcode, SL, VT,
16356 matcher.getNode(ISD::FNEG, SL, VT,
16357 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16358 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16359 }
16360 }
16361
16362 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16363 // -> (fma (fneg (fpext y)), (fpext z),
16364 // (fma (fneg (fpext u)), (fpext v), x))
16365 // FIXME: This turns two single-precision and one double-precision
16366 // operation into two double-precision operations, which might not be
16367 // interesting for all targets, especially GPUs.
16368 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16369 SDValue CvtSrc = N1.getOperand(0);
16370 SDValue N100 = CvtSrc.getOperand(0);
16371 SDValue N101 = CvtSrc.getOperand(1);
16372 SDValue N102 = CvtSrc.getOperand(2);
16373 if (isContractableAndReassociableFMUL(N102) &&
16374 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16375 CvtSrc.getValueType())) {
16376 SDValue N1020 = N102.getOperand(0);
16377 SDValue N1021 = N102.getOperand(1);
16378 return matcher.getNode(
16379 PreferredFusedOpcode, SL, VT,
16380 matcher.getNode(ISD::FNEG, SL, VT,
16381 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16382 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16383 matcher.getNode(
16384 PreferredFusedOpcode, SL, VT,
16385 matcher.getNode(ISD::FNEG, SL, VT,
16386 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16387 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16388 }
16389 }
16390 }
16391
16392 return SDValue();
16393}
16394
16395/// Try to perform FMA combining on a given FMUL node based on the distributive
16396/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16397/// subtraction instead of addition).
16398SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16399 SDValue N0 = N->getOperand(0);
16400 SDValue N1 = N->getOperand(1);
16401 EVT VT = N->getValueType(0);
16402 SDLoc SL(N);
16403
16404 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16405
16406 const TargetOptions &Options = DAG.getTarget().Options;
16407
16408 // The transforms below are incorrect when x == 0 and y == inf, because the
16409 // intermediate multiplication produces a nan.
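 // For example, (fmul (fadd x, 1.0), y) with x == 0.0 and y == inf evaluates to
 // inf, but the fused form (fma x, y, y) first computes 0.0 * inf == NaN.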
16410 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16411 if (!hasNoInfs(Options, FAdd))
16412 return SDValue();
16413
16414 // Floating-point multiply-add without intermediate rounding.
16415 bool HasFMA =
16416 isContractableFMUL(Options, SDValue(N, 0)) &&
16417 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16418 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16419
16420 // Floating-point multiply-add with intermediate rounding. This can result
16421 // in a less precise result due to the changed rounding order.
16422 bool HasFMAD = Options.UnsafeFPMath &&
16423 (LegalOperations && TLI.isFMADLegal(DAG, N));
16424
16425 // No valid opcode, do not combine.
16426 if (!HasFMAD && !HasFMA)
16427 return SDValue();
16428
16429 // Always prefer FMAD to FMA for precision.
16430 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16431 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16432
16433 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16434 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16435 auto FuseFADD = [&](SDValue X, SDValue Y) {
16436 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16437 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16438 if (C->isExactlyValue(+1.0))
16439 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16440 Y);
16441 if (C->isExactlyValue(-1.0))
16442 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16443 DAG.getNode(ISD::FNEG, SL, VT, Y));
16444 }
16445 }
16446 return SDValue();
16447 };
16448
16449 if (SDValue FMA = FuseFADD(N0, N1))
16450 return FMA;
16451 if (SDValue FMA = FuseFADD(N1, N0))
16452 return FMA;
16453
16454 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16455 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16456 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16457 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16458 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16459 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16460 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16461 if (C0->isExactlyValue(+1.0))
16462 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16463 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16464 Y);
16465 if (C0->isExactlyValue(-1.0))
16466 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16467 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16468 DAG.getNode(ISD::FNEG, SL, VT, Y));
16469 }
16470 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16471 if (C1->isExactlyValue(+1.0))
16472 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16473 DAG.getNode(ISD::FNEG, SL, VT, Y));
16474 if (C1->isExactlyValue(-1.0))
16475 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16476 Y);
16477 }
16478 }
16479 return SDValue();
16480 };
16481
16482 if (SDValue FMA = FuseFSUB(N0, N1))
16483 return FMA;
16484 if (SDValue FMA = FuseFSUB(N1, N0))
16485 return FMA;
16486
16487 return SDValue();
16488}
16489
16490SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16491 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16492
16493 // FADD -> FMA combines:
16494 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16495 if (Fused.getOpcode() != ISD::DELETED_NODE)
16496 AddToWorklist(Fused.getNode());
16497 return Fused;
16498 }
16499 return SDValue();
16500}
16501
16502SDValue DAGCombiner::visitFADD(SDNode *N) {
16503 SDValue N0 = N->getOperand(0);
16504 SDValue N1 = N->getOperand(1);
16505 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16506 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16507 EVT VT = N->getValueType(0);
16508 SDLoc DL(N);
16509 const TargetOptions &Options = DAG.getTarget().Options;
16510 SDNodeFlags Flags = N->getFlags();
16511 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16512
16513 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16514 return R;
16515
16516 // fold (fadd c1, c2) -> c1 + c2
16517 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16518 return C;
16519
16520 // canonicalize constant to RHS
16521 if (N0CFP && !N1CFP)
16522 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16523
16524 // fold vector ops
16525 if (VT.isVector())
16526 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16527 return FoldedVOp;
16528
16529 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
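 // Adding +0.0 is only an identity under nsz because (-0.0) + (+0.0) == +0.0,
 // which would flip the sign of a negative-zero input; adding -0.0 preserves
 // every input value, so it is always safe to drop.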
16530 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16531 if (N1C && N1C->isZero())
16532 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16533 return N0;
16534
16535 if (SDValue NewSel = foldBinOpIntoSelect(N))
16536 return NewSel;
16537
16538 // fold (fadd A, (fneg B)) -> (fsub A, B)
16539 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16540 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16541 N1, DAG, LegalOperations, ForCodeSize))
16542 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16543
16544 // fold (fadd (fneg A), B) -> (fsub B, A)
16545 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16546 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16547 N0, DAG, LegalOperations, ForCodeSize))
16548 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16549
16550 auto isFMulNegTwo = [](SDValue FMul) {
16551 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16552 return false;
16553 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16554 return C && C->isExactlyValue(-2.0);
16555 };
16556
16557 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16558 if (isFMulNegTwo(N0)) {
16559 SDValue B = N0.getOperand(0);
16560 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16561 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16562 }
16563 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16564 if (isFMulNegTwo(N1)) {
16565 SDValue B = N1.getOperand(0);
16566 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16567 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16568 }
16569
16570 // No FP constant should be created after legalization as the Instruction
16571 // Selection pass has a hard time dealing with FP constants.
16572 bool AllowNewConst = (Level < AfterLegalizeDAG);
16573
16574 // If nnan is enabled, fold lots of things.
16575 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16576 // If allowed, fold (fadd (fneg x), x) -> 0.0
16577 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16578 return DAG.getConstantFP(0.0, DL, VT);
16579
16580 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16581 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16582 return DAG.getConstantFP(0.0, DL, VT);
16583 }
16584
16585 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16586 // TODO: break out portions of the transformations below for which Unsafe is
16587 // considered and which do not require both nsz and reassoc
16588 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16589 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16590 AllowNewConst) {
16591 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16592 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16593 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16594 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16595 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16596 }
16597
16598 // We can fold chains of FADD's of the same value into multiplications.
16599 // This transform is not safe in general because we are reducing the number
16600 // of rounding steps.
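// E.g. summing x three times as (x + x) + x rounds after each add, while
// x * 3.0 rounds only once, so the two results can differ in the final ulp;
// that is why these rewrites sit behind the reassoc/unsafe-math check above.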
16601 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16602 if (N0.getOpcode() == ISD::FMUL) {
16603 SDNode *CFP00 =
16604 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16605 SDNode *CFP01 =
16606 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16607
16608 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16609 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16610 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16611 DAG.getConstantFP(1.0, DL, VT));
16612 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16613 }
16614
16615 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16616 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16617 N1.getOperand(0) == N1.getOperand(1) &&
16618 N0.getOperand(0) == N1.getOperand(0)) {
16619 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16620 DAG.getConstantFP(2.0, DL, VT));
16621 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16622 }
16623 }
16624
16625 if (N1.getOpcode() == ISD::FMUL) {
16626 SDNode *CFP10 =
16627 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16628 SDNode *CFP11 =
16629 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16630
16631 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16632 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16633 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16634 DAG.getConstantFP(1.0, DL, VT));
16635 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16636 }
16637
16638 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16639 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16640 N0.getOperand(0) == N0.getOperand(1) &&
16641 N1.getOperand(0) == N0.getOperand(0)) {
16642 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16643 DAG.getConstantFP(2.0, DL, VT));
16644 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16645 }
16646 }
16647
16648 if (N0.getOpcode() == ISD::FADD) {
16649 SDNode *CFP00 =
16650 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16651 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16652 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16653 (N0.getOperand(0) == N1)) {
16654 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16655 DAG.getConstantFP(3.0, DL, VT));
16656 }
16657 }
16658
16659 if (N1.getOpcode() == ISD::FADD) {
16660 SDNode *CFP10 =
16661 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16662 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16663 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16664 N1.getOperand(0) == N0) {
16665 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16666 DAG.getConstantFP(3.0, DL, VT));
16667 }
16668 }
16669
16670 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16671 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16672 N0.getOperand(0) == N0.getOperand(1) &&
16673 N1.getOperand(0) == N1.getOperand(1) &&
16674 N0.getOperand(0) == N1.getOperand(0)) {
16675 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16676 DAG.getConstantFP(4.0, DL, VT));
16677 }
16678 }
16679
16680 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16681 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16682 VT, N0, N1, Flags))
16683 return SD;
16684 } // enable-unsafe-fp-math
16685
16686 // FADD -> FMA combines:
16687 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16688 if (Fused.getOpcode() != ISD::DELETED_NODE)
16689 AddToWorklist(Fused.getNode());
16690 return Fused;
16691 }
16692 return SDValue();
16693}
16694
16695SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16696 SDValue Chain = N->getOperand(0);
16697 SDValue N0 = N->getOperand(1);
16698 SDValue N1 = N->getOperand(2);
16699 EVT VT = N->getValueType(0);
16700 EVT ChainVT = N->getValueType(1);
16701 SDLoc DL(N);
16702 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16703
16704 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16705 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16706 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16707 N1, DAG, LegalOperations, ForCodeSize)) {
16708 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16709 {Chain, N0, NegN1});
16710 }
16711
16712 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16713 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16714 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16715 N0, DAG, LegalOperations, ForCodeSize)) {
16716 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16717 {Chain, N1, NegN0});
16718 }
16719 return SDValue();
16720}
16721
16722SDValue DAGCombiner::visitFSUB(SDNode *N) {
16723 SDValue N0 = N->getOperand(0);
16724 SDValue N1 = N->getOperand(1);
16725 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16726 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16727 EVT VT = N->getValueType(0);
16728 SDLoc DL(N);
16729 const TargetOptions &Options = DAG.getTarget().Options;
16730 const SDNodeFlags Flags = N->getFlags();
16731 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16732
16733 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16734 return R;
16735
16736 // fold (fsub c1, c2) -> c1-c2
16737 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16738 return C;
16739
16740 // fold vector ops
16741 if (VT.isVector())
16742 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16743 return FoldedVOp;
16744
16745 if (SDValue NewSel = foldBinOpIntoSelect(N))
16746 return NewSel;
16747
16748 // (fsub A, 0) -> A
16749 if (N1CFP && N1CFP->isZero()) {
16750 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16751 Flags.hasNoSignedZeros()) {
16752 return N0;
16753 }
16754 }
16755
16756 if (N0 == N1) {
16757 // (fsub x, x) -> 0.0
16758 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16759 return DAG.getConstantFP(0.0f, DL, VT);
16760 }
16761
16762 // (fsub -0.0, N1) -> -N1
16763 if (N0CFP && N0CFP->isZero()) {
16764 if (N0CFP->isNegative() ||
16765 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16766 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16767 // flushed to zero, unless all users treat denorms as zero (DAZ).
16768 // FIXME: This transform will change the sign of a NaN and the behavior
16769 // of a signaling NaN. It is only valid when a NoNaN flag is present.
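// For example, if X is a positive denormal and results are flushed to zero,
// FSUB(-0.0, X) typically yields -0.0, whereas FNEG(X) merely flips the sign
// bit and produces a negative denormal, so the two differ unless denormal
// inputs are also treated as zero downstream (DAZ).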
16770 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16771 if (DenormMode == DenormalMode::getIEEE()) {
16772 if (SDValue NegN1 =
16773 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16774 return NegN1;
16775 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16776 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16777 }
16778 }
16779 }
16780
16781 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16782 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16783 N1.getOpcode() == ISD::FADD) {
16784 // X - (X + Y) -> -Y
16785 if (N0 == N1->getOperand(0))
16786 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16787 // X - (Y + X) -> -Y
16788 if (N0 == N1->getOperand(1))
16789 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16790 }
16791
16792 // fold (fsub A, (fneg B)) -> (fadd A, B)
16793 if (SDValue NegN1 =
16794 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16795 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16796
16797 // FSUB -> FMA combines:
16798 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16799 AddToWorklist(Fused.getNode());
16800 return Fused;
16801 }
16802
16803 return SDValue();
16804}
16805
16806// Transform IEEE Floats:
16807// (fmul C, (uitofp Pow2))
16808// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16809// (fdiv C, (uitofp Pow2))
16810// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16811//
16812 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
16813// there is no need for more than an add/sub.
16814//
16815// This is valid under the following circumstances:
16816// 1) We are dealing with IEEE floats
16817// 2) C is normal
16818// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16819 // TODO: Much of this could also be used for generating `ldexp` on targets that
16820 // prefer it.
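// For example, with f32 (23 mantissa bits): C = 1.5f is 0x3FC00000; for
// Pow2 = 8 (Log2 = 3) the transform adds 3 << 23 = 0x01800000, giving
// 0x41400000, which is exactly 12.0f = 1.5 * 8. Only the exponent field
// changes, which is why a plain integer add/sub is sufficient.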
16821SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16822 EVT VT = N->getValueType(0);
16823 SDValue ConstOp, Pow2Op;
16824
16825 std::optional<int> Mantissa;
16826 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16827 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16828 return false;
16829
16830 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16831 Pow2Op = N->getOperand(1 - ConstOpIdx);
16832 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16833 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16834 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16835 return false;
16836
16837 Pow2Op = Pow2Op.getOperand(0);
16838
16839 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16840 // TODO: We could use knownbits to make this bound more precise.
16841 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16842
16843 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16844 if (CFP == nullptr)
16845 return false;
16846
16847 const APFloat &APF = CFP->getValueAPF();
16848
16849 // Make sure we have a normal/IEEE constant.
16850 if (!APF.isNormal() || !APF.isIEEE())
16851 return false;
16852
16853 // Make sure the float's exponent is within the bounds for which this transform
16854 // produces a bitwise-equal value.
16855 int CurExp = ilogb(APF);
16856 // FMul by pow2 will only increase exponent.
16857 int MinExp =
16858 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16859 // FDiv by pow2 will only decrease exponent.
16860 int MaxExp =
16861 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16862 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16863 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16864 return false;
16865
16866 // Finally make sure we actually know the mantissa for the float type.
16867 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16868 if (!Mantissa)
16869 Mantissa = ThisMantissa;
16870
16871 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16872 };
16873
16874 // TODO: We may be able to include undefs.
16875 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16876 };
16877
16878 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16879 return SDValue();
16880
16881 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16882 return SDValue();
16883
16884 // Get log2 after all other checks have taken place. This is because
16885 // BuildLogBase2 may create a new node.
16886 SDLoc DL(N);
16887 // Get Log2 type with same bitwidth as the float type (VT).
16888 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16889 if (VT.isVector())
16890 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16891 VT.getVectorElementCount());
16892
16893 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16894 /*InexpensiveOnly*/ true, NewIntVT);
16895 if (!Log2)
16896 return SDValue();
16897
16898 // Perform actual transform.
16899 SDValue MantissaShiftCnt =
16900 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16901 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16902 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16903 // cast. We could implement that by handling the casts here.
16904 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16905 SDValue ResAsInt =
16906 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16907 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16908 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16909 return ResAsFP;
16910}
16911
16912SDValue DAGCombiner::visitFMUL(SDNode *N) {
16913 SDValue N0 = N->getOperand(0);
16914 SDValue N1 = N->getOperand(1);
16915 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16916 EVT VT = N->getValueType(0);
16917 SDLoc DL(N);
16918 const TargetOptions &Options = DAG.getTarget().Options;
16919 const SDNodeFlags Flags = N->getFlags();
16920 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16921
16922 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16923 return R;
16924
16925 // fold (fmul c1, c2) -> c1*c2
16926 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16927 return C;
16928
16929 // canonicalize constant to RHS
16930 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16931 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16932 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16933
16934 // fold vector ops
16935 if (VT.isVector())
16936 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16937 return FoldedVOp;
16938
16939 if (SDValue NewSel = foldBinOpIntoSelect(N))
16940 return NewSel;
16941
16942 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16943 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16944 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16945 N0.getOpcode() == ISD::FMUL) {
16946 SDValue N00 = N0.getOperand(0);
16947 SDValue N01 = N0.getOperand(1);
16948 // Avoid an infinite loop by making sure that N00 is not a constant
16949 // (the inner multiply has not been constant folded yet).
16950 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16951 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16952 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16953 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16954 }
16955 }
16956
16957 // Match a special-case: we convert X * 2.0 into fadd.
16958 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16959 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16960 N0.getOperand(0) == N0.getOperand(1)) {
16961 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16962 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16963 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16964 }
16965
16966 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16967 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16968 VT, N0, N1, Flags))
16969 return SD;
16970 }
16971
16972 // fold (fmul X, 2.0) -> (fadd X, X)
16973 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16974 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16975
16976 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16977 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16978 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16979 return DAG.getNode(ISD::FSUB, DL, VT,
16980 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16981 }
16982 }
16983
16984 // -N0 * -N1 --> N0 * N1
16985 TargetLowering::NegatibleCost CostN0 =
16986 TargetLowering::NegatibleCost::Expensive;
16987 TargetLowering::NegatibleCost CostN1 =
16988 TargetLowering::NegatibleCost::Expensive;
16989 SDValue NegN0 =
16990 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16991 if (NegN0) {
16992 HandleSDNode NegN0Handle(NegN0);
16993 SDValue NegN1 =
16994 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16995 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16996 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16997 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16998 }
16999
17000 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17001 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17002 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17003 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17004 TLI.isOperationLegal(ISD::FABS, VT)) {
17005 SDValue Select = N0, X = N1;
17006 if (Select.getOpcode() != ISD::SELECT)
17007 std::swap(Select, X);
17008
17009 SDValue Cond = Select.getOperand(0);
17010 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17011 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17012
17013 if (TrueOpnd && FalseOpnd &&
17014 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17015 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17016 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17017 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17018 switch (CC) {
17019 default: break;
17020 case ISD::SETOLT:
17021 case ISD::SETULT:
17022 case ISD::SETOLE:
17023 case ISD::SETULE:
17024 case ISD::SETLT:
17025 case ISD::SETLE:
17026 std::swap(TrueOpnd, FalseOpnd);
17027 [[fallthrough]];
17028 case ISD::SETOGT:
17029 case ISD::SETUGT:
17030 case ISD::SETOGE:
17031 case ISD::SETUGE:
17032 case ISD::SETGT:
17033 case ISD::SETGE:
17034 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17035 TLI.isOperationLegal(ISD::FNEG, VT))
17036 return DAG.getNode(ISD::FNEG, DL, VT,
17037 DAG.getNode(ISD::FABS, DL, VT, X));
17038 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17039 return DAG.getNode(ISD::FABS, DL, VT, X);
17040
17041 break;
17042 }
17043 }
17044 }
17045
17046 // FMUL -> FMA combines:
17047 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17048 AddToWorklist(Fused.getNode());
17049 return Fused;
17050 }
17051
17052 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17053 // able to run.
17054 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17055 return R;
17056
17057 return SDValue();
17058}
17059
17060template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17061 SDValue N0 = N->getOperand(0);
17062 SDValue N1 = N->getOperand(1);
17063 SDValue N2 = N->getOperand(2);
17064 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17065 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17066 EVT VT = N->getValueType(0);
17067 SDLoc DL(N);
17068 const TargetOptions &Options = DAG.getTarget().Options;
17069 // FMA nodes have flags that propagate to the created nodes.
17070 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17071 MatchContextClass matcher(DAG, TLI, N);
17072
17073 // Constant fold FMA.
17074 if (isa<ConstantFPSDNode>(N0) &&
17075 isa<ConstantFPSDNode>(N1) &&
17076 isa<ConstantFPSDNode>(N2)) {
17077 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
17078 }
17079
17080 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17081 TargetLowering::NegatibleCost CostN0 =
17082 TargetLowering::NegatibleCost::Expensive;
17083 TargetLowering::NegatibleCost CostN1 =
17084 TargetLowering::NegatibleCost::Expensive;
17085 SDValue NegN0 =
17086 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17087 if (NegN0) {
17088 HandleSDNode NegN0Handle(NegN0);
17089 SDValue NegN1 =
17090 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17091 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17092 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17093 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17094 }
17095
17096 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17097 if (Options.UnsafeFPMath) {
17098 if (N0CFP && N0CFP->isZero())
17099 return N2;
17100 if (N1CFP && N1CFP->isZero())
17101 return N2;
17102 }
17103
17104 // FIXME: Support splat of constant.
17105 if (N0CFP && N0CFP->isExactlyValue(1.0))
17106 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
17107 if (N1CFP && N1CFP->isExactlyValue(1.0))
17108 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
17109
17110 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17111 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17112 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17113 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
17114
17115 bool CanReassociate =
17116 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17117 if (CanReassociate) {
17118 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17119 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17120 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17121 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17122 return matcher.getNode(
17123 ISD::FMUL, DL, VT, N0,
17124 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17125 }
17126
17127 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17128 if (matcher.match(N0, ISD::FMUL) &&
17129 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17130 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17131 return matcher.getNode(
17132 ISD::FMA, DL, VT, N0.getOperand(0),
17133 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17134 }
17135 }
17136
17137 // (fma x, -1, y) -> (fadd (fneg x), y)
17138 // FIXME: Support splat of constant.
17139 if (N1CFP) {
17140 if (N1CFP->isExactlyValue(1.0))
17141 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17142
17143 if (N1CFP->isExactlyValue(-1.0) &&
17144 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17145 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17146 AddToWorklist(RHSNeg.getNode());
17147 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17148 }
17149
17150 // fma (fneg x), K, y -> fma x, -K, y
17151 if (matcher.match(N0, ISD::FNEG) &&
17152 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17153 (N1.hasOneUse() &&
17154 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17155 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17156 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17157 }
17158 }
17159
17160 // FIXME: Support splat of constant.
17161 if (CanReassociate) {
17162 // (fma x, c, x) -> (fmul x, (c+1))
17163 if (N1CFP && N0 == N2) {
17164 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17165 matcher.getNode(ISD::FADD, DL, VT, N1,
17166 DAG.getConstantFP(1.0, DL, VT)));
17167 }
17168
17169 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17170 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17171 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17172 matcher.getNode(ISD::FADD, DL, VT, N1,
17173 DAG.getConstantFP(-1.0, DL, VT)));
17174 }
17175 }
17176
17177 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17178 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17179 if (!TLI.isFNegFree(VT))
17180 if (SDValue Neg = TLI.getCheaperNegatedExpression(
17181 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17182 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17183 return SDValue();
17184}
17185
17186SDValue DAGCombiner::visitFMAD(SDNode *N) {
17187 SDValue N0 = N->getOperand(0);
17188 SDValue N1 = N->getOperand(1);
17189 SDValue N2 = N->getOperand(2);
17190 EVT VT = N->getValueType(0);
17191 SDLoc DL(N);
17192
17193 // Constant fold FMAD.
17194 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17195 isa<ConstantFPSDNode>(N2))
17196 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17197
17198 return SDValue();
17199}
17200
17201// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17202// reciprocal.
17203// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17204 // Notice that this is not always beneficial. One reason is that different targets
17205 // may have different costs for FDIV and FMUL, so sometimes the cost of two
17206 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17207 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
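// Note also that each "a * recip" result is rounded twice (once computing the
// reciprocal, once in the multiply) instead of once for a direct divide, so
// the rewrite is only done under unsafe-math or the per-node 'arcp'
// (allow-reciprocal) flag, as checked below.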
17208SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17209 // TODO: Limit this transform based on optsize/minsize - it always creates at
17210 // least 1 extra instruction. But the perf win may be substantial enough
17211 // that only minsize should restrict this.
17212 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17213 const SDNodeFlags Flags = N->getFlags();
17214 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17215 return SDValue();
17216
17217 // Skip if current node is a reciprocal/fneg-reciprocal.
17218 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17219 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17220 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17221 return SDValue();
17222
17223 // Exit early if the target does not want this transform or if there can't
17224 // possibly be enough uses of the divisor to make the transform worthwhile.
17225 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17226
17227 // For splat vectors, scale the number of uses by the splat factor. If we can
17228 // convert the division into a scalar op, that will likely be much faster.
17229 unsigned NumElts = 1;
17230 EVT VT = N->getValueType(0);
17231 if (VT.isVector() && DAG.isSplatValue(N1))
17232 NumElts = VT.getVectorMinNumElements();
17233
17234 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17235 return SDValue();
17236
17237 // Find all FDIV users of the same divisor.
17238 // Use a set because duplicates may be present in the user list.
17239 SetVector<SDNode *> Users;
17240 for (auto *U : N1->uses()) {
17241 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17242 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17243 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17244 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17245 U->getFlags().hasAllowReassociation() &&
17246 U->getFlags().hasNoSignedZeros())
17247 continue;
17248
17249 // This division is eligible for optimization only if global unsafe math
17250 // is enabled or if this division allows reciprocal formation.
17251 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17252 Users.insert(U);
17253 }
17254 }
17255
17256 // Now that we have the actual number of divisor uses, make sure it meets
17257 // the minimum threshold specified by the target.
17258 if ((Users.size() * NumElts) < MinUses)
17259 return SDValue();
17260
17261 SDLoc DL(N);
17262 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17263 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17264
17265 // Dividend / Divisor -> Dividend * Reciprocal
17266 for (auto *U : Users) {
17267 SDValue Dividend = U->getOperand(0);
17268 if (Dividend != FPOne) {
17269 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17270 Reciprocal, Flags);
17271 CombineTo(U, NewNode);
17272 } else if (U != Reciprocal.getNode()) {
17273 // In the absence of fast-math-flags, this user node is always the
17274 // same node as Reciprocal, but with FMF they may be different nodes.
17275 CombineTo(U, Reciprocal);
17276 }
17277 }
17278 return SDValue(N, 0); // N was replaced.
17279}
17280
17281SDValue DAGCombiner::visitFDIV(SDNode *N) {
17282 SDValue N0 = N->getOperand(0);
17283 SDValue N1 = N->getOperand(1);
17284 EVT VT = N->getValueType(0);
17285 SDLoc DL(N);
17286 const TargetOptions &Options = DAG.getTarget().Options;
17287 SDNodeFlags Flags = N->getFlags();
17288 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17289
17290 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17291 return R;
17292
17293 // fold (fdiv c1, c2) -> c1/c2
17294 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17295 return C;
17296
17297 // fold vector ops
17298 if (VT.isVector())
17299 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17300 return FoldedVOp;
17301
17302 if (SDValue NewSel = foldBinOpIntoSelect(N))
17303 return NewSel;
17304
17305 if (SDValue V = combineRepeatedFPDivisors(N))
17306 return V;
17307
17308 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17309 // the loss is acceptable with AllowReciprocal.
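// E.g. 1.0/4.0 is exact (opOK), so (fdiv X, 4.0) becomes (fmul X, 0.25)
// unconditionally, while 1.0/3.0 is inexact (opInexact) and is only rewritten
// when reciprocal approximations are allowed.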
17310 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17311 // Compute the reciprocal 1.0 / c2.
17312 const APFloat &N1APF = N1CFP->getValueAPF();
17313 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17314 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17315 // Only do the transform if the reciprocal is a legal fp immediate that
17316 // isn't too nasty (eg NaN, denormal, ...).
17317 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17318 (st == APFloat::opInexact &&
17319 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17320 (!LegalOperations ||
17321 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17322 // backend)... we should handle this gracefully after Legalize.
17323 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17324 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17325 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17326 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17327 DAG.getConstantFP(Recip, DL, VT));
17328 }
17329
17330 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17331 // If this FDIV is part of a reciprocal square root, it may be folded
17332 // into a target-specific square root estimate instruction.
17333 if (N1.getOpcode() == ISD::FSQRT) {
17334 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17335 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17336 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17337 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17338 if (SDValue RV =
17339 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17340 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17341 AddToWorklist(RV.getNode());
17342 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17343 }
17344 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17345 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17346 if (SDValue RV =
17347 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17348 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17349 AddToWorklist(RV.getNode());
17350 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17351 }
17352 } else if (N1.getOpcode() == ISD::FMUL) {
17353 // Look through an FMUL. Even though this won't remove the FDIV directly,
17354 // it's still worthwhile to get rid of the FSQRT if possible.
17355 SDValue Sqrt, Y;
17356 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17357 Sqrt = N1.getOperand(0);
17358 Y = N1.getOperand(1);
17359 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17360 Sqrt = N1.getOperand(1);
17361 Y = N1.getOperand(0);
17362 }
17363 if (Sqrt.getNode()) {
17364 // If the other multiply operand is known positive, pull it into the
17365 // sqrt. That will eliminate the division if we convert to an estimate.
17366 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17367 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17368 SDValue A;
17369 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17370 A = Y.getOperand(0);
17371 else if (Y == Sqrt.getOperand(0))
17372 A = Y;
17373 if (A) {
17374 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17375 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17376 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17377 SDValue AAZ =
17378 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17379 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17380 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17381
17382 // Estimate creation failed. Clean up speculatively created nodes.
17383 recursivelyDeleteUnusedNodes(AAZ.getNode());
17384 }
17385 }
17386
17387 // We found a FSQRT, so try to make this fold:
17388 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17389 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17390 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17391 AddToWorklist(Div.getNode());
17392 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17393 }
17394 }
17395 }
17396
17397 // Fold into a reciprocal estimate and multiply instead of a real divide.
17398 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17399 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17400 return RV;
17401 }
17402
17403 // Fold X/Sqrt(X) -> Sqrt(X)
17404 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17405 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17406 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17407 return N1;
17408
17409 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17410 TargetLowering::NegatibleCost CostN0 =
17411 TargetLowering::NegatibleCost::Expensive;
17412 TargetLowering::NegatibleCost CostN1 =
17413 TargetLowering::NegatibleCost::Expensive;
17414 SDValue NegN0 =
17415 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17416 if (NegN0) {
17417 HandleSDNode NegN0Handle(NegN0);
17418 SDValue NegN1 =
17419 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17420 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17421 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17422 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17423 }
17424
17425 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17426 return R;
17427
17428 return SDValue();
17429}
17430
17431SDValue DAGCombiner::visitFREM(SDNode *N) {
17432 SDValue N0 = N->getOperand(0);
17433 SDValue N1 = N->getOperand(1);
17434 EVT VT = N->getValueType(0);
17435 SDNodeFlags Flags = N->getFlags();
17436 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17437 SDLoc DL(N);
17438
17439 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17440 return R;
17441
17442 // fold (frem c1, c2) -> fmod(c1,c2)
17443 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17444 return C;
17445
17446 if (SDValue NewSel = foldBinOpIntoSelect(N))
17447 return NewSel;
17448
17449 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
17450 // power of 2.
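// For example, frem(7.5, 2.0): 7.5/2.0 = 3.75, trunc -> 3.0, 3.0*2.0 = 6.0,
// and 7.5 - 6.0 = 1.5, which matches fmod(7.5, 2.0). With a power-of-2 N1 the
// division and multiplication essentially just adjust exponents, so the
// expansion does not introduce extra rounding error in the common case.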
17451 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17455 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17456 bool NeedsCopySign =
17457 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17458 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17459 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17460 SDValue MLA;
17461 if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17462 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17463 N1, N0);
17464 } else {
17465 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17466 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17467 }
17468 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17469 }
17470
17471 return SDValue();
17472}
17473
17474SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17475 SDNodeFlags Flags = N->getFlags();
17476 const TargetOptions &Options = DAG.getTarget().Options;
17477
17478 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17479 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17480 if (!Flags.hasApproximateFuncs() ||
17481 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17482 return SDValue();
17483
17484 SDValue N0 = N->getOperand(0);
17485 if (TLI.isFsqrtCheap(N0, DAG))
17486 return SDValue();
17487
17488 // FSQRT nodes have flags that propagate to the created nodes.
17489 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17490 // transform the fdiv, we may produce a sub-optimal estimate sequence
17491 // because the reciprocal calculation may not have to filter out a
17492 // 0.0 input.
17493 return buildSqrtEstimate(N0, Flags);
17494}
17495
17496/// copysign(x, fp_extend(y)) -> copysign(x, y)
17497/// copysign(x, fp_round(y)) -> copysign(x, y)
17498/// Operands to the functions are the type of X and Y respectively.
17499static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17500 // Always fold no-op FP casts.
17501 if (XTy == YTy)
17502 return true;
17503
17504 // Do not optimize out type conversion of f128 type yet.
17505 // For some targets like x86_64, configuration is changed to keep one f128
17506 // value in one SSE register, but instruction selection cannot handle
17507 // FCOPYSIGN on SSE registers yet.
17508 if (YTy == MVT::f128)
17509 return false;
17510
17512}
17513
17514 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17515 SDValue N1 = N->getOperand(1);
17516 if (N1.getOpcode() != ISD::FP_EXTEND &&
17517 N1.getOpcode() != ISD::FP_ROUND)
17518 return false;
17519 EVT N1VT = N1->getValueType(0);
17520 EVT N1Op0VT = N1->getOperand(0).getValueType();
17521 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17522}
17523
17524SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17525 SDValue N0 = N->getOperand(0);
17526 SDValue N1 = N->getOperand(1);
17527 EVT VT = N->getValueType(0);
17528
17529 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17530 if (SDValue C =
17531 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17532 return C;
17533
17534 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17535 const APFloat &V = N1C->getValueAPF();
17536 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17537 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17538 if (!V.isNegative()) {
17539 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17540 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17541 } else {
17542 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17543 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17544 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17545 }
17546 }
17547
17548 // copysign(fabs(x), y) -> copysign(x, y)
17549 // copysign(fneg(x), y) -> copysign(x, y)
17550 // copysign(copysign(x,z), y) -> copysign(x, y)
17551 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17552 N0.getOpcode() == ISD::FCOPYSIGN)
17553 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17554
17555 // copysign(x, abs(y)) -> abs(x)
17556 if (N1.getOpcode() == ISD::FABS)
17557 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17558
17559 // copysign(x, copysign(y,z)) -> copysign(x, z)
17560 if (N1.getOpcode() == ISD::FCOPYSIGN)
17561 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17562
17563 // copysign(x, fp_extend(y)) -> copysign(x, y)
17564 // copysign(x, fp_round(y)) -> copysign(x, y)
17565 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17566 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17567
17568 return SDValue();
17569}
17570
17571SDValue DAGCombiner::visitFPOW(SDNode *N) {
17572 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17573 if (!ExponentC)
17574 return SDValue();
17575 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17576
17577 // Try to convert x ** (1/3) into cube root.
17578 // TODO: Handle the various flavors of long double.
17579 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17580 // Some range near 1/3 should be fine.
17581 EVT VT = N->getValueType(0);
17582 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17583 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17584 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17585 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17586 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
17587 // For regular numbers, rounding may cause the results to differ.
17588 // Therefore, we require { nsz ninf nnan afn } for this transform.
17589 // TODO: We could select out the special cases if we don't have nsz/ninf.
17590 SDNodeFlags Flags = N->getFlags();
17591 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17592 !Flags.hasApproximateFuncs())
17593 return SDValue();
17594
17595 // Do not create a cbrt() libcall if the target does not have it, and do not
17596 // turn a pow that has lowering support into a cbrt() libcall.
17597 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17598 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17599 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17600 return SDValue();
17601
17602 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17603 }
17604
17605 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17606 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17607 // TODO: This could be extended (using a target hook) to handle smaller
17608 // power-of-2 fractional exponents.
17609 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17610 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17611 if (ExponentIs025 || ExponentIs075) {
17612 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17613 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17614 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17615 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17616 // For regular numbers, rounding may cause the results to differ.
17617 // Therefore, we require { nsz ninf afn } for this transform.
17618 // TODO: We could select out the special cases if we don't have nsz/ninf.
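// (X ** 0.75 decomposes as X ** 0.5 * X ** 0.25, i.e. sqrt(X) * sqrt(sqrt(X)),
// which is what the expansion below builds.)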
17619 SDNodeFlags Flags = N->getFlags();
17620
17621 // We only need no signed zeros for the 0.25 case.
17622 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17623 !Flags.hasApproximateFuncs())
17624 return SDValue();
17625
17626 // Don't double the number of libcalls. We are trying to inline fast code.
17627 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17628 return SDValue();
17629
17630 // Assume that libcalls are the smallest code.
17631 // TODO: This restriction should probably be lifted for vectors.
17632 if (ForCodeSize)
17633 return SDValue();
17634
17635 // pow(X, 0.25) --> sqrt(sqrt(X))
17636 SDLoc DL(N);
17637 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17638 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17639 if (ExponentIs025)
17640 return SqrtSqrt;
17641 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17642 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17643 }
17644
17645 return SDValue();
17646}
17647
17648 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17649 const TargetLowering &TLI) {
17650 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17651 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17652 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17653 // conversions would return +0.0.
17654 // FIXME: We should be able to use node-level FMF here.
17655 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17656 EVT VT = N->getValueType(0);
17657 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17658 !DAG.getTarget().Options.NoSignedZerosFPMath)
17659 return SDValue();
17660
17661 // fptosi/fptoui round towards zero, so converting from FP to integer and
17662 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
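// E.g. 2.7 -> fptosi -> 2 -> sitofp -> 2.0 == ftrunc(2.7), and
// -1.5 -> -1 -> -1.0 == ftrunc(-1.5). For -0.0 the round trip gives +0.0
// while ftrunc gives -0.0, which is why -0.0 must be ignorable (see above).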
17663 SDValue N0 = N->getOperand(0);
17664 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17665 N0.getOperand(0).getValueType() == VT)
17666 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17667
17668 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17669 N0.getOperand(0).getValueType() == VT)
17670 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17671
17672 return SDValue();
17673}
17674
17675SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17676 SDValue N0 = N->getOperand(0);
17677 EVT VT = N->getValueType(0);
17678 EVT OpVT = N0.getValueType();
17679
17680 // [us]itofp(undef) = 0, because the result value is bounded.
17681 if (N0.isUndef())
17682 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17683
17684 // fold (sint_to_fp c1) -> c1fp
17685 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17686 // ...but only if the target supports immediate floating-point values
17687 (!LegalOperations ||
17688 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17689 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17690
17691 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17692 // but UINT_TO_FP is legal on this target, try to convert.
17693 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17694 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17695 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17696 if (DAG.SignBitIsZero(N0))
17697 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17698 }
17699
17700 // The next optimizations are desirable only if SELECT_CC can be lowered.
17701 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17702 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17703 !VT.isVector() &&
17704 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17705 SDLoc DL(N);
17706 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17707 DAG.getConstantFP(0.0, DL, VT));
17708 }
17709
17710 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17711 // (select (setcc x, y, cc), 1.0, 0.0)
17712 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17713 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17714 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17715 SDLoc DL(N);
17716 return DAG.getSelect(DL, VT, N0.getOperand(0),
17717 DAG.getConstantFP(1.0, DL, VT),
17718 DAG.getConstantFP(0.0, DL, VT));
17719 }
17720
17721 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17722 return FTrunc;
17723
17724 return SDValue();
17725}
17726
17727SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17728 SDValue N0 = N->getOperand(0);
17729 EVT VT = N->getValueType(0);
17730 EVT OpVT = N0.getValueType();
17731
17732 // [us]itofp(undef) = 0, because the result value is bounded.
17733 if (N0.isUndef())
17734 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17735
17736 // fold (uint_to_fp c1) -> c1fp
17737 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17738 // ...but only if the target supports immediate floating-point values
17739 (!LegalOperations ||
17740 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17741 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17742
17743 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17744 // but SINT_TO_FP is legal on this target, try to convert.
17745 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17746 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17747 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17748 if (DAG.SignBitIsZero(N0))
17749 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17750 }
17751
17752 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17753 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17754 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17755 SDLoc DL(N);
17756 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17757 DAG.getConstantFP(0.0, DL, VT));
17758 }
17759
17760 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17761 return FTrunc;
17762
17763 return SDValue();
17764}
17765
17766 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17767 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17768 SDValue N0 = N->getOperand(0);
17769 EVT VT = N->getValueType(0);
17770
17771 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17772 return SDValue();
17773
17774 SDValue Src = N0.getOperand(0);
17775 EVT SrcVT = Src.getValueType();
17776 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17777 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17778
17779 // We can safely assume the conversion won't overflow the output range,
17780 // because (for example) (uint8_t)18293.f is undefined behavior.
17781
17782 // Since we can assume the conversion won't overflow, our decision as to
17783 // whether the input will fit in the float should depend on the minimum
17784 // of the input range and output range.
17785
17786 // This means this is also safe for a signed input and unsigned output, since
17787 // a negative input would lead to undefined behavior.
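// E.g. for (fp_to_sint (sint_to_fp X:i16)) via f32: InputSize = 15,
// OutputSize = 32, so ActualSize = 15 and f32's 24-bit precision covers it;
// the fold below produces sign_extend. For an i32 input, ActualSize = 31
// exceeds 24, so no fold is done.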
17788 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17789 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17790 unsigned ActualSize = std::min(InputSize, OutputSize);
17791 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17792
17793 // We can only fold away the float conversion if the input range can be
17794 // represented exactly in the float range.
17795 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17796 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17797 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17798 : ISD::ZERO_EXTEND;
17799 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17800 }
17801 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17802 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17803 return DAG.getBitcast(VT, Src);
17804 }
17805 return SDValue();
17806}
17807
17808SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17809 SDValue N0 = N->getOperand(0);
17810 EVT VT = N->getValueType(0);
17811
17812 // fold (fp_to_sint undef) -> undef
17813 if (N0.isUndef())
17814 return DAG.getUNDEF(VT);
17815
17816 // fold (fp_to_sint c1fp) -> c1
17817 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17818 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17819
17820 return FoldIntToFPToInt(N, DAG);
17821}
17822
17823SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17824 SDValue N0 = N->getOperand(0);
17825 EVT VT = N->getValueType(0);
17826
17827 // fold (fp_to_uint undef) -> undef
17828 if (N0.isUndef())
17829 return DAG.getUNDEF(VT);
17830
17831 // fold (fp_to_uint c1fp) -> c1
17832 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17833 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17834
17835 return FoldIntToFPToInt(N, DAG);
17836}
17837
17838SDValue DAGCombiner::visitXRINT(SDNode *N) {
17839 SDValue N0 = N->getOperand(0);
17840 EVT VT = N->getValueType(0);
17841
17842 // fold (lrint|llrint undef) -> undef
17843 if (N0.isUndef())
17844 return DAG.getUNDEF(VT);
17845
17846 // fold (lrint|llrint c1fp) -> c1
17847 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17848 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17849
17850 return SDValue();
17851}
17852
17853SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17854 SDValue N0 = N->getOperand(0);
17855 SDValue N1 = N->getOperand(1);
17856 EVT VT = N->getValueType(0);
17857
17858 // fold (fp_round c1fp) -> c1fp
17859 if (SDValue C =
17860 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17861 return C;
17862
17863 // fold (fp_round (fp_extend x)) -> x
17864 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17865 return N0.getOperand(0);
17866
17867 // fold (fp_round (fp_round x)) -> (fp_round x)
17868 if (N0.getOpcode() == ISD::FP_ROUND) {
17869 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17870 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17871
17872 // Avoid folding legal fp_rounds into non-legal ones.
17873 if (!hasOperation(ISD::FP_ROUND, VT))
17874 return SDValue();
17875
17876 // Skip this folding if it results in an fp_round from f80 to f16.
17877 //
17878 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17879 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17880 // instructions from f32 or f64. Moreover, the first (value-preserving)
17881 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17882 // x86.
17883 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17884 return SDValue();
17885
17886 // If the first fp_round isn't a value preserving truncation, it might
17887 // introduce a tie in the second fp_round, that wouldn't occur in the
17888 // single-step fp_round we want to fold to.
17889 // In other words, double rounding isn't the same as rounding.
17890 // Also, this is a value preserving truncation iff both fp_round's are.
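// Concretely: rounding 1 + 2^-11 + 2^-25 from f64 straight to f16 gives
// 1 + 2^-10, but rounding first to f32 lands exactly on the f16 tie point
// 1 + 2^-11, which then rounds to even (1.0); hence the restriction below.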
17891 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17892 SDLoc DL(N);
17893 return DAG.getNode(
17894 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17895 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17896 }
17897 }
17898
17899 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17900 // Note: From a legality perspective, this is a two step transform. First,
17901 // we duplicate the fp_round to the arguments of the copysign, then we
17902 // eliminate the fp_round on Y. The second step requires an additional
17903 // predicate to match the implementation above.
17904 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17905 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17906 N0.getValueType())) {
17907 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17908 N0.getOperand(0), N1);
17909 AddToWorklist(Tmp.getNode());
17910 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17911 Tmp, N0.getOperand(1));
17912 }
17913
17914 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17915 return NewVSel;
17916
17917 return SDValue();
17918}
17919
17920SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17921 SDValue N0 = N->getOperand(0);
17922 EVT VT = N->getValueType(0);
17923
17924 if (VT.isVector())
17925 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17926 return FoldedVOp;
17927
17928 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17929 if (N->hasOneUse() &&
17930 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17931 return SDValue();
17932
17933 // fold (fp_extend c1fp) -> c1fp
17934 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17935 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17936
17937 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17938 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17939 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17940 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17941
17942 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17943 // value of X.
17944 if (N0.getOpcode() == ISD::FP_ROUND
17945 && N0.getConstantOperandVal(1) == 1) {
17946 SDValue In = N0.getOperand(0);
17947 if (In.getValueType() == VT) return In;
17948 if (VT.bitsLT(In.getValueType()))
17949 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17950 In, N0.getOperand(1));
17951 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17952 }
17953
17954 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17955 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17956 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17957 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17958 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17959 LN0->getChain(),
17960 LN0->getBasePtr(), N0.getValueType(),
17961 LN0->getMemOperand());
17962 CombineTo(N, ExtLoad);
17963 CombineTo(
17964 N0.getNode(),
17965 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17966 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17967 ExtLoad.getValue(1));
17968 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17969 }
17970
17971 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17972 return NewVSel;
17973
17974 return SDValue();
17975}
17976
17977SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17978 SDValue N0 = N->getOperand(0);
17979 EVT VT = N->getValueType(0);
17980
17981 // fold (fceil c1) -> fceil(c1)
17982 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17983 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17984
17985 return SDValue();
17986}
17987
17988SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17989 SDValue N0 = N->getOperand(0);
17990 EVT VT = N->getValueType(0);
17991
17992 // fold (ftrunc c1) -> ftrunc(c1)
17993 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17994 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17995
17996 // fold ftrunc (known rounded int x) -> x
17997 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17998 // likely to be generated to extract integer from a rounded floating value.
17999 switch (N0.getOpcode()) {
18000 default: break;
18001 case ISD::FRINT:
18002 case ISD::FTRUNC:
18003 case ISD::FNEARBYINT:
18004 case ISD::FROUNDEVEN:
18005 case ISD::FFLOOR:
18006 case ISD::FCEIL:
18007 return N0;
18008 }
18009
18010 return SDValue();
18011}
18012
18013SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18014 SDValue N0 = N->getOperand(0);
18015
18016 // fold (ffrexp c1) -> ffrexp(c1)
18017 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18018 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18019 return SDValue();
18020}
18021
18022SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18023 SDValue N0 = N->getOperand(0);
18024 EVT VT = N->getValueType(0);
18025
18026 // fold (ffloor c1) -> ffloor(c1)
18027 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18028 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
18029
18030 return SDValue();
18031}
18032
18033SDValue DAGCombiner::visitFNEG(SDNode *N) {
18034 SDValue N0 = N->getOperand(0);
18035 EVT VT = N->getValueType(0);
18036 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18037
18038 // Constant fold FNEG.
18039 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18040 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
18041
18042 if (SDValue NegN0 =
18043 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18044 return NegN0;
18045
18046 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18047 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18048 // know it was called from a context with a nsz flag if the input fsub does
18049 // not.
18050 if (N0.getOpcode() == ISD::FSUB &&
18051 (DAG.getTarget().Options.NoSignedZerosFPMath ||
18052 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18053 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18054 N0.getOperand(0));
18055 }
18056
18057 if (SDValue Cast = foldSignChangeInBitcast(N))
18058 return Cast;
18059
18060 return SDValue();
18061}
18062
18063SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18064 SDValue N0 = N->getOperand(0);
18065 SDValue N1 = N->getOperand(1);
18066 EVT VT = N->getValueType(0);
18067 const SDNodeFlags Flags = N->getFlags();
18068 unsigned Opc = N->getOpcode();
18069 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18070 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18071 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18072
18073 // Constant fold.
18074 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18075 return C;
18076
18077 // Canonicalize to constant on RHS.
18078 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18079 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18080 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18081
18082 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18083 const APFloat &AF = N1CFP->getValueAPF();
18084
18085 // minnum(X, nan) -> X
18086 // maxnum(X, nan) -> X
18087 // minimum(X, nan) -> nan
18088 // maximum(X, nan) -> nan
18089 if (AF.isNaN())
18090 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18091
18092 // In the following folds, inf can be replaced with the largest finite
18093 // float, if the ninf flag is set.
18094 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18095 // minnum(X, -inf) -> -inf
18096 // maxnum(X, +inf) -> +inf
18097 // minimum(X, -inf) -> -inf if nnan
18098 // maximum(X, +inf) -> +inf if nnan
18099 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18100 return N->getOperand(1);
18101
18102 // minnum(X, +inf) -> X if nnan
18103 // maxnum(X, -inf) -> X if nnan
18104 // minimum(X, +inf) -> X
18105 // maximum(X, -inf) -> X
18106 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18107 return N->getOperand(0);
18108 }
18109 }
18110
18111 if (SDValue SD = reassociateReduction(
18112 PropagatesNaN
18113 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18114 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18115 Opc, SDLoc(N), VT, N0, N1, Flags))
18116 return SD;
18117
18118 return SDValue();
18119}
18120
18121SDValue DAGCombiner::visitFABS(SDNode *N) {
18122 SDValue N0 = N->getOperand(0);
18123 EVT VT = N->getValueType(0);
18124
18125 // fold (fabs c1) -> fabs(c1)
18126 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18127 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
18128
18129 // fold (fabs (fabs x)) -> (fabs x)
18130 if (N0.getOpcode() == ISD::FABS)
18131 return N->getOperand(0);
18132
18133 // fold (fabs (fneg x)) -> (fabs x)
18134 // fold (fabs (fcopysign x, y)) -> (fabs x)
18135 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18136 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
18137
18138 if (SDValue Cast = foldSignChangeInBitcast(N))
18139 return Cast;
18140
18141 return SDValue();
18142}
18143
18144SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18145 SDValue Chain = N->getOperand(0);
18146 SDValue N1 = N->getOperand(1);
18147 SDValue N2 = N->getOperand(2);
18148
18149 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18150 // nondeterministic jumps).
18151 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18152 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18153 N1->getOperand(0), N2);
18154 }
18155
18156 // Variant of the previous fold where there is a SETCC in between:
18157 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18158 // =>
18159 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18160 // =>
18161 // BRCOND(SETCC(X, CONST, Cond))
18162 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18163 // isn't equivalent to true or false.
18164 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18165 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18166 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18167 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18168 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18169 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18170 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18171 bool Updated = false;
18172
18173 // Is 'X Cond C' always true or false?
18174 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18175 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18176 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18177 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18178 (Cond == ISD::SETGT && C->isMaxSignedValue());
18179 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18180 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18181 (Cond == ISD::SETUGE && C->isZero()) ||
18182 (Cond == ISD::SETGE && C->isMinSignedValue());
18183 return True || False;
18184 };
18185
18186 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18187 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18188 S0 = S0->getOperand(0);
18189 Updated = true;
18190 }
18191 }
18192 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18193 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18194 S1 = S1->getOperand(0);
18195 Updated = true;
18196 }
18197 }
18198
18199 if (Updated)
18200 return DAG.getNode(
18201 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18202 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
18203 }
18204
18205 // If N is a constant we could fold this into a fallthrough or unconditional
18206 // branch. However that doesn't happen very often in normal code, because
18207 // Instcombine/SimplifyCFG should have handled the available opportunities.
18208 // If we did this folding here, it would be necessary to update the
18209 // MachineBasicBlock CFG, which is awkward.
18210
18211 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18212 // on the target.
18213 if (N1.getOpcode() == ISD::SETCC &&
18214 TLI.isOperationLegalOrCustom(ISD::BR_CC,
18215 N1.getOperand(0).getValueType())) {
18216 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18217 Chain, N1.getOperand(2),
18218 N1.getOperand(0), N1.getOperand(1), N2);
18219 }
18220
18221 if (N1.hasOneUse()) {
18222 // rebuildSetCC calls visitXor which may change the Chain when there is a
18223 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18224 HandleSDNode ChainHandle(Chain);
18225 if (SDValue NewN1 = rebuildSetCC(N1))
18226 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18227 ChainHandle.getValue(), NewN1, N2);
18228 }
18229
18230 return SDValue();
18231}
18232
18233SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18234 if (N.getOpcode() == ISD::SRL ||
18235 (N.getOpcode() == ISD::TRUNCATE &&
18236 (N.getOperand(0).hasOneUse() &&
18237 N.getOperand(0).getOpcode() == ISD::SRL))) {
18238 // Look past the truncate.
18239 if (N.getOpcode() == ISD::TRUNCATE)
18240 N = N.getOperand(0);
18241
18242 // Match this pattern so that we can generate simpler code:
18243 //
18244 // %a = ...
18245 // %b = and i32 %a, 2
18246 // %c = srl i32 %b, 1
18247 // brcond i32 %c ...
18248 //
18249 // into
18250 //
18251 // %a = ...
18252 // %b = and i32 %a, 2
18253 // %c = setcc eq %b, 0
18254 // brcond %c ...
18255 //
18256 // This applies only when the AND constant value has one bit set and the
18257 // SRL constant is equal to the log2 of the AND constant. The back-end is
18258 // smart enough to convert the result into a TEST/JMP sequence.
18259 SDValue Op0 = N.getOperand(0);
18260 SDValue Op1 = N.getOperand(1);
18261
18262 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18263 SDValue AndOp1 = Op0.getOperand(1);
18264
18265 if (AndOp1.getOpcode() == ISD::Constant) {
18266 const APInt &AndConst = AndOp1->getAsAPIntVal();
18267
18268 if (AndConst.isPowerOf2() &&
18269 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18270 SDLoc DL(N);
18271 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18272 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18273 ISD::SETNE);
18274 }
18275 }
18276 }
18277 }
18278
18279 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18280 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18281 if (N.getOpcode() == ISD::XOR) {
18282 // Because we may call this on a speculatively constructed
18283 // SimplifiedSetCC Node, we need to simplify this node first.
18284 // Ideally this should be folded into SimplifySetCC and not
18285 // here. For now, grab a handle to N so we don't lose it from
18286 // replacements internal to the visit.
18287 HandleSDNode XORHandle(N);
18288 while (N.getOpcode() == ISD::XOR) {
18289 SDValue Tmp = visitXOR(N.getNode());
18290 // No simplification done.
18291 if (!Tmp.getNode())
18292 break;
18293 // Returning N is a form of in-visit replacement that may have invalidated
18294 // N. Grab the value from the handle.
18295 if (Tmp.getNode() == N.getNode())
18296 N = XORHandle.getValue();
18297 else // Node simplified. Try simplifying again.
18298 N = Tmp;
18299 }
18300
18301 if (N.getOpcode() != ISD::XOR)
18302 return N;
18303
18304 SDValue Op0 = N->getOperand(0);
18305 SDValue Op1 = N->getOperand(1);
18306
18307 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18308 bool Equal = false;
18309 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18310 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18311 Op0.getValueType() == MVT::i1) {
18312 N = Op0;
18313 Op0 = N->getOperand(0);
18314 Op1 = N->getOperand(1);
18315 Equal = true;
18316 }
18317
18318 EVT SetCCVT = N.getValueType();
18319 if (LegalTypes)
18320 SetCCVT = getSetCCResultType(SetCCVT);
18321 // Replace the uses of XOR with SETCC
18322 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18323 Equal ? ISD::SETEQ : ISD::SETNE);
18324 }
18325 }
18326
18327 return SDValue();
18328}
18329
18330// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18331//
18332SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18333 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18334 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18335
18336 // If N is a constant we could fold this into a fallthrough or unconditional
18337 // branch. However that doesn't happen very often in normal code, because
18338 // Instcombine/SimplifyCFG should have handled the available opportunities.
18339 // If we did this folding here, it would be necessary to update the
18340 // MachineBasicBlock CFG, which is awkward.
18341
18342 // Use SimplifySetCC to simplify SETCC's.
18343 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18344 CondLHS, CondRHS, CC->get(), SDLoc(N),
18345 false);
18346 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18347
18348 // fold to a simpler setcc
18349 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18350 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18351 N->getOperand(0), Simp.getOperand(2),
18352 Simp.getOperand(0), Simp.getOperand(1),
18353 N->getOperand(4));
18354
18355 return SDValue();
18356}
18357
18358static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18359 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18360 const TargetLowering &TLI) {
18361 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18362 if (LD->isIndexed())
18363 return false;
18364 EVT VT = LD->getMemoryVT();
18365 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18366 return false;
18367 Ptr = LD->getBasePtr();
18368 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18369 if (ST->isIndexed())
18370 return false;
18371 EVT VT = ST->getMemoryVT();
18372 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18373 return false;
18374 Ptr = ST->getBasePtr();
18375 IsLoad = false;
18376 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18377 if (LD->isIndexed())
18378 return false;
18379 EVT VT = LD->getMemoryVT();
18380 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18381 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18382 return false;
18383 Ptr = LD->getBasePtr();
18384 IsMasked = true;
18385 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18386 if (ST->isIndexed())
18387 return false;
18388 EVT VT = ST->getMemoryVT();
18389 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18390 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18391 return false;
18392 Ptr = ST->getBasePtr();
18393 IsLoad = false;
18394 IsMasked = true;
18395 } else {
18396 return false;
18397 }
18398 return true;
18399}
18400
18401/// Try turning a load/store into a pre-indexed load/store when the base
18402/// pointer is an add or subtract and it has other uses besides the load/store.
18403/// After the transformation, the new indexed load/store has effectively folded
18404/// the add/subtract in and all of its other uses are redirected to the
18405/// new load/store.
18406bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18407 if (Level < AfterLegalizeDAG)
18408 return false;
18409
18410 bool IsLoad = true;
18411 bool IsMasked = false;
18412 SDValue Ptr;
18413 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18414 Ptr, TLI))
18415 return false;
18416
18417 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18418 // out. There is no reason to make this a preinc/predec.
18419 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18420 Ptr->hasOneUse())
18421 return false;
18422
18423 // Ask the target to do addressing mode selection.
18424 SDValue BasePtr;
18425 SDValue Offset;
18426 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18427 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18428 return false;
18429
18430 // Backends without true r+i pre-indexed forms may need to pass a
18431 // constant base with a variable offset so that constant coercion
18432 // will work with the patterns in canonical form.
18433 bool Swapped = false;
18434 if (isa<ConstantSDNode>(BasePtr)) {
18435 std::swap(BasePtr, Offset);
18436 Swapped = true;
18437 }
18438
18439 // Don't create an indexed load / store with zero offset.
18440 if (isNullConstant(Offset))
18441 return false;
18442
18443 // Try turning it into a pre-indexed load / store except when:
18444 // 1) The new base ptr is a frame index.
18445 // 2) If N is a store and the new base ptr is either the same as or is a
18446 // predecessor of the value being stored.
18447 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18448 // that would create a cycle.
18449 // 4) All uses are load / store ops that use it as old base ptr.
18450
18451 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18452 // (plus the implicit offset) to a register to preinc anyway.
18453 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18454 return false;
18455
18456 // Check #2.
18457 if (!IsLoad) {
18458 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18459 : cast<StoreSDNode>(N)->getValue();
18460
18461 // Would require a copy.
18462 if (Val == BasePtr)
18463 return false;
18464
18465 // Would create a cycle.
18466 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18467 return false;
18468 }
18469
18470 // Caches for hasPredecessorHelper.
18471 SmallPtrSet<const SDNode *, 32> Visited;
18472 SmallVector<const SDNode *, 16> Worklist;
18473 Worklist.push_back(N);
18474
18475 // If the offset is a constant, there may be other adds of constants that
18476 // can be folded with this one. We should do this to avoid having to keep
18477 // a copy of the original base pointer.
18478 SmallVector<SDNode *, 16> OtherUses;
18479 constexpr unsigned int MaxSteps = 8192;
18480 if (isa<ConstantSDNode>(Offset))
18481 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18482 UE = BasePtr->use_end();
18483 UI != UE; ++UI) {
18484 SDUse &Use = UI.getUse();
18485 // Skip the use that is Ptr and uses of other results from BasePtr's
18486 // node (important for nodes that return multiple results).
18487 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18488 continue;
18489
18490 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18491 MaxSteps))
18492 continue;
18493
18494 if (Use.getUser()->getOpcode() != ISD::ADD &&
18495 Use.getUser()->getOpcode() != ISD::SUB) {
18496 OtherUses.clear();
18497 break;
18498 }
18499
18500 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18501 if (!isa<ConstantSDNode>(Op1)) {
18502 OtherUses.clear();
18503 break;
18504 }
18505
18506 // FIXME: In some cases, we can be smarter about this.
18507 if (Op1.getValueType() != Offset.getValueType()) {
18508 OtherUses.clear();
18509 break;
18510 }
18511
18512 OtherUses.push_back(Use.getUser());
18513 }
18514
18515 if (Swapped)
18516 std::swap(BasePtr, Offset);
18517
18518 // Now check for #3 and #4.
18519 bool RealUse = false;
18520
18521 for (SDNode *Use : Ptr->uses()) {
18522 if (Use == N)
18523 continue;
18524 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18525 return false;
18526
18527 // If Ptr may be folded in addressing mode of other use, then it's
18528 // not profitable to do this transformation.
18529 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18530 RealUse = true;
18531 }
18532
18533 if (!RealUse)
18534 return false;
18536 SDValue Result;
18535
18537 if (!IsMasked) {
18538 if (IsLoad)
18539 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18540 else
18541 Result =
18542 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18543 } else {
18544 if (IsLoad)
18545 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18546 Offset, AM);
18547 else
18548 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18549 Offset, AM);
18550 }
18551 ++PreIndexedNodes;
18552 ++NodesCombined;
18553 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18554 Result.dump(&DAG); dbgs() << '\n');
18555 WorklistRemover DeadNodes(*this);
18556 if (IsLoad) {
18557 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18558 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18559 } else {
18560 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18561 }
18562
18563 // Finally, since the node is now dead, remove it from the graph.
18564 deleteAndRecombine(N);
18565
18566 if (Swapped)
18567 std::swap(BasePtr, Offset);
18568
18569 // Replace other uses of BasePtr that can be updated to use Ptr
18570 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18571 unsigned OffsetIdx = 1;
18572 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18573 OffsetIdx = 0;
18574 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18575 BasePtr.getNode() && "Expected BasePtr operand");
18576
18577 // We need to replace ptr0 in the following expression:
18578 // x0 * offset0 + y0 * ptr0 = t0
18579 // knowing that
18580 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18581 //
18582 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18583 // indexed load/store and the expression that needs to be re-written.
18584 //
18585 // Therefore, we have:
18586 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
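// Illustrative (hypothetical) example of the formula above: if the
// pre-indexed access uses ptr0 + 4 (so x1 = y1 = 1) and another user
// computes t0 = ptr0 + 8 (so x0 = y0 = 1), the formula gives
// t0 = (8 - 4) + t1, i.e. that other user is rewritten below as an ADD
// of the constant 4 to the updated base value t1.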
18587
18588 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18589 const APInt &Offset0 = CN->getAPIntValue();
18590 const APInt &Offset1 = Offset->getAsAPIntVal();
18591 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18592 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18593 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18594 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18595
18596 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18597
18598 APInt CNV = Offset0;
18599 if (X0 < 0) CNV = -CNV;
18600 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18601 else CNV = CNV - Offset1;
18602
18603 SDLoc DL(OtherUses[i]);
18604
18605 // We can now generate the new expression.
18606 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18607 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18608
18609 SDValue NewUse = DAG.getNode(Opcode,
18610 DL,
18611 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18612 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18613 deleteAndRecombine(OtherUses[i]);
18614 }
18615
18616 // Replace the uses of Ptr with uses of the updated base value.
18617 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18618 deleteAndRecombine(Ptr.getNode());
18619 AddToWorklist(Result.getNode());
18620
18621 return true;
18622}
18623
18624 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18625 SDValue &BasePtr, SDValue &Offset,
18626 ISD::MemIndexedMode &AM,
18627 SelectionDAG &DAG,
18628 const TargetLowering &TLI) {
18629 if (PtrUse == N ||
18630 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18631 return false;
18632
18633 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18634 return false;
18635
18636 // Don't create an indexed load / store with zero offset.
18637 if (isNullConstant(Offset))
18638 return false;
18639
18640 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18641 return false;
18642
18643 SmallPtrSet<const SDNode *, 32> Visited;
18644 for (SDNode *Use : BasePtr->uses()) {
18645 if (Use == Ptr.getNode())
18646 continue;
18647
18648 // Don't combine if there's a later user which could perform the index instead.
18649 if (isa<MemSDNode>(Use)) {
18650 bool IsLoad = true;
18651 bool IsMasked = false;
18652 SDValue OtherPtr;
18653 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18654 IsMasked, OtherPtr, TLI)) {
18655 SmallVector<const SDNode *, 2> Worklist;
18656 Worklist.push_back(Use);
18657 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18658 return false;
18659 }
18660 }
18661
18662 // If all the uses are load / store addresses, then don't do the
18663 // transformation.
18664 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18665 for (SDNode *UseUse : Use->uses())
18666 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18667 return false;
18668 }
18669 }
18670 return true;
18671}
18672
18673 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18674 bool &IsMasked, SDValue &Ptr,
18675 SDValue &BasePtr, SDValue &Offset,
18676 ISD::MemIndexedMode &AM,
18677 SelectionDAG &DAG,
18678 const TargetLowering &TLI) {
18679 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18680 IsMasked, Ptr, TLI) ||
18681 Ptr->hasOneUse())
18682 return nullptr;
18683
18684 // Try turning it into a post-indexed load / store except when
18685 // 1) All uses are load / store ops that use it as base ptr (and
18686 // it may be folded as addressing mode).
18687 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18688 // nor a successor of N. Otherwise, if Op is folded that would
18689 // create a cycle.
18690 for (SDNode *Op : Ptr->uses()) {
18691 // Check for #1.
18692 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18693 continue;
18694
18695 // Check for #2.
18696 SmallPtrSet<const SDNode *, 32> Visited;
18697 SmallVector<const SDNode *, 8> Worklist;
18698 constexpr unsigned int MaxSteps = 8192;
18699 // Ptr is predecessor to both N and Op.
18700 Visited.insert(Ptr.getNode());
18701 Worklist.push_back(N);
18702 Worklist.push_back(Op);
18703 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18704 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18705 return Op;
18706 }
18707 return nullptr;
18708}
18709
18710 /// Try to combine a load/store with an add/sub of the base pointer node into a
18711 /// post-indexed load/store. The transformation effectively folds the add/subtract
18712 /// into the new indexed load/store, and all other uses of the add/sub are
18713 /// redirected to the new load/store.
18714bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18715 if (Level < AfterLegalizeDAG)
18716 return false;
18717
18718 bool IsLoad = true;
18719 bool IsMasked = false;
18720 SDValue Ptr;
18721 SDValue BasePtr;
18722 SDValue Offset;
18723 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18724 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18725 Offset, AM, DAG, TLI);
18726 if (!Op)
18727 return false;
18728
18730 if (!IsMasked)
18731 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18732 Offset, AM)
18733 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18734 BasePtr, Offset, AM);
18735 else
18736 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18737 BasePtr, Offset, AM)
18738 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18739 BasePtr, Offset, AM);
18740 ++PostIndexedNodes;
18741 ++NodesCombined;
18742 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18743 Result.dump(&DAG); dbgs() << '\n');
18744 WorklistRemover DeadNodes(*this);
18745 if (IsLoad) {
18746 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18747 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18748 } else {
18749 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18750 }
18751
18752 // Finally, since the node is now dead, remove it from the graph.
18753 deleteAndRecombine(N);
18754
18755 // Replace the uses of Op with uses of the updated base value.
18756 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18757 Result.getValue(IsLoad ? 1 : 0));
18758 deleteAndRecombine(Op);
18759 return true;
18760}
18761
18762/// Return the base-pointer arithmetic from an indexed \p LD.
18763SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18764 ISD::MemIndexedMode AM = LD->getAddressingMode();
18765 assert(AM != ISD::UNINDEXED);
18766 SDValue BP = LD->getOperand(1);
18767 SDValue Inc = LD->getOperand(2);
18768
18769 // Some backends use TargetConstants for load offsets, but don't expect
18770 // TargetConstants in general ADD nodes. We can convert these constants into
18771 // regular Constants (if the constant is not opaque).
18772 assert((Inc.getOpcode() != ISD::TargetConstant ||
18773 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18774 "Cannot split out indexing using opaque target constants");
18775 if (Inc.getOpcode() == ISD::TargetConstant) {
18776 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18777 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18778 ConstInc->getValueType(0));
18779 }
18780
18781 unsigned Opc =
18782 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18783 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18784}
18785
18786 static inline ElementCount numVectorEltsOrZero(EVT T) {
18787 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18788}
18789
18790bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18791 EVT STType = Val.getValueType();
18792 EVT STMemType = ST->getMemoryVT();
18793 if (STType == STMemType)
18794 return true;
18795 if (isTypeLegal(STMemType))
18796 return false; // fail.
18797 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18798 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18799 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18800 return true;
18801 }
18802 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18803 STType.isInteger() && STMemType.isInteger()) {
18804 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18805 return true;
18806 }
18807 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18808 Val = DAG.getBitcast(STMemType, Val);
18809 return true;
18810 }
18811 return false; // fail.
18812}
18813
18814bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18815 EVT LDMemType = LD->getMemoryVT();
18816 EVT LDType = LD->getValueType(0);
18817 assert(Val.getValueType() == LDMemType &&
18818 "Attempting to extend value of non-matching type");
18819 if (LDType == LDMemType)
18820 return true;
18821 if (LDMemType.isInteger() && LDType.isInteger()) {
18822 switch (LD->getExtensionType()) {
18823 case ISD::NON_EXTLOAD:
18824 Val = DAG.getBitcast(LDType, Val);
18825 return true;
18826 case ISD::EXTLOAD:
18827 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18828 return true;
18829 case ISD::SEXTLOAD:
18830 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18831 return true;
18832 case ISD::ZEXTLOAD:
18833 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18834 return true;
18835 }
18836 }
18837 return false;
18838}
18839
18840StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18841 int64_t &Offset) {
18842 SDValue Chain = LD->getOperand(0);
18843
18844 // Look through CALLSEQ_START.
18845 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18846 Chain = Chain->getOperand(0);
18847
18848 StoreSDNode *ST = nullptr;
18849 SmallVector<SDValue, 8> Aliases;
18850 if (Chain.getOpcode() == ISD::TokenFactor) {
18851 // Look for unique store within the TokenFactor.
18852 for (SDValue Op : Chain->ops()) {
18853 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18854 if (!Store)
18855 continue;
18856 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18857 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18858 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18859 continue;
18860 // Make sure the store is not aliased with any nodes in TokenFactor.
18861 GatherAllAliases(Store, Chain, Aliases);
18862 if (Aliases.empty() ||
18863 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18864 ST = Store;
18865 break;
18866 }
18867 } else {
18868 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18869 if (Store) {
18870 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18871 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18872 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18873 ST = Store;
18874 }
18875 }
18876
18877 return ST;
18878}
18879
18880SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18881 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18882 return SDValue();
18883 SDValue Chain = LD->getOperand(0);
18884 int64_t Offset;
18885
18886 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18887 // TODO: Relax this restriction for unordered atomics (see D66309)
18888 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18889 return SDValue();
18890
18891 EVT LDType = LD->getValueType(0);
18892 EVT LDMemType = LD->getMemoryVT();
18893 EVT STMemType = ST->getMemoryVT();
18894 EVT STType = ST->getValue().getValueType();
18895
18896 // There are two cases to consider here:
18897 // 1. The store is fixed width and the load is scalable. In this case we
18898 // don't know at compile time if the store completely envelops the load
18899 // so we abandon the optimisation.
18900 // 2. The store is scalable and the load is fixed width. We could
18901 // potentially support a limited number of cases here, but there has been
18902 // no cost-benefit analysis to prove it's worth it.
18903 bool LdStScalable = LDMemType.isScalableVT();
18904 if (LdStScalable != STMemType.isScalableVT())
18905 return SDValue();
18906
18907 // If we are dealing with scalable vectors on a big endian platform the
18908 // calculation of offsets below becomes trickier, since we do not know at
18909 // compile time the absolute size of the vector. Until we've done more
18910 // analysis on big-endian platforms it seems better to bail out for now.
18911 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18912 return SDValue();
18913
18914 // Normalize for Endianness. After this Offset=0 will denote that the least
18915 // significant bit in the loaded value maps to the least significant bit in
18916 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18917 // n:th least significant byte of the stored value.
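// Illustrative (hypothetical) example: for a 4-byte store at address A and a
// 2-byte load at A+2 (raw Offset = 2), little-endian keeps Offset = 2, while
// on big-endian those bytes hold the least significant half of the stored
// value, so the formula below yields Offset = (32 - 16) / 8 - 2 = 0.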
18918 int64_t OrigOffset = Offset;
18919 if (DAG.getDataLayout().isBigEndian())
18920 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18921 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18922 8 -
18923 Offset;
18924
18925 // Check that the stored value covers all bits that are loaded.
18926 bool STCoversLD;
18927
18928 TypeSize LdMemSize = LDMemType.getSizeInBits();
18929 TypeSize StMemSize = STMemType.getSizeInBits();
18930 if (LdStScalable)
18931 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18932 else
18933 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18934 StMemSize.getFixedValue());
18935
18936 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18937 if (LD->isIndexed()) {
18938 // Cannot handle opaque target constants and we must respect the user's
18939 // request not to split indexes from loads.
18940 if (!canSplitIdx(LD))
18941 return SDValue();
18942 SDValue Idx = SplitIndexingFromLoad(LD);
18943 SDValue Ops[] = {Val, Idx, Chain};
18944 return CombineTo(LD, Ops, 3);
18945 }
18946 return CombineTo(LD, Val, Chain);
18947 };
18948
18949 if (!STCoversLD)
18950 return SDValue();
18951
18952 // Memory as copy space (potentially masked).
18953 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18954 // Simple case: Direct non-truncating forwarding
18955 if (LDType.getSizeInBits() == LdMemSize)
18956 return ReplaceLd(LD, ST->getValue(), Chain);
18957 // Can we model the truncate and extension with an and mask?
18958 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18959 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18960 // Mask to size of LDMemType
18961 auto Mask =
18962 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18963 StMemSize.getFixedValue()),
18964 SDLoc(ST), STType);
18965 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18966 return ReplaceLd(LD, Val, Chain);
18967 }
18968 }
18969
18970 // Handle some cases for big-endian that would be Offset 0 and handled for
18971 // little-endian.
18972 SDValue Val = ST->getValue();
18973 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18974 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18975 !LDType.isVector() && isTypeLegal(STType) &&
18976 TLI.isOperationLegal(ISD::SRL, STType)) {
18977 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18978 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18979 Offset = 0;
18980 }
18981 }
18982
18983 // TODO: Deal with nonzero offset.
18984 if (LD->getBasePtr().isUndef() || Offset != 0)
18985 return SDValue();
18986 // Model necessary truncations / extensions.
18987 // Truncate Value To Stored Memory Size.
18988 do {
18989 if (!getTruncatedStoreValue(ST, Val))
18990 continue;
18991 if (!isTypeLegal(LDMemType))
18992 continue;
18993 if (STMemType != LDMemType) {
18994 // TODO: Support vectors? This requires extract_subvector/bitcast.
18995 if (!STMemType.isVector() && !LDMemType.isVector() &&
18996 STMemType.isInteger() && LDMemType.isInteger())
18997 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18998 else
18999 continue;
19000 }
19001 if (!extendLoadedValueToExtension(LD, Val))
19002 continue;
19003 return ReplaceLd(LD, Val, Chain);
19004 } while (false);
19005
19006 // On failure, cleanup dead nodes we may have created.
19007 if (Val->use_empty())
19008 deleteAndRecombine(Val.getNode());
19009 return SDValue();
19010}
19011
19012SDValue DAGCombiner::visitLOAD(SDNode *N) {
19013 LoadSDNode *LD = cast<LoadSDNode>(N);
19014 SDValue Chain = LD->getChain();
19015 SDValue Ptr = LD->getBasePtr();
19016
19017 // If load is not volatile and there are no uses of the loaded value (and
19018 // the updated indexed value in case of indexed loads), change uses of the
19019 // chain value into uses of the chain input (i.e. delete the dead load).
19020 // TODO: Allow this for unordered atomics (see D66309)
19021 if (LD->isSimple()) {
19022 if (N->getValueType(1) == MVT::Other) {
19023 // Unindexed loads.
19024 if (!N->hasAnyUseOfValue(0)) {
19025 // It's not safe to use the two value CombineTo variant here. e.g.
19026 // v1, chain2 = load chain1, loc
19027 // v2, chain3 = load chain2, loc
19028 // v3 = add v2, c
19029 // Now we replace use of chain2 with chain1. This makes the second load
19030 // isomorphic to the one we are deleting, and thus makes this load live.
19031 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19032 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19033 dbgs() << "\n");
19034 WorklistRemover DeadNodes(*this);
19035 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19036 AddUsersToWorklist(Chain.getNode());
19037 if (N->use_empty())
19038 deleteAndRecombine(N);
19039
19040 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19041 }
19042 } else {
19043 // Indexed loads.
19044 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19045
19046 // If this load has an opaque TargetConstant offset, then we cannot split
19047 // the indexing into an add/sub directly (that TargetConstant may not be
19048 // valid for a different type of node, and we cannot convert an opaque
19049 // target constant into a regular constant).
19050 bool CanSplitIdx = canSplitIdx(LD);
19051
19052 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19053 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19054 SDValue Index;
19055 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19056 Index = SplitIndexingFromLoad(LD);
19057 // Try to fold the base pointer arithmetic into subsequent loads and
19058 // stores.
19059 AddUsersToWorklist(N);
19060 } else
19061 Index = DAG.getUNDEF(N->getValueType(1));
19062 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19063 dbgs() << "\nWith: "; Undef.dump(&DAG);
19064 dbgs() << " and 2 other values\n");
19065 WorklistRemover DeadNodes(*this);
19066 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19067 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19068 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19069 deleteAndRecombine(N);
19070 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19071 }
19072 }
19073 }
19074
19075 // If this load is directly stored, replace the load value with the stored
19076 // value.
19077 if (auto V = ForwardStoreValueToDirectLoad(LD))
19078 return V;
19079
19080 // Try to infer better alignment information than the load already has.
19081 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19082 !LD->isAtomic()) {
19083 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19084 if (*Alignment > LD->getAlign() &&
19085 isAligned(*Alignment, LD->getSrcValueOffset())) {
19086 SDValue NewLoad = DAG.getExtLoad(
19087 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19088 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19089 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19090 // NewLoad will always be N as we are only refining the alignment
19091 assert(NewLoad.getNode() == N);
19092 (void)NewLoad;
19093 }
19094 }
19095 }
19096
19097 if (LD->isUnindexed()) {
19098 // Walk up chain skipping non-aliasing memory nodes.
19099 SDValue BetterChain = FindBetterChain(LD, Chain);
19100
19101 // If there is a better chain.
19102 if (Chain != BetterChain) {
19103 SDValue ReplLoad;
19104
19105 // Replace the chain to avoid the dependency.
19106 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19107 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19108 BetterChain, Ptr, LD->getMemOperand());
19109 } else {
19110 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19111 LD->getValueType(0),
19112 BetterChain, Ptr, LD->getMemoryVT(),
19113 LD->getMemOperand());
19114 }
19115
19116 // Create token factor to keep old chain connected.
19117 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19118 MVT::Other, Chain, ReplLoad.getValue(1));
19119
19120 // Replace uses with load result and token factor
19121 return CombineTo(N, ReplLoad.getValue(0), Token);
19122 }
19123 }
19124
19125 // Try transforming N to an indexed load.
19126 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19127 return SDValue(N, 0);
19128
19129 // Try to slice up N to more direct loads if the slices are mapped to
19130 // different register banks or pairing can take place.
19131 if (SliceUpLoad(N))
19132 return SDValue(N, 0);
19133
19134 return SDValue();
19135}
19136
19137namespace {
19138
19139/// Helper structure used to slice a load in smaller loads.
19140/// Basically a slice is obtained from the following sequence:
19141/// Origin = load Ty1, Base
19142/// Shift = srl Ty1 Origin, CstTy Amount
19143/// Inst = trunc Shift to Ty2
19144///
19145/// Then, it will be rewritten into:
19146/// Slice = load SliceTy, Base + SliceOffset
19147/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19148///
19149/// SliceTy is deduced from the number of bits that are actually used to
19150/// build Inst.
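///
/// Illustrative (hypothetical) example: with Origin = (load i32 Base),
/// Shift = 16 and Inst = (trunc ... to i16), the used bits are bits 16-31 of
/// the original value, so SliceTy is i16 and, on a little-endian target, the
/// slice becomes a plain i16 load from Base + 2.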
19151struct LoadedSlice {
19152 /// Helper structure used to compute the cost of a slice.
19153 struct Cost {
19154 /// Are we optimizing for code size.
19155 bool ForCodeSize = false;
19156
19157 /// Various cost.
19158 unsigned Loads = 0;
19159 unsigned Truncates = 0;
19160 unsigned CrossRegisterBanksCopies = 0;
19161 unsigned ZExts = 0;
19162 unsigned Shift = 0;
19163
19164 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19165
19166 /// Get the cost of one isolated slice.
19167 Cost(const LoadedSlice &LS, bool ForCodeSize)
19168 : ForCodeSize(ForCodeSize), Loads(1) {
19169 EVT TruncType = LS.Inst->getValueType(0);
19170 EVT LoadedType = LS.getLoadedType();
19171 if (TruncType != LoadedType &&
19172 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19173 ZExts = 1;
19174 }
19175
19176 /// Account for slicing gain in the current cost.
19177 /// Slicing provides a few gains like removing a shift or a
19178 /// truncate. This method allows growing the cost of the original
19179 /// load with the gain from this slice.
19180 void addSliceGain(const LoadedSlice &LS) {
19181 // Each slice saves a truncate.
19182 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19183 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19184 ++Truncates;
19185 // If there is a shift amount, this slice gets rid of it.
19186 if (LS.Shift)
19187 ++Shift;
19188 // If this slice can merge a cross register bank copy, account for it.
19189 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19190 ++CrossRegisterBanksCopies;
19191 }
19192
19193 Cost &operator+=(const Cost &RHS) {
19194 Loads += RHS.Loads;
19195 Truncates += RHS.Truncates;
19196 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19197 ZExts += RHS.ZExts;
19198 Shift += RHS.Shift;
19199 return *this;
19200 }
19201
19202 bool operator==(const Cost &RHS) const {
19203 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19204 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19205 ZExts == RHS.ZExts && Shift == RHS.Shift;
19206 }
19207
19208 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19209
19210 bool operator<(const Cost &RHS) const {
19211 // Assume cross register banks copies are as expensive as loads.
19212 // FIXME: Do we want some more target hooks?
19213 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19214 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19215 // Unless we are optimizing for code size, consider the
19216 // expensive operation first.
19217 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19218 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19219 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19220 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19221 }
19222
19223 bool operator>(const Cost &RHS) const { return RHS < *this; }
19224
19225 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19226
19227 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19228 };
19229
19230 // The last instruction that represents the slice. This should be a
19231 // truncate instruction.
19232 SDNode *Inst;
19233
19234 // The original load instruction.
19235 LoadSDNode *Origin;
19236
19237 // The right shift amount in bits from the original load.
19238 unsigned Shift;
19239
19240 // The DAG from which Origin came.
19241 // This is used to get some contextual information about legal types, etc.
19242 SelectionDAG *DAG;
19243
19244 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19245 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19246 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19247
19248 /// Get the bits used in a chunk of bits \p BitWidth large.
19249 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
19250 /// unused bits set to 0.
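  /// Illustrative (hypothetical) example: with a 32-bit Origin, an i8 Inst
  /// and Shift == 16, the result is a 32-bit APInt with value 0x00FF0000.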
19251 APInt getUsedBits() const {
19252 // Reproduce the trunc(lshr) sequence:
19253 // - Start from the truncated value.
19254 // - Zero extend to the desired bit width.
19255 // - Shift left.
19256 assert(Origin && "No original load to compare against.");
19257 unsigned BitWidth = Origin->getValueSizeInBits(0);
19258 assert(Inst && "This slice is not bound to an instruction");
19259 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19260 "Extracted slice is bigger than the whole type!");
19261 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19262 UsedBits.setAllBits();
19263 UsedBits = UsedBits.zext(BitWidth);
19264 UsedBits <<= Shift;
19265 return UsedBits;
19266 }
19267
19268 /// Get the size of the slice to be loaded in bytes.
19269 unsigned getLoadedSize() const {
19270 unsigned SliceSize = getUsedBits().popcount();
19271 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19272 return SliceSize / 8;
19273 }
19274
19275 /// Get the type that will be loaded for this slice.
19276 /// Note: This may not be the final type for the slice.
19277 EVT getLoadedType() const {
19278 assert(DAG && "Missing context");
19279 LLVMContext &Ctxt = *DAG->getContext();
19280 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19281 }
19282
19283 /// Get the alignment of the load used for this slice.
19284 Align getAlign() const {
19285 Align Alignment = Origin->getAlign();
19286 uint64_t Offset = getOffsetFromBase();
19287 if (Offset != 0)
19288 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19289 return Alignment;
19290 }
19291
19292 /// Check if this slice can be rewritten with legal operations.
19293 bool isLegal() const {
19294 // An invalid slice is not legal.
19295 if (!Origin || !Inst || !DAG)
19296 return false;
19297
19298 // Offsets are for indexed load only, we do not handle that.
19299 if (!Origin->getOffset().isUndef())
19300 return false;
19301
19302 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19303
19304 // Check that the type is legal.
19305 EVT SliceType = getLoadedType();
19306 if (!TLI.isTypeLegal(SliceType))
19307 return false;
19308
19309 // Check that the load is legal for this type.
19310 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19311 return false;
19312
19313 // Check that the offset can be computed.
19314 // 1. Check its type.
19315 EVT PtrType = Origin->getBasePtr().getValueType();
19316 if (PtrType == MVT::Untyped || PtrType.isExtended())
19317 return false;
19318
19319 // 2. Check that it fits in the immediate.
19320 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19321 return false;
19322
19323 // 3. Check that the computation is legal.
19324 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19325 return false;
19326
19327 // Check that the zext is legal if it needs one.
19328 EVT TruncateType = Inst->getValueType(0);
19329 if (TruncateType != SliceType &&
19330 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19331 return false;
19332
19333 return true;
19334 }
19335
19336 /// Get the offset in bytes of this slice in the original chunk of
19337 /// bits.
19338 /// \pre DAG != nullptr.
19339 uint64_t getOffsetFromBase() const {
19340 assert(DAG && "Missing context.");
19341 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19342 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19343 uint64_t Offset = Shift / 8;
19344 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19345 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19346 "The size of the original loaded type is not a multiple of a"
19347 " byte.");
19348 // If Offset is bigger than TySizeInBytes, it means we are loading all
19349 // zeros. This should have been optimized before in the process.
19350 assert(TySizeInBytes > Offset &&
19351 "Invalid shift amount for given loaded size");
19352 if (IsBigEndian)
19353 Offset = TySizeInBytes - Offset - getLoadedSize();
19354 return Offset;
19355 }
19356
19357 /// Generate the sequence of instructions to load the slice
19358 /// represented by this object and redirect the uses of this slice to
19359 /// this new sequence of instructions.
19360 /// \pre this->Inst && this->Origin are valid Instructions and this
19361 /// object passed the legal check: LoadedSlice::isLegal returned true.
19362 /// \return The last instruction of the sequence used to load the slice.
19363 SDValue loadSlice() const {
19364 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19365 const SDValue &OldBaseAddr = Origin->getBasePtr();
19366 SDValue BaseAddr = OldBaseAddr;
19367 // Get the offset in that chunk of bytes w.r.t. the endianness.
19368 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19369 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19370 if (Offset) {
19371 // BaseAddr = BaseAddr + Offset.
19372 EVT ArithType = BaseAddr.getValueType();
19373 SDLoc DL(Origin);
19374 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19375 DAG->getConstant(Offset, DL, ArithType));
19376 }
19377
19378 // Create the type of the loaded slice according to its size.
19379 EVT SliceType = getLoadedType();
19380
19381 // Create the load for the slice.
19382 SDValue LastInst =
19383 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19384 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19385 Origin->getMemOperand()->getFlags());
19386 // If the final type is not the same as the loaded type, this means that
19387 // we have to pad with zero. Create a zero extend for that.
19388 EVT FinalType = Inst->getValueType(0);
19389 if (SliceType != FinalType)
19390 LastInst =
19391 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19392 return LastInst;
19393 }
19394
19395 /// Check if this slice can be merged with an expensive cross register
19396 /// bank copy. E.g.,
19397 /// i = load i32
19398 /// f = bitcast i32 i to float
19399 bool canMergeExpensiveCrossRegisterBankCopy() const {
19400 if (!Inst || !Inst->hasOneUse())
19401 return false;
19402 SDNode *Use = *Inst->use_begin();
19403 if (Use->getOpcode() != ISD::BITCAST)
19404 return false;
19405 assert(DAG && "Missing context");
19406 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19407 EVT ResVT = Use->getValueType(0);
19408 const TargetRegisterClass *ResRC =
19409 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19410 const TargetRegisterClass *ArgRC =
19411 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19412 Use->getOperand(0)->isDivergent());
19413 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19414 return false;
19415
19416 // At this point, we know that we perform a cross-register-bank copy.
19417 // Check if it is expensive.
19418 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19419 // Assume bitcasts are cheap, unless both register classes do not
19420 // explicitly share a common sub class.
19421 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19422 return false;
19423
19424 // Check if it will be merged with the load.
19425 // 1. Check the alignment / fast memory access constraint.
19426 unsigned IsFast = 0;
19427 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19428 Origin->getAddressSpace(), getAlign(),
19429 Origin->getMemOperand()->getFlags(), &IsFast) ||
19430 !IsFast)
19431 return false;
19432
19433 // 2. Check that the load is a legal operation for that type.
19434 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19435 return false;
19436
19437 // 3. Check that we do not have a zext in the way.
19438 if (Inst->getValueType(0) != getLoadedType())
19439 return false;
19440
19441 return true;
19442 }
19443};
19444
19445} // end anonymous namespace
19446
19447/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19448/// \p UsedBits looks like 0..0 1..1 0..0.
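/// Illustrative (hypothetical) example: 0x00FFFF00 is dense, while 0x00FF00FF
/// is not, because there is a hole between the two used byte ranges.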
19449static bool areUsedBitsDense(const APInt &UsedBits) {
19450 // If all the bits are one, this is dense!
19451 if (UsedBits.isAllOnes())
19452 return true;
19453
19454 // Get rid of the unused bits on the right.
19455 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19456 // Get rid of the unused bits on the left.
19457 if (NarrowedUsedBits.countl_zero())
19458 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19459 // Check that the chunk of bits is completely used.
19460 return NarrowedUsedBits.isAllOnes();
19461}
19462
19463/// Check whether or not \p First and \p Second are next to each other
19464/// in memory. This means that there is no hole between the bits loaded
19465/// by \p First and the bits loaded by \p Second.
19466static bool areSlicesNextToEachOther(const LoadedSlice &First,
19467 const LoadedSlice &Second) {
19468 assert(First.Origin == Second.Origin && First.Origin &&
19469 "Unable to match different memory origins.");
19470 APInt UsedBits = First.getUsedBits();
19471 assert((UsedBits & Second.getUsedBits()) == 0 &&
19472 "Slices are not supposed to overlap.");
19473 UsedBits |= Second.getUsedBits();
19474 return areUsedBitsDense(UsedBits);
19475}
19476
19477/// Adjust the \p GlobalLSCost according to the target
19478 /// pairing capabilities and the layout of the slices.
19479/// \pre \p GlobalLSCost should account for at least as many loads as
19480 /// there are in the slices in \p LoadedSlices.
19481 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19482 LoadedSlice::Cost &GlobalLSCost) {
19483 unsigned NumberOfSlices = LoadedSlices.size();
19484 // If there are fewer than 2 elements, no pairing is possible.
19485 if (NumberOfSlices < 2)
19486 return;
19487
19488 // Sort the slices so that elements that are likely to be next to each
19489 // other in memory are next to each other in the list.
19490 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19491 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19492 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19493 });
19494 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19495 // First (resp. Second) is the first (resp. second) potential candidate
19496 // to be placed in a paired load.
19497 const LoadedSlice *First = nullptr;
19498 const LoadedSlice *Second = nullptr;
19499 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19500 // Set the beginning of the pair.
19501 First = Second) {
19502 Second = &LoadedSlices[CurrSlice];
19503
19504 // If First is NULL, it means we start a new pair.
19505 // Get to the next slice.
19506 if (!First)
19507 continue;
19508
19509 EVT LoadedType = First->getLoadedType();
19510
19511 // If the types of the slices are different, we cannot pair them.
19512 if (LoadedType != Second->getLoadedType())
19513 continue;
19514
19515 // Check if the target supplies paired loads for this type.
19516 Align RequiredAlignment;
19517 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19518 // move to the next pair, this type is hopeless.
19519 Second = nullptr;
19520 continue;
19521 }
19522 // Check if we meet the alignment requirement.
19523 if (First->getAlign() < RequiredAlignment)
19524 continue;
19525
19526 // Check that both loads are next to each other in memory.
19527 if (!areSlicesNextToEachOther(*First, *Second))
19528 continue;
19529
19530 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19531 --GlobalLSCost.Loads;
19532 // Move to the next pair.
19533 Second = nullptr;
19534 }
19535}
19536
19537/// Check the profitability of all involved LoadedSlice.
19538 /// Currently, it is considered profitable if there are exactly two
19539/// involved slices (1) which are (2) next to each other in memory, and
19540/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19541///
19542/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19543/// the elements themselves.
19544///
19545/// FIXME: When the cost model will be mature enough, we can relax
19546/// constraints (1) and (2).
19547 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19548 const APInt &UsedBits, bool ForCodeSize) {
19549 unsigned NumberOfSlices = LoadedSlices.size();
19550 if (StressLoadSlicing)
19551 return NumberOfSlices > 1;
19552
19553 // Check (1).
19554 if (NumberOfSlices != 2)
19555 return false;
19556
19557 // Check (2).
19558 if (!areUsedBitsDense(UsedBits))
19559 return false;
19560
19561 // Check (3).
19562 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19563 // The original code has one big load.
19564 OrigCost.Loads = 1;
19565 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19566 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19567 // Accumulate the cost of all the slices.
19568 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19569 GlobalSlicingCost += SliceCost;
19570
19571 // Account as cost in the original configuration the gain obtained
19572 // with the current slices.
19573 OrigCost.addSliceGain(LS);
19574 }
19575
19576 // If the target supports paired load, adjust the cost accordingly.
19577 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19578 return OrigCost > GlobalSlicingCost;
19579}
19580
19581/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19582/// operations, split it in the various pieces being extracted.
19583///
19584/// This sort of thing is introduced by SROA.
19585/// This slicing takes care not to insert overlapping loads.
19586/// \pre LI is a simple load (i.e., not an atomic or volatile load).
19587bool DAGCombiner::SliceUpLoad(SDNode *N) {
19588 if (Level < AfterLegalizeDAG)
19589 return false;
19590
19591 LoadSDNode *LD = cast<LoadSDNode>(N);
19592 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19593 !LD->getValueType(0).isInteger())
19594 return false;
19595
19596 // The algorithm to split up a load of a scalable vector into individual
19597 // elements currently requires knowing the length of the loaded type,
19598 // so will need adjusting to work on scalable vectors.
19599 if (LD->getValueType(0).isScalableVector())
19600 return false;
19601
19602 // Keep track of already used bits to detect overlapping values.
19603 // In that case, we will just abort the transformation.
19604 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19605
19606 SmallVector<LoadedSlice, 4> LoadedSlices;
19607
19608 // Check if this load is used as several smaller chunks of bits.
19609 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19610 // of computation for each trunc.
19611 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19612 UI != UIEnd; ++UI) {
19613 // Skip the uses of the chain.
19614 if (UI.getUse().getResNo() != 0)
19615 continue;
19616
19617 SDNode *User = *UI;
19618 unsigned Shift = 0;
19619
19620 // Check if this is a trunc(lshr).
19621 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19622 isa<ConstantSDNode>(User->getOperand(1))) {
19623 Shift = User->getConstantOperandVal(1);
19624 User = *User->use_begin();
19625 }
19626
19627 // At this point, User is a truncate iff we encountered trunc or
19628 // trunc(lshr).
19629 if (User->getOpcode() != ISD::TRUNCATE)
19630 return false;
19631
19632 // The width of the type must be a power of 2 and at least 8 bits.
19633 // Otherwise the load cannot be represented in LLVM IR.
19634 // Moreover, if the shift amount is not a multiple of 8 bits, the slice
19635 // would straddle byte boundaries. We do not support that.
19636 unsigned Width = User->getValueSizeInBits(0);
19637 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19638 return false;
19639
19640 // Build the slice for this chain of computations.
19641 LoadedSlice LS(User, LD, Shift, &DAG);
19642 APInt CurrentUsedBits = LS.getUsedBits();
19643
19644 // Check if this slice overlaps with another.
19645 if ((CurrentUsedBits & UsedBits) != 0)
19646 return false;
19647 // Update the bits used globally.
19648 UsedBits |= CurrentUsedBits;
19649
19650 // Check if the new slice would be legal.
19651 if (!LS.isLegal())
19652 return false;
19653
19654 // Record the slice.
19655 LoadedSlices.push_back(LS);
19656 }
19657
19658 // Abort slicing if it does not seem to be profitable.
19659 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19660 return false;
19661
19662 ++SlicedLoads;
19663
19664 // Rewrite each chain to use an independent load.
19665 // By construction, each chain can be represented by a unique load.
19666
19667 // Prepare the argument for the new token factor for all the slices.
19668 SmallVector<SDValue, 8> ArgChains;
19669 for (const LoadedSlice &LS : LoadedSlices) {
19670 SDValue SliceInst = LS.loadSlice();
19671 CombineTo(LS.Inst, SliceInst, true);
19672 if (SliceInst.getOpcode() != ISD::LOAD)
19673 SliceInst = SliceInst.getOperand(0);
19674 assert(SliceInst->getOpcode() == ISD::LOAD &&
19675 "It takes more than a zext to get to the loaded slice!!");
19676 ArgChains.push_back(SliceInst.getValue(1));
19677 }
19678
19679 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19680 ArgChains);
19681 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19682 AddToWorklist(Chain.getNode());
19683 return true;
19684}
19685
19686/// Check to see if V is (and load (ptr), imm), where the load is having
19687/// specific bytes cleared out. If so, return the byte size being masked out
19688/// and the shift amount.
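/// For example (illustrative): with V = (and (load Ptr), 0xFFFF0000) on an
/// i32 value, the low two bytes are cleared, so this returns {2, 0}: two
/// bytes masked out, starting at byte offset 0.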
19689static std::pair<unsigned, unsigned>
19690CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19691 std::pair<unsigned, unsigned> Result(0, 0);
19692
19693 // Check for the structure we're looking for.
19694 if (V->getOpcode() != ISD::AND ||
19695 !isa<ConstantSDNode>(V->getOperand(1)) ||
19696 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19697 return Result;
19698
19699 // Check the chain and pointer.
19700 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19701 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19702
19703 // This only handles simple types.
19704 if (V.getValueType() != MVT::i16 &&
19705 V.getValueType() != MVT::i32 &&
19706 V.getValueType() != MVT::i64)
19707 return Result;
19708
19709 // Check the constant mask. Invert it so that the bits being masked out are
19710 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
19711 // follow the sign bit for uniformity.
19712 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19713 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19714 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19715 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19716 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19717 if (NotMaskLZ == 64) return Result; // All zero mask.
19718
19719 // See if we have a continuous run of bits. If so, we have 0*1+0*
19720 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19721 return Result;
19722
19723 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19724 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19725 NotMaskLZ -= 64-V.getValueSizeInBits();
19726
19727 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19728 switch (MaskedBytes) {
19729 case 1:
19730 case 2:
19731 case 4: break;
19732 default: return Result; // All one mask, or 5-byte mask.
19733 }
19734
19735 // Verify that the first bit starts at a multiple of mask so that the access
19736 // is aligned the same as the access width.
19737 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19738
19739 // For narrowing to be valid, it must be the case that the load is the
19740 // immediately preceding memory operation before the store.
19741 if (LD == Chain.getNode())
19742 ; // ok.
19743 else if (Chain->getOpcode() == ISD::TokenFactor &&
19744 SDValue(LD, 1).hasOneUse()) {
19745 // LD has only 1 chain use so there are no indirect dependencies.
19746 if (!LD->isOperandOf(Chain.getNode()))
19747 return Result;
19748 } else
19749 return Result; // Fail.
19750
19751 Result.first = MaskedBytes;
19752 Result.second = NotMaskTZ/8;
19753 return Result;
19754}
19755
19756/// Check to see if IVal is something that provides a value as specified by
19757/// MaskInfo. If so, replace the specified store with a narrower store of
19758/// truncated IVal.
19759static SDValue
19760ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19761 SDValue IVal, StoreSDNode *St,
19762 DAGCombiner *DC) {
19763 unsigned NumBytes = MaskInfo.first;
19764 unsigned ByteShift = MaskInfo.second;
19765 SelectionDAG &DAG = DC->getDAG();
19766
19767 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19768 // that uses this. If not, this is not a replacement.
19769 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19770 ByteShift*8, (ByteShift+NumBytes)*8);
19771 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19772
19773 // Check that it is legal on the target to do this. It is legal if the new
19774 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19775 // legalization. If the source type is legal, but the store type isn't, see
19776 // if we can use a truncating store.
19777 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19778 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19779 bool UseTruncStore;
19780 if (DC->isTypeLegal(VT))
19781 UseTruncStore = false;
19782 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19783 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19784 UseTruncStore = true;
19785 else
19786 return SDValue();
19787
19788 // Can't do this for indexed stores.
19789 if (St->isIndexed())
19790 return SDValue();
19791
19792 // Check that the target doesn't think this is a bad idea.
19793 if (St->getMemOperand() &&
19794 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19795 *St->getMemOperand()))
19796 return SDValue();
19797
19798 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19799 // shifted by ByteShift and truncated down to NumBytes.
19800 if (ByteShift) {
19801 SDLoc DL(IVal);
19802 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19803 DAG.getConstant(ByteShift*8, DL,
19804 DC->getShiftAmountTy(IVal.getValueType())));
19805 }
19806
19807 // Figure out the offset for the store and the alignment of the access.
19808 unsigned StOffset;
19809 if (DAG.getDataLayout().isLittleEndian())
19810 StOffset = ByteShift;
19811 else
19812 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19813
19814 SDValue Ptr = St->getBasePtr();
19815 if (StOffset) {
19816 SDLoc DL(IVal);
19817 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19818 }
19819
19820 ++OpsNarrowed;
19821 if (UseTruncStore)
19822 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19823 St->getPointerInfo().getWithOffset(StOffset),
19824 VT, St->getOriginalAlign());
19825
19826 // Truncate down to the new size.
19827 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19828
19829 return DAG
19830 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19831 St->getPointerInfo().getWithOffset(StOffset),
19832 St->getOriginalAlign());
19833}
19834
19835/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19836/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19837/// narrowing the load and store if it would end up being a win for performance
19838/// or code size.
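/// For example (illustrative): "store (or (load p), 0xFF00), p" on an i32
/// value only touches byte 1, so it can be narrowed to an i8 load, an i8
/// 'or', and an i8 store at byte offset 1 on a little-endian target.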
19839SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19840 StoreSDNode *ST = cast<StoreSDNode>(N);
19841 if (!ST->isSimple())
19842 return SDValue();
19843
19844 SDValue Chain = ST->getChain();
19845 SDValue Value = ST->getValue();
19846 SDValue Ptr = ST->getBasePtr();
19847 EVT VT = Value.getValueType();
19848
19849 if (ST->isTruncatingStore() || VT.isVector())
19850 return SDValue();
19851
19852 unsigned Opc = Value.getOpcode();
19853
19854 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19855 !Value.hasOneUse())
19856 return SDValue();
19857
19858 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19859 // is a byte mask indicating a consecutive number of bytes, check to see if
19860 // Y is known to provide just those bytes. If so, we try to replace the
19861 // load + replace + store sequence with a single (narrower) store, which makes
19862 // the load dead.
19863 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19864 std::pair<unsigned, unsigned> MaskedLoad;
19865 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19866 if (MaskedLoad.first)
19867 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19868 Value.getOperand(1), ST,this))
19869 return NewST;
19870
19871 // Or is commutative, so try swapping X and Y.
19872 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19873 if (MaskedLoad.first)
19874 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19875 Value.getOperand(0), ST,this))
19876 return NewST;
19877 }
19878
19879 if (!EnableReduceLoadOpStoreWidth)
19880 return SDValue();
19881
19882 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19883 return SDValue();
19884
19885 SDValue N0 = Value.getOperand(0);
19886 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19887 Chain == SDValue(N0.getNode(), 1)) {
19888 LoadSDNode *LD = cast<LoadSDNode>(N0);
19889 if (LD->getBasePtr() != Ptr ||
19890 LD->getPointerInfo().getAddrSpace() !=
19891 ST->getPointerInfo().getAddrSpace())
19892 return SDValue();
19893
19894 // Find the type to narrow it the load / op / store to.
19895 SDValue N1 = Value.getOperand(1);
19896 unsigned BitWidth = N1.getValueSizeInBits();
19897 APInt Imm = N1->getAsAPIntVal();
19898 if (Opc == ISD::AND)
19899 Imm ^= APInt::getAllOnes(BitWidth);
19900 if (Imm == 0 || Imm.isAllOnes())
19901 return SDValue();
19902 unsigned ShAmt = Imm.countr_zero();
19903 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19904 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19905 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19906 // The narrowing should be profitable, the load/store operation should be
19907 // legal (or custom) and the store size should be equal to the NewVT width.
19908 while (NewBW < BitWidth &&
19909 (NewVT.getStoreSizeInBits() != NewBW ||
19910 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19911 !TLI.isNarrowingProfitable(VT, NewVT))) {
19912 NewBW = NextPowerOf2(NewBW);
19913 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19914 }
19915 if (NewBW >= BitWidth)
19916 return SDValue();
19917
19918 // If the lsb of the changed bits does not start at a NewBW boundary,
19919 // start at the previous one.
19920 if (ShAmt % NewBW)
19921 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19922 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19923 std::min(BitWidth, ShAmt + NewBW));
19924 if ((Imm & Mask) == Imm) {
19925 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19926 if (Opc == ISD::AND)
19927 NewImm ^= APInt::getAllOnes(NewBW);
19928 uint64_t PtrOff = ShAmt / 8;
19929 // For big endian targets, we need to adjust the offset to the pointer to
19930 // load the correct bytes.
19931 if (DAG.getDataLayout().isBigEndian())
19932 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19933
19934 unsigned IsFast = 0;
19935 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19936 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19937 LD->getAddressSpace(), NewAlign,
19938 LD->getMemOperand()->getFlags(), &IsFast) ||
19939 !IsFast)
19940 return SDValue();
19941
19942 SDValue NewPtr =
19943 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19944 SDValue NewLD =
19945 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19946 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19947 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19948 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19949 DAG.getConstant(NewImm, SDLoc(Value),
19950 NewVT));
19951 SDValue NewST =
19952 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19953 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19954
19955 AddToWorklist(NewPtr.getNode());
19956 AddToWorklist(NewLD.getNode());
19957 AddToWorklist(NewVal.getNode());
19958 WorklistRemover DeadNodes(*this);
19959 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19960 ++OpsNarrowed;
19961 return NewST;
19962 }
19963 }
19964
19965 return SDValue();
19966}
19967
19968/// For a given floating point load / store pair, if the load value isn't used
19969/// by any other operations, then consider transforming the pair to integer
19970/// load / store operations if the target deems the transformation profitable.
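/// For example (illustrative): an f64 value that is loaded and then stored
/// with no other uses may be copied through an i64 load and i64 store when
/// the target reports the integer operations as preferable.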
19971SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19972 StoreSDNode *ST = cast<StoreSDNode>(N);
19973 SDValue Value = ST->getValue();
19974 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19975 Value.hasOneUse()) {
19976 LoadSDNode *LD = cast<LoadSDNode>(Value);
19977 EVT VT = LD->getMemoryVT();
19978 if (!VT.isFloatingPoint() ||
19979 VT != ST->getMemoryVT() ||
19980 LD->isNonTemporal() ||
19981 ST->isNonTemporal() ||
19982 LD->getPointerInfo().getAddrSpace() != 0 ||
19983 ST->getPointerInfo().getAddrSpace() != 0)
19984 return SDValue();
19985
19986 TypeSize VTSize = VT.getSizeInBits();
19987
19988 // We don't know the size of scalable types at compile time so we cannot
19989 // create an integer of the equivalent size.
19990 if (VTSize.isScalable())
19991 return SDValue();
19992
19993 unsigned FastLD = 0, FastST = 0;
19994 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19995 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19996 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19997 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19998 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19999 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20000 *LD->getMemOperand(), &FastLD) ||
20001 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20002 *ST->getMemOperand(), &FastST) ||
20003 !FastLD || !FastST)
20004 return SDValue();
20005
20006 SDValue NewLD =
20007 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
20008 LD->getPointerInfo(), LD->getAlign());
20009
20010 SDValue NewST =
20011 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
20012 ST->getPointerInfo(), ST->getAlign());
20013
20014 AddToWorklist(NewLD.getNode());
20015 AddToWorklist(NewST.getNode());
20016 WorklistRemover DeadNodes(*this);
20017 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20018 ++LdStFP2Int;
20019 return NewST;
20020 }
20021
20022 return SDValue();
20023}
20024
20025// This is a helper function for visitMUL to check the profitability
20026// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20027// MulNode is the original multiply, AddNode is (add x, c1),
20028// and ConstNode is c2.
20029//
20030// If the (add x, c1) has multiple uses, we could increase
20031// the number of adds if we make this transformation.
20032// It would only be worth doing this if we can remove a
20033// multiply in the process. Check for that here.
20034// To illustrate:
20035// (A + c1) * c3
20036// (A + c2) * c3
20037// We're checking for cases where we have common "c3 * A" expressions.
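// For example (illustrative): if both (A + 1) * 5 and (A + 2) * 5 appear,
// rewriting each as (A * 5) + {5, 10} exposes a common "A * 5" expression,
// so the transformation is considered profitable.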
20038bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20039 SDValue ConstNode) {
20040 APInt Val;
20041
20042 // If the add only has one use, and the target thinks the folding is
20043 // profitable or does not lead to worse code, this would be OK to do.
20044 if (AddNode->hasOneUse() &&
20045 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20046 return true;
20047
20048 // Walk all the users of the constant with which we're multiplying.
20049 for (SDNode *Use : ConstNode->uses()) {
20050 if (Use == MulNode) // This use is the one we're on right now. Skip it.
20051 continue;
20052
20053 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
20054 SDNode *OtherOp;
20055 SDNode *MulVar = AddNode.getOperand(0).getNode();
20056
20057 // OtherOp is what we're multiplying against the constant.
20058 if (Use->getOperand(0) == ConstNode)
20059 OtherOp = Use->getOperand(1).getNode();
20060 else
20061 OtherOp = Use->getOperand(0).getNode();
20062
20063 // Check to see if multiply is with the same operand of our "add".
20064 //
20065 // ConstNode = CONST
20066 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
20067 // ...
20068 // AddNode = (A + c1) <-- MulVar is A.
20069 // = AddNode * ConstNode <-- current visiting instruction.
20070 //
20071 // If we make this transformation, we will have a common
20072 // multiply (ConstNode * A) that we can save.
20073 if (OtherOp == MulVar)
20074 return true;
20075
20076 // Now check to see if a future expansion will give us a common
20077 // multiply.
20078 //
20079 // ConstNode = CONST
20080 // AddNode = (A + c1)
20081 // ... = AddNode * ConstNode <-- current visiting instruction.
20082 // ...
20083 // OtherOp = (A + c2)
20084 // Use = OtherOp * ConstNode <-- visiting Use.
20085 //
20086 // If we make this transformation, we will have a common
20087 // multiply (CONST * A) after we also do the same transformation
20088 // to the other multiply (Use = OtherOp * ConstNode) as well.
20089 if (OtherOp->getOpcode() == ISD::ADD &&
20090 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20091 OtherOp->getOperand(0).getNode() == MulVar)
20092 return true;
20093 }
20094 }
20095
20096 // Didn't find a case where this would be profitable.
20097 return false;
20098}
20099
20100SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20101 unsigned NumStores) {
20102 SmallPtrSet<const SDNode *, 8> Visited;
20103 SmallVector<SDValue, 8> Chains;
20104 SDLoc StoreDL(StoreNodes[0].MemNode);
20105
20106 for (unsigned i = 0; i < NumStores; ++i) {
20107 Visited.insert(StoreNodes[i].MemNode);
20108 }
20109
20110 // don't include nodes that are children or repeated nodes.
20111 for (unsigned i = 0; i < NumStores; ++i) {
20112 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20113 Chains.push_back(StoreNodes[i].MemNode->getChain());
20114 }
20115
20116 assert(!Chains.empty() && "Chain should have generated a chain");
20117 return DAG.getTokenFactor(StoreDL, Chains);
20118}
20119
20120bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20121 const Value *UnderlyingObj = nullptr;
20122 for (const auto &MemOp : StoreNodes) {
20123 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20124 // A pseudo value like a stack frame has its own frame index and size; we
20125 // should not use the first store's frame index for other frames.
20126 if (MMO->getPseudoValue())
20127 return false;
20128
20129 if (!MMO->getValue())
20130 return false;
20131
20132 const Value *Obj = getUnderlyingObject(MMO->getValue());
20133
20134 if (UnderlyingObj && UnderlyingObj != Obj)
20135 return false;
20136
20137 if (!UnderlyingObj)
20138 UnderlyingObj = Obj;
20139 }
20140
20141 return true;
20142}
20143
20144bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20145 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20146 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20147 // Make sure we have something to merge.
20148 if (NumStores < 2)
20149 return false;
20150
20151 assert((!UseTrunc || !UseVector) &&
20152 "This optimization cannot emit a vector truncating store");
20153
20154 // The latest Node in the DAG.
20155 SDLoc DL(StoreNodes[0].MemNode);
20156
20157 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20158 unsigned SizeInBits = NumStores * ElementSizeBits;
20159 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20160
20161 std::optional<MachineMemOperand::Flags> Flags;
20162 AAMDNodes AAInfo;
20163 for (unsigned I = 0; I != NumStores; ++I) {
20164 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20165 if (!Flags) {
20166 Flags = St->getMemOperand()->getFlags();
20167 AAInfo = St->getAAInfo();
20168 continue;
20169 }
20170 // Skip merging if there's an inconsistent flag.
20171 if (Flags != St->getMemOperand()->getFlags())
20172 return false;
20173 // Concatenate AA metadata.
20174 AAInfo = AAInfo.concat(St->getAAInfo());
20175 }
20176
20177 EVT StoreTy;
20178 if (UseVector) {
20179 unsigned Elts = NumStores * NumMemElts;
20180 // Get the type for the merged vector store.
20181 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20182 } else
20183 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20184
20185 SDValue StoredVal;
20186 if (UseVector) {
20187 if (IsConstantSrc) {
20188 SmallVector<SDValue, 8> BuildVector;
20189 for (unsigned I = 0; I != NumStores; ++I) {
20190 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20191 SDValue Val = St->getValue();
20192 // If constant is of the wrong type, convert it now. This comes up
20193 // when one of our stores was truncating.
20194 if (MemVT != Val.getValueType()) {
20195 Val = peekThroughBitcasts(Val);
20196 // Deal with constants of wrong size.
20197 if (ElementSizeBits != Val.getValueSizeInBits()) {
20198 auto *C = dyn_cast<ConstantSDNode>(Val);
20199 if (!C)
20200 // Not clear how to truncate FP values.
20201 // TODO: Handle truncation of build_vector constants
20202 return false;
20203
20204 EVT IntMemVT =
20205 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20206 Val = DAG.getConstant(C->getAPIntValue()
20207 .zextOrTrunc(Val.getValueSizeInBits())
20208 .zextOrTrunc(ElementSizeBits),
20209 SDLoc(C), IntMemVT);
20210 }
20211 // Make sure the correctly sized value is bitcast to the correct type.
20212 Val = DAG.getBitcast(MemVT, Val);
20213 }
20214 BuildVector.push_back(Val);
20215 }
20216 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20217 : ISD::BUILD_VECTOR,
20218 DL, StoreTy, BuildVector);
20219 } else {
20220 SmallVector<SDValue, 8> Ops;
20221 for (unsigned i = 0; i < NumStores; ++i) {
20222 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20223 SDValue Val = peekThroughBitcasts(St->getValue());
20224 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20225 // type MemVT. If the underlying value is not the correct
20226 // type, but it is an extraction of an appropriate vector we
20227 // can recast Val to be of the correct type. This may require
20228 // converting between EXTRACT_VECTOR_ELT and
20229 // EXTRACT_SUBVECTOR.
20230 if ((MemVT != Val.getValueType()) &&
20231 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20232 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20233 EVT MemVTScalarTy = MemVT.getScalarType();
20234 // We may need to add a bitcast here to get types to line up.
20235 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20236 Val = DAG.getBitcast(MemVT, Val);
20237 } else if (MemVT.isVector() &&
20238 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20239 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20240 } else {
20241 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20242 : ISD::EXTRACT_VECTOR_ELT;
20243 SDValue Vec = Val.getOperand(0);
20244 SDValue Idx = Val.getOperand(1);
20245 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20246 }
20247 }
20248 Ops.push_back(Val);
20249 }
20250
20251 // Build the extracted vector elements back into a vector.
20252 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20253 : ISD::BUILD_VECTOR,
20254 DL, StoreTy, Ops);
20255 }
20256 } else {
20257 // We should always use a vector store when merging extracted vector
20258 // elements, so this path implies a store of constants.
20259 assert(IsConstantSrc && "Merged vector elements should use vector store");
20260
20261 APInt StoreInt(SizeInBits, 0);
20262
20263 // Construct a single integer constant which is made of the smaller
20264 // constant inputs.
20265 bool IsLE = DAG.getDataLayout().isLittleEndian();
20266 for (unsigned i = 0; i < NumStores; ++i) {
20267 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20268 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20269
20270 SDValue Val = St->getValue();
20271 Val = peekThroughBitcasts(Val);
20272 StoreInt <<= ElementSizeBits;
20273 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20274 StoreInt |= C->getAPIntValue()
20275 .zextOrTrunc(ElementSizeBits)
20276 .zextOrTrunc(SizeInBits);
20277 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20278 StoreInt |= C->getValueAPF()
20279 .bitcastToAPInt()
20280 .zextOrTrunc(ElementSizeBits)
20281 .zextOrTrunc(SizeInBits);
20282 // If fp truncation is necessary give up for now.
20283 if (MemVT.getSizeInBits() != ElementSizeBits)
20284 return false;
20285 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20286 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20287 // Not yet handled
20288 return false;
20289 } else {
20290 llvm_unreachable("Invalid constant element type");
20291 }
20292 }
20293
20294 // Create the new Load and Store operations.
20295 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20296 }
20297
20298 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20299 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20300 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20301
20302 // Make sure we use a trunc store if it's necessary to be legal.
20303 // When generating the new widened store, if the first store's pointer info
20304 // cannot be reused, discard the pointer info except for the address space,
20305 // because the widened store can no longer be represented by the original
20306 // pointer info, which describes the narrower memory object.
20307 SDValue NewStore;
20308 if (!UseTrunc) {
20309 NewStore = DAG.getStore(
20310 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20311 CanReusePtrInfo
20312 ? FirstInChain->getPointerInfo()
20313 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20314 FirstInChain->getAlign(), *Flags, AAInfo);
20315 } else { // Must be realized as a trunc store
20316 EVT LegalizedStoredValTy =
20317 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20318 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20319 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20320 SDValue ExtendedStoreVal =
20321 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20322 LegalizedStoredValTy);
20323 NewStore = DAG.getTruncStore(
20324 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20325 CanReusePtrInfo
20326 ? FirstInChain->getPointerInfo()
20327 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20328 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20329 AAInfo);
20330 }
20331
20332 // Replace all merged stores with the new store.
20333 for (unsigned i = 0; i < NumStores; ++i)
20334 CombineTo(StoreNodes[i].MemNode, NewStore);
20335
20336 AddToWorklist(NewChain.getNode());
20337 return true;
20338}
20339
20340void DAGCombiner::getStoreMergeCandidates(
20341 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20342 SDNode *&RootNode) {
20343 // This holds the base pointer, index, and the offset in bytes from the base
20344 // pointer. We must have a base and an offset. Do not handle stores to undef
20345 // base pointers.
20346 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20347 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20348 return;
20349
20351 StoreSource StoreSrc = getStoreSource(Val);
20352 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20353
20354 // Match on loadbaseptr if relevant.
20355 EVT MemVT = St->getMemoryVT();
20356 BaseIndexOffset LBasePtr;
20357 EVT LoadVT;
20358 if (StoreSrc == StoreSource::Load) {
20359 auto *Ld = cast<LoadSDNode>(Val);
20360 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20361 LoadVT = Ld->getMemoryVT();
20362 // Load and store should be the same type.
20363 if (MemVT != LoadVT)
20364 return;
20365 // Loads must only have one use.
20366 if (!Ld->hasNUsesOfValue(1, 0))
20367 return;
20368 // The memory operands must not be volatile/indexed/atomic.
20369 // TODO: May be able to relax for unordered atomics (see D66309)
20370 if (!Ld->isSimple() || Ld->isIndexed())
20371 return;
20372 }
20373 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20374 int64_t &Offset) -> bool {
20375 // The memory operands must not be volatile/indexed/atomic.
20376 // TODO: May be able to relax for unordered atomics (see D66309)
20377 if (!Other->isSimple() || Other->isIndexed())
20378 return false;
20379 // Don't mix temporal stores with non-temporal stores.
20380 if (St->isNonTemporal() != Other->isNonTemporal())
20381 return false;
20382 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20383 return false;
20384 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20385 // Allow merging constants of different types as integers.
20386 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20387 : Other->getMemoryVT() != MemVT;
20388 switch (StoreSrc) {
20389 case StoreSource::Load: {
20390 if (NoTypeMatch)
20391 return false;
20392 // The Load's Base Ptr must also match.
20393 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20394 if (!OtherLd)
20395 return false;
20396 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20397 if (LoadVT != OtherLd->getMemoryVT())
20398 return false;
20399 // Loads must only have one use.
20400 if (!OtherLd->hasNUsesOfValue(1, 0))
20401 return false;
20402 // The memory operands must not be volatile/indexed/atomic.
20403 // TODO: May be able to relax for unordered atomics (see D66309)
20404 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20405 return false;
20406 // Don't mix temporal loads with non-temporal loads.
20407 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20408 return false;
20409 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20410 *OtherLd))
20411 return false;
20412 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20413 return false;
20414 break;
20415 }
20416 case StoreSource::Constant:
20417 if (NoTypeMatch)
20418 return false;
20419 if (getStoreSource(OtherBC) != StoreSource::Constant)
20420 return false;
20421 break;
20422 case StoreSource::Extract:
20423 // Do not merge truncated stores here.
20424 if (Other->isTruncatingStore())
20425 return false;
20426 if (!MemVT.bitsEq(OtherBC.getValueType()))
20427 return false;
20428 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20429 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20430 return false;
20431 break;
20432 default:
20433 llvm_unreachable("Unhandled store source for merging");
20434 }
20435 Ptr = BaseIndexOffset::match(Other, DAG);
20436 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20437 };
20438
20439 // Check if the pair of StoreNode and RootNode has already bailed out of
20440 // the dependence check more times than the limit allows.
20441 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20442 SDNode *RootNode) -> bool {
20443 auto RootCount = StoreRootCountMap.find(StoreNode);
20444 return RootCount != StoreRootCountMap.end() &&
20445 RootCount->second.first == RootNode &&
20446 RootCount->second.second > StoreMergeDependenceLimit;
20447 };
20448
20449 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20450 // This must be a chain use.
20451 if (UseIter.getOperandNo() != 0)
20452 return;
20453 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20454 BaseIndexOffset Ptr;
20455 int64_t PtrDiff;
20456 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20457 !OverLimitInDependenceCheck(OtherStore, RootNode))
20458 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20459 }
20460 };
20461
20462 // We are looking for a root node which is an ancestor to all mergeable
20463 // stores. We search up through a load, to our root and then down
20464 // through all children. For instance we will find Store{1,2,3} if
20465 // St is Store1, Store2, or Store3 where the root is not a load
20466 // which is always true for nonvolatile ops. TODO: Expand
20467 // the search to find all valid candidates through multiple layers of loads.
20468 //
20469 // Root
20470 // |-------|-------|
20471 // Load Load Store3
20472 // | |
20473 // Store1 Store2
20474 //
20475 // FIXME: We should be able to climb and
20476 // descend TokenFactors to find candidates as well.
20477
20478 RootNode = St->getChain().getNode();
20479
20480 unsigned NumNodesExplored = 0;
20481 const unsigned MaxSearchNodes = 1024;
20482 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20483 RootNode = Ldn->getChain().getNode();
20484 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20485 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20486 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20487 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20488 TryToAddCandidate(I2);
20489 }
20490 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20491 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20492 TryToAddCandidate(I);
20493 }
20494 }
20495 } else {
20496 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20497 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20498 TryToAddCandidate(I);
20499 }
20500}
20501
20502// We need to check that merging these stores does not cause a loop in the
20503// DAG. Any store candidate may depend on another candidate indirectly through
20504// its operands. Check in parallel by searching up from operands of candidates.
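// For example (illustrative): one candidate's stored value may be computed
// from a load whose chain ultimately depends on another candidate store;
// merging both stores into a single node would then create a cycle.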
20505bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20506 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20507 SDNode *RootNode) {
20508 // FIXME: We should be able to truncate a full search of
20509 // predecessors by doing a BFS and keeping tabs on the originating
20510 // stores from which worklist nodes come, in a similar way to
20511 // TokenFactor simplification.
20512
20513 SmallPtrSet<const SDNode *, 32> Visited;
20514 SmallVector<const SDNode *, 8> Worklist;
20515
20516 // RootNode is a predecessor to all candidates so we need not search
20517 // past it. Add RootNode (peeking through TokenFactors). Do not count
20518 // these towards size check.
20519
20520 Worklist.push_back(RootNode);
20521 while (!Worklist.empty()) {
20522 auto N = Worklist.pop_back_val();
20523 if (!Visited.insert(N).second)
20524 continue; // Already present in Visited.
20525 if (N->getOpcode() == ISD::TokenFactor) {
20526 for (SDValue Op : N->ops())
20527 Worklist.push_back(Op.getNode());
20528 }
20529 }
20530
20531 // Don't count pruning nodes towards max.
20532 unsigned int Max = 1024 + Visited.size();
20533 // Search Ops of store candidates.
20534 for (unsigned i = 0; i < NumStores; ++i) {
20535 SDNode *N = StoreNodes[i].MemNode;
20536 // Of the 4 Store Operands:
20537 // * Chain (Op 0) -> We have already considered these
20538 // in candidate selection, but only by following the
20539 // chain dependencies. We could still have a chain
20540 // dependency to a load, that has a non-chain dep to
20541 // another load, that depends on a store, etc. So it is
20542 // possible to have dependencies that consist of a mix
20543 // of chain and non-chain deps, and we need to include
20544 // chain operands in the analysis here.
20545 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20546 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20547 // but aren't necessarily from the same base node, so
20548 // cycles possible (e.g. via indexed store).
20549 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20550 // non-indexed stores). Not constant on all targets (e.g. ARM)
20551 // and so can participate in a cycle.
20552 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20553 Worklist.push_back(N->getOperand(j).getNode());
20554 }
20555 // Search through DAG. We can stop early if we find a store node.
20556 for (unsigned i = 0; i < NumStores; ++i)
20557 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20558 Max)) {
20559 // If the search bails out, record the StoreNode and RootNode in the
20560 // StoreRootCountMap. If we have seen the pair many times over a limit,
20561 // we won't add the StoreNode into StoreNodes set again.
20562 if (Visited.size() >= Max) {
20563 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20564 if (RootCount.first == RootNode)
20565 RootCount.second++;
20566 else
20567 RootCount = {RootNode, 1};
20568 }
20569 return false;
20570 }
20571 return true;
20572}
20573
20574unsigned
20575DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20576 int64_t ElementSizeBytes) const {
20577 while (true) {
20578 // Find a store past the width of the first store.
20579 size_t StartIdx = 0;
20580 while ((StartIdx + 1 < StoreNodes.size()) &&
20581 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20582 StoreNodes[StartIdx + 1].OffsetFromBase)
20583 ++StartIdx;
20584
20585 // Bail if we don't have enough candidates to merge.
20586 if (StartIdx + 1 >= StoreNodes.size())
20587 return 0;
20588
20589 // Trim stores that overlapped with the first store.
20590 if (StartIdx)
20591 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20592
20593 // Scan the memory operations on the chain and find the first
20594 // non-consecutive store memory address.
20595 unsigned NumConsecutiveStores = 1;
20596 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20597 // Check that the addresses are consecutive starting from the second
20598 // element in the list of stores.
20599 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20600 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20601 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20602 break;
20603 NumConsecutiveStores = i + 1;
20604 }
20605 if (NumConsecutiveStores > 1)
20606 return NumConsecutiveStores;
20607
20608 // There are no consecutive stores at the start of the list.
20609 // Remove the first store and try again.
20610 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20611 }
20612}
20613
20614bool DAGCombiner::tryStoreMergeOfConstants(
20615 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20616 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20617 LLVMContext &Context = *DAG.getContext();
20618 const DataLayout &DL = DAG.getDataLayout();
20619 int64_t ElementSizeBytes = MemVT.getStoreSize();
20620 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20621 bool MadeChange = false;
20622
20623 // Store the constants into memory as one consecutive store.
20624 while (NumConsecutiveStores >= 2) {
20625 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20626 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20627 Align FirstStoreAlign = FirstInChain->getAlign();
20628 unsigned LastLegalType = 1;
20629 unsigned LastLegalVectorType = 1;
20630 bool LastIntegerTrunc = false;
20631 bool NonZero = false;
20632 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20633 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20634 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20635 SDValue StoredVal = ST->getValue();
20636 bool IsElementZero = false;
20637 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20638 IsElementZero = C->isZero();
20639 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20640 IsElementZero = C->getConstantFPValue()->isNullValue();
20641 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20642 IsElementZero = true;
20643 if (IsElementZero) {
20644 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20645 FirstZeroAfterNonZero = i;
20646 }
20647 NonZero |= !IsElementZero;
20648
20649 // Find a legal type for the constant store.
20650 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20651 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20652 unsigned IsFast = 0;
20653
20654 // Break early when size is too large to be legal.
20655 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20656 break;
20657
20658 if (TLI.isTypeLegal(StoreTy) &&
20659 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20660 DAG.getMachineFunction()) &&
20661 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20662 *FirstInChain->getMemOperand(), &IsFast) &&
20663 IsFast) {
20664 LastIntegerTrunc = false;
20665 LastLegalType = i + 1;
20666 // Or check whether a truncstore is legal.
20667 } else if (TLI.getTypeAction(Context, StoreTy) ==
20668 TargetLowering::TypePromoteInteger) {
20669 EVT LegalizedStoredValTy =
20670 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20671 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20672 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20673 DAG.getMachineFunction()) &&
20674 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20675 *FirstInChain->getMemOperand(), &IsFast) &&
20676 IsFast) {
20677 LastIntegerTrunc = true;
20678 LastLegalType = i + 1;
20679 }
20680 }
20681
20682 // We only use vectors if the target allows it and the function is not
20683 // marked with the noimplicitfloat attribute.
20684 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20685 AllowVectors) {
20686 // Find a legal type for the vector store.
20687 unsigned Elts = (i + 1) * NumMemElts;
20688 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20689 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20690 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20691 TLI.allowsMemoryAccess(Context, DL, Ty,
20692 *FirstInChain->getMemOperand(), &IsFast) &&
20693 IsFast)
20694 LastLegalVectorType = i + 1;
20695 }
20696 }
20697
20698 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20699 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20700 bool UseTrunc = LastIntegerTrunc && !UseVector;
20701
20702 // Check if we found a legal integer type that creates a meaningful
20703 // merge.
20704 if (NumElem < 2) {
20705 // We know that candidate stores are in order and of correct
20706 // shape. While there is no mergeable sequence from the
20707 // beginning, one may start later in the sequence. The only
20708 // reason a merge of size N could have failed where another of
20709 // the same size would not have, is if the alignment has
20710 // improved or we've dropped a non-zero value. Drop as many
20711 // candidates as we can here.
20712 unsigned NumSkip = 1;
20713 while ((NumSkip < NumConsecutiveStores) &&
20714 (NumSkip < FirstZeroAfterNonZero) &&
20715 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20716 NumSkip++;
20717
20718 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20719 NumConsecutiveStores -= NumSkip;
20720 continue;
20721 }
20722
20723 // Check that we can merge these candidates without causing a cycle.
20724 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20725 RootNode)) {
20726 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20727 NumConsecutiveStores -= NumElem;
20728 continue;
20729 }
20730
20731 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20732 /*IsConstantSrc*/ true,
20733 UseVector, UseTrunc);
20734
20735 // Remove merged stores for next iteration.
20736 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20737 NumConsecutiveStores -= NumElem;
20738 }
20739 return MadeChange;
20740}
20741
20742bool DAGCombiner::tryStoreMergeOfExtracts(
20743 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20744 EVT MemVT, SDNode *RootNode) {
20745 LLVMContext &Context = *DAG.getContext();
20746 const DataLayout &DL = DAG.getDataLayout();
20747 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20748 bool MadeChange = false;
20749
20750 // Loop on Consecutive Stores on success.
20751 while (NumConsecutiveStores >= 2) {
20752 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20753 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20754 Align FirstStoreAlign = FirstInChain->getAlign();
20755 unsigned NumStoresToMerge = 1;
20756 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20757 // Find a legal type for the vector store.
20758 unsigned Elts = (i + 1) * NumMemElts;
20759 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20760 unsigned IsFast = 0;
20761
20762 // Break early when size is too large to be legal.
20763 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20764 break;
20765
20766 if (TLI.isTypeLegal(Ty) &&
20767 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20768 TLI.allowsMemoryAccess(Context, DL, Ty,
20769 *FirstInChain->getMemOperand(), &IsFast) &&
20770 IsFast)
20771 NumStoresToMerge = i + 1;
20772 }
20773
20774 // Check if we found a legal integer type creating a meaningful
20775 // merge.
20776 if (NumStoresToMerge < 2) {
20777 // We know that candidate stores are in order and of correct
20778 // shape. While there is no mergeable sequence from the
20779 // beginning, one may start later in the sequence. The only
20780 // reason a merge of size N could have failed where another of
20781 // the same size would not have, is if the alignment has
20782 // improved. Drop as many candidates as we can here.
20783 unsigned NumSkip = 1;
20784 while ((NumSkip < NumConsecutiveStores) &&
20785 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20786 NumSkip++;
20787
20788 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20789 NumConsecutiveStores -= NumSkip;
20790 continue;
20791 }
20792
20793 // Check that we can merge these candidates without causing a cycle.
20794 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20795 RootNode)) {
20796 StoreNodes.erase(StoreNodes.begin(),
20797 StoreNodes.begin() + NumStoresToMerge);
20798 NumConsecutiveStores -= NumStoresToMerge;
20799 continue;
20800 }
20801
20802 MadeChange |= mergeStoresOfConstantsOrVecElts(
20803 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20804 /*UseVector*/ true, /*UseTrunc*/ false);
20805
20806 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20807 NumConsecutiveStores -= NumStoresToMerge;
20808 }
20809 return MadeChange;
20810}
20811
20812bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20813 unsigned NumConsecutiveStores, EVT MemVT,
20814 SDNode *RootNode, bool AllowVectors,
20815 bool IsNonTemporalStore,
20816 bool IsNonTemporalLoad) {
20817 LLVMContext &Context = *DAG.getContext();
20818 const DataLayout &DL = DAG.getDataLayout();
20819 int64_t ElementSizeBytes = MemVT.getStoreSize();
20820 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20821 bool MadeChange = false;
20822
20823 // Look for load nodes which are used by the stored values.
20824 SmallVector<MemOpLink, 8> LoadNodes;
20825
20826 // Find acceptable loads. Loads need to have the same chain (token factor),
20827 // must not be zext, volatile, indexed, and they must be consecutive.
20828 BaseIndexOffset LdBasePtr;
20829
20830 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20831 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20832 SDValue Val = peekThroughBitcasts(St->getValue());
20833 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20834
20835 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20836 // If this is not the first ptr that we check.
20837 int64_t LdOffset = 0;
20838 if (LdBasePtr.getBase().getNode()) {
20839 // The base ptr must be the same.
20840 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20841 break;
20842 } else {
20843 // Check that all other base pointers are the same as this one.
20844 LdBasePtr = LdPtr;
20845 }
20846
20847 // We found a potential memory operand to merge.
20848 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20849 }
20850
20851 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20852 Align RequiredAlignment;
20853 bool NeedRotate = false;
20854 if (LoadNodes.size() == 2) {
20855 // If we have load/store pair instructions and we only have two values,
20856 // don't bother merging.
20857 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20858 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20859 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20860 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20861 break;
20862 }
20863 // If the loads are reversed, see if we can rotate the halves into place.
20864 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20865 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20866 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20867 if (Offset0 - Offset1 == ElementSizeBytes &&
20868 (hasOperation(ISD::ROTL, PairVT) ||
20869 hasOperation(ISD::ROTR, PairVT))) {
20870 std::swap(LoadNodes[0], LoadNodes[1]);
20871 NeedRotate = true;
20872 }
20873 }
20874 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20875 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20876 Align FirstStoreAlign = FirstInChain->getAlign();
20877 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20878
20879 // Scan the memory operations on the chain and find the first
20880 // non-consecutive load memory address. These variables hold the index in
20881 // the store node array.
20882
20883 unsigned LastConsecutiveLoad = 1;
20884
20885 // This variable refers to the size and not index in the array.
20886 unsigned LastLegalVectorType = 1;
20887 unsigned LastLegalIntegerType = 1;
20888 bool isDereferenceable = true;
20889 bool DoIntegerTruncate = false;
20890 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20891 SDValue LoadChain = FirstLoad->getChain();
20892 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20893 // All loads must share the same chain.
20894 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20895 break;
20896
20897 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20898 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20899 break;
20900 LastConsecutiveLoad = i;
20901
20902 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20903 isDereferenceable = false;
20904
20905 // Find a legal type for the vector store.
20906 unsigned Elts = (i + 1) * NumMemElts;
20907 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20908
20909 // Break early when size is too large to be legal.
20910 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20911 break;
20912
20913 unsigned IsFastSt = 0;
20914 unsigned IsFastLd = 0;
20915 // Don't try vector types if we need a rotate. We may still fail the
20916 // legality checks for the integer type, but we can't handle the rotate
20917 // case with vectors.
20918 // FIXME: We could use a shuffle in place of the rotate.
20919 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20920 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20921 DAG.getMachineFunction()) &&
20922 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20923 *FirstInChain->getMemOperand(), &IsFastSt) &&
20924 IsFastSt &&
20925 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20926 *FirstLoad->getMemOperand(), &IsFastLd) &&
20927 IsFastLd) {
20928 LastLegalVectorType = i + 1;
20929 }
20930
20931 // Find a legal type for the integer store.
20932 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20933 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20934 if (TLI.isTypeLegal(StoreTy) &&
20935 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20936 DAG.getMachineFunction()) &&
20937 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20938 *FirstInChain->getMemOperand(), &IsFastSt) &&
20939 IsFastSt &&
20940 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20941 *FirstLoad->getMemOperand(), &IsFastLd) &&
20942 IsFastLd) {
20943 LastLegalIntegerType = i + 1;
20944 DoIntegerTruncate = false;
20945 // Or check whether a truncstore and extload is legal.
20946 } else if (TLI.getTypeAction(Context, StoreTy) ==
20947 TargetLowering::TypePromoteInteger) {
20948 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20949 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20950 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20951 DAG.getMachineFunction()) &&
20952 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20953 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20954 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20955 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20956 *FirstInChain->getMemOperand(), &IsFastSt) &&
20957 IsFastSt &&
20958 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20959 *FirstLoad->getMemOperand(), &IsFastLd) &&
20960 IsFastLd) {
20961 LastLegalIntegerType = i + 1;
20962 DoIntegerTruncate = true;
20963 }
20964 }
20965 }
20966
20967 // Only use vector types if the vector type is larger than the integer
20968 // type. If they are the same, use integers.
20969 bool UseVectorTy =
20970 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20971 unsigned LastLegalType =
20972 std::max(LastLegalVectorType, LastLegalIntegerType);
20973
20974 // We add +1 here because the LastXXX variables refer to location while
20975 // the NumElem refers to array/index size.
20976 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20977 NumElem = std::min(LastLegalType, NumElem);
20978 Align FirstLoadAlign = FirstLoad->getAlign();
20979
20980 if (NumElem < 2) {
20981 // We know that candidate stores are in order and of correct
20982 // shape. While there is no mergeable sequence from the
20983 // beginning, one may start later in the sequence. The only
20984 // reason a merge of size N could have failed where another of
20985 // the same size would not have is if the alignment of either
20986 // the load or store has improved. Drop as many candidates as we
20987 // can here.
20988 unsigned NumSkip = 1;
20989 while ((NumSkip < LoadNodes.size()) &&
20990 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20991 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20992 NumSkip++;
20993 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20994 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20995 NumConsecutiveStores -= NumSkip;
20996 continue;
20997 }
20998
20999 // Check that we can merge these candidates without causing a cycle.
21000 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21001 RootNode)) {
21002 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21003 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21004 NumConsecutiveStores -= NumElem;
21005 continue;
21006 }
21007
21008 // Find if it is better to use vectors or integers to load and store
21009 // to memory.
21010 EVT JointMemOpVT;
21011 if (UseVectorTy) {
21012 // Find a legal type for the vector store.
21013 unsigned Elts = NumElem * NumMemElts;
21014 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21015 } else {
21016 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21017 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21018 }
21019
21020 SDLoc LoadDL(LoadNodes[0].MemNode);
21021 SDLoc StoreDL(StoreNodes[0].MemNode);
21022
21023 // The merged loads are required to have the same incoming chain, so
21024 // using the first's chain is acceptable.
21025
21026 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21027 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21028 AddToWorklist(NewStoreChain.getNode());
21029
21030 MachineMemOperand::Flags LdMMOFlags =
21031 isDereferenceable ? MachineMemOperand::MODereferenceable
21032 : MachineMemOperand::MONone;
21033 if (IsNonTemporalLoad)
21034 LdMMOFlags |= MachineMemOperand::MONonTemporal;
21035
21036 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21037
21038 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21039 ? MachineMemOperand::MONonTemporal
21040 : MachineMemOperand::MONone;
21041
21042 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21043
21044 SDValue NewLoad, NewStore;
21045 if (UseVectorTy || !DoIntegerTruncate) {
21046 NewLoad = DAG.getLoad(
21047 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21048 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21049 SDValue StoreOp = NewLoad;
21050 if (NeedRotate) {
21051 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21052 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21053 "Unexpected type for rotate-able load pair");
21054 SDValue RotAmt =
21055 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21056 // Target can convert to the identical ROTR if it does not have ROTL.
21057 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21058 }
21059 NewStore = DAG.getStore(
21060 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21061 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21062 : MachinePointerInfo(FirstStoreAS),
21063 FirstStoreAlign, StMMOFlags);
21064 } else { // This must be the truncstore/extload case
21065 EVT ExtendedTy =
21066 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21067 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21068 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21069 FirstLoad->getPointerInfo(), JointMemOpVT,
21070 FirstLoadAlign, LdMMOFlags);
21071 NewStore = DAG.getTruncStore(
21072 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21073 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21074 : MachinePointerInfo(FirstStoreAS),
21075 JointMemOpVT, FirstInChain->getAlign(),
21076 FirstInChain->getMemOperand()->getFlags());
21077 }
21078
21079 // Transfer chain users from old loads to the new load.
21080 for (unsigned i = 0; i < NumElem; ++i) {
21081 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21082 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21083 SDValue(NewLoad.getNode(), 1));
21084 }
21085
21086 // Replace all stores with the new store. Recursively remove corresponding
21087 // values if they are no longer used.
21088 for (unsigned i = 0; i < NumElem; ++i) {
21089 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21090 CombineTo(StoreNodes[i].MemNode, NewStore);
21091 if (Val->use_empty())
21092 recursivelyDeleteUnusedNodes(Val.getNode());
21093 }
21094
21095 MadeChange = true;
21096 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21097 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21098 NumConsecutiveStores -= NumElem;
21099 }
21100 return MadeChange;
21101}
21102
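// Top-level driver for store merging: collect candidate stores reachable
// through the chain, sort them by offset from the common base pointer, and
// repeatedly hand the leading run of consecutive stores to the
// constant/extract/load merge helpers above.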
21103bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21104 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21105 return false;
21106
21107 // TODO: Extend this function to merge stores of scalable vectors.
21108 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21109 // store since we know <vscale x 16 x i8> is exactly twice as large as
21110 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21111 EVT MemVT = St->getMemoryVT();
21112 if (MemVT.isScalableVT())
21113 return false;
21114 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21115 return false;
21116
21117 // This function cannot currently deal with non-byte-sized memory sizes.
21118 int64_t ElementSizeBytes = MemVT.getStoreSize();
21119 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21120 return false;
21121
21122 // Do not bother looking at stored values that are not constants, loads, or
21123 // extracted vector elements.
21124 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21125 const StoreSource StoreSrc = getStoreSource(StoredVal);
21126 if (StoreSrc == StoreSource::Unknown)
21127 return false;
21128
21129 SmallVector<MemOpLink, 8> StoreNodes;
21130 SDNode *RootNode;
21131 // Find potential store merge candidates by searching through chain sub-DAG
21132 getStoreMergeCandidates(St, StoreNodes, RootNode);
21133
21134 // Check if there is anything to merge.
21135 if (StoreNodes.size() < 2)
21136 return false;
21137
21138 // Sort the memory operands according to their distance from the
21139 // base pointer.
21140 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21141 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21142 });
21143
21144 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21145 Attribute::NoImplicitFloat);
21146 bool IsNonTemporalStore = St->isNonTemporal();
21147 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21148 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21149
21150 // Store Merge attempts to merge the lowest stores. This generally
21151 // works out as if successful, as the remaining stores are checked
21152 // after the first collection of stores is merged. However, in the
21153 // case that a non-mergeable store is found first, e.g., {p[-2],
21154 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21155 // mergeable cases. To prevent this, we prune such stores from the
21156 // front of StoreNodes here.
21157 bool MadeChange = false;
21158 while (StoreNodes.size() > 1) {
21159 unsigned NumConsecutiveStores =
21160 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21161 // There are no more stores in the list to examine.
21162 if (NumConsecutiveStores == 0)
21163 return MadeChange;
21164
21165 // We have at least 2 consecutive stores. Try to merge them.
21166 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21167 switch (StoreSrc) {
21168 case StoreSource::Constant:
21169 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21170 MemVT, RootNode, AllowVectors);
21171 break;
21172
21173 case StoreSource::Extract:
21174 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21175 MemVT, RootNode);
21176 break;
21177
21178 case StoreSource::Load:
21179 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21180 MemVT, RootNode, AllowVectors,
21181 IsNonTemporalStore, IsNonTemporalLoad);
21182 break;
21183
21184 default:
21185 llvm_unreachable("Unhandled store source type");
21186 }
21187 }
21188 return MadeChange;
21189}
21190
21191SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21192 SDLoc SL(ST);
21193 SDValue ReplStore;
21194
21195 // Replace the chain to avoid dependency.
21196 if (ST->isTruncatingStore()) {
21197 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21198 ST->getBasePtr(), ST->getMemoryVT(),
21199 ST->getMemOperand());
21200 } else {
21201 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21202 ST->getMemOperand());
21203 }
21204
21205 // Create token to keep both nodes around.
21206 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21207 MVT::Other, ST->getChain(), ReplStore);
21208
21209 // Make sure the new and old chains are cleaned up.
21210 AddToWorklist(Token.getNode());
21211
21212 // Don't add users to work list.
21213 return CombineTo(ST, Token, false);
21214}
21215
21216SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21217 SDValue Value = ST->getValue();
21218 if (Value.getOpcode() == ISD::TargetConstantFP)
21219 return SDValue();
21220
21221 if (!ISD::isNormalStore(ST))
21222 return SDValue();
21223
21224 SDLoc DL(ST);
21225
21226 SDValue Chain = ST->getChain();
21227 SDValue Ptr = ST->getBasePtr();
21228
21229 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21230
21231 // NOTE: If the original store is volatile, this transform must not increase
21232 // the number of stores. For example, on x86-32 an f64 can be stored in one
21233 // processor operation but an i64 (which is not legal) requires two. So the
21234 // transform should not be done in this case.
21235
21236 SDValue Tmp;
21237 switch (CFP->getSimpleValueType(0).SimpleTy) {
21238 default:
21239 llvm_unreachable("Unknown FP type");
21240 case MVT::f16: // We don't do this for these yet.
21241 case MVT::bf16:
21242 case MVT::f80:
21243 case MVT::f128:
21244 case MVT::ppcf128:
21245 return SDValue();
21246 case MVT::f32:
21247 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21248 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21249 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21250 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21251 MVT::i32);
21252 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21253 }
21254
21255 return SDValue();
21256 case MVT::f64:
21257 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21258 ST->isSimple()) ||
21259 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21260 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21261 getZExtValue(), SDLoc(CFP), MVT::i64);
21262 return DAG.getStore(Chain, DL, Tmp,
21263 Ptr, ST->getMemOperand());
21264 }
21265
21266 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21267 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21268 // Many FP stores are not made apparent until after legalize, e.g. for
21269 // argument passing. Since this is so common, custom legalize the
21270 // 64-bit integer store into two 32-bit stores.
21271 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21272 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21273 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21274 if (DAG.getDataLayout().isBigEndian())
21275 std::swap(Lo, Hi);
21276
21277 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21278 AAMDNodes AAInfo = ST->getAAInfo();
21279
21280 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21281 ST->getOriginalAlign(), MMOFlags, AAInfo);
21282 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21283 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21284 ST->getPointerInfo().getWithOffset(4),
21285 ST->getOriginalAlign(), MMOFlags, AAInfo);
21286 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21287 St0, St1);
21288 }
21289
21290 return SDValue();
21291 }
21292}
21293
21294// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21295//
21296// If a store of a load with an element inserted into it has no other
21297// uses in between the chain, then we can consider the vector store
21298// dead and replace it with just the single scalar element store.
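// For example (illustrative sketch, i32 elements):
//   (store (insert_vector_elt (load <4 x i32> p), x, 2), p)
//     --> (store i32 x, p + 8)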
21299SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21300 SDLoc DL(ST);
21301 SDValue Value = ST->getValue();
21302 SDValue Ptr = ST->getBasePtr();
21303 SDValue Chain = ST->getChain();
21304 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21305 return SDValue();
21306
21307 SDValue Elt = Value.getOperand(1);
21308 SDValue Idx = Value.getOperand(2);
21309
21310 // If the element isn't byte sized or is implicitly truncated then we can't
21311 // compute an offset.
21312 EVT EltVT = Elt.getValueType();
21313 if (!EltVT.isByteSized() ||
21314 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21315 return SDValue();
21316
21317 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21318 if (!Ld || Ld->getBasePtr() != Ptr ||
21319 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21320 !ISD::isNormalStore(ST) ||
21321 Ld->getAddressSpace() != ST->getAddressSpace() ||
21322 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21323 return SDValue();
21324
21325 unsigned IsFast;
21326 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21327 Elt.getValueType(), ST->getAddressSpace(),
21328 ST->getAlign(), ST->getMemOperand()->getFlags(),
21329 &IsFast) ||
21330 !IsFast)
21331 return SDValue();
21332
21333 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21334
21335 // If the offset is a known constant then try to recover the pointer
21336 // info
21337 SDValue NewPtr;
21338 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21339 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21340 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21341 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21342 } else {
21343 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21344 }
21345
21346 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21347 ST->getMemOperand()->getFlags());
21348}
21349
21350SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21351 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21352 SDValue Val = ST->getVal();
21353 EVT VT = Val.getValueType();
21354 EVT MemVT = ST->getMemoryVT();
21355
21356 if (MemVT.bitsLT(VT)) { // Is truncating store
21357 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21358 MemVT.getScalarSizeInBits());
21359 // See if we can simplify the operation with SimplifyDemandedBits, which
21360 // only works if the value has a single use.
21361 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21362 return SDValue(N, 0);
21363 }
21364
21365 return SDValue();
21366}
21367
21368SDValue DAGCombiner::visitSTORE(SDNode *N) {
21369 StoreSDNode *ST = cast<StoreSDNode>(N);
21370 SDValue Chain = ST->getChain();
21371 SDValue Value = ST->getValue();
21372 SDValue Ptr = ST->getBasePtr();
21373
21374 // If this is a store of a bit convert, store the input value if the
21375 // resultant store does not need a higher alignment than the original.
21376 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21377 ST->isUnindexed()) {
21378 EVT SVT = Value.getOperand(0).getValueType();
21379 // If the store is volatile, we only want to change the store type if the
21380 // resulting store is legal. Otherwise we might increase the number of
21381 // memory accesses. We don't care if the original type was legal or not
21382 // as we assume software couldn't rely on the number of accesses of an
21383 // illegal type.
21384 // TODO: May be able to relax for unordered atomics (see D66309)
21385 if (((!LegalOperations && ST->isSimple()) ||
21386 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21387 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21388 DAG, *ST->getMemOperand())) {
21389 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21390 ST->getMemOperand());
21391 }
21392 }
21393
21394 // Turn 'store undef, Ptr' -> nothing.
21395 if (Value.isUndef() && ST->isUnindexed())
21396 return Chain;
21397
21398 // Try to infer better alignment information than the store already has.
21399 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21400 !ST->isAtomic()) {
21401 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21402 if (*Alignment > ST->getAlign() &&
21403 isAligned(*Alignment, ST->getSrcValueOffset())) {
21404 SDValue NewStore =
21405 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21406 ST->getMemoryVT(), *Alignment,
21407 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21408 // NewStore will always be N as we are only refining the alignment
21409 assert(NewStore.getNode() == N);
21410 (void)NewStore;
21411 }
21412 }
21413 }
21414
21415 // Try transforming a pair floating point load / store ops to integer
21416 // load / store ops.
21417 if (SDValue NewST = TransformFPLoadStorePair(N))
21418 return NewST;
21419
21420 // Try transforming several stores into STORE (BSWAP).
21421 if (SDValue Store = mergeTruncStores(ST))
21422 return Store;
21423
21424 if (ST->isUnindexed()) {
21425 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21426 // adjacent stores.
21427 if (findBetterNeighborChains(ST)) {
21428 // replaceStoreChain uses CombineTo, which handled all of the worklist
21429 // manipulation. Return the original node to not do anything else.
21430 return SDValue(ST, 0);
21431 }
21432 Chain = ST->getChain();
21433 }
21434
21435 // FIXME: is there such a thing as a truncating indexed store?
21436 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21437 Value.getValueType().isInteger() &&
21438 (!isa<ConstantSDNode>(Value) ||
21439 !cast<ConstantSDNode>(Value)->isOpaque())) {
21440 // Convert a truncating store of a extension into a standard store.
21441 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21442 Value.getOpcode() == ISD::SIGN_EXTEND ||
21443 Value.getOpcode() == ISD::ANY_EXTEND) &&
21444 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21445 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21446 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21447 ST->getMemOperand());
21448
21449 APInt TruncDemandedBits =
21450 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21451 ST->getMemoryVT().getScalarSizeInBits());
21452
21453 // See if we can simplify the operation with SimplifyDemandedBits, which
21454 // only works if the value has a single use.
21455 AddToWorklist(Value.getNode());
21456 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21457 // Re-visit the store if anything changed and the store hasn't been merged
21458 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21459 // node back to the worklist if necessary, but we also need to re-visit
21460 // the Store node itself.
21461 if (N->getOpcode() != ISD::DELETED_NODE)
21462 AddToWorklist(N);
21463 return SDValue(N, 0);
21464 }
21465
21466 // Otherwise, see if we can simplify the input to this truncstore with
21467 // knowledge that only the low bits are being used. For example:
21468 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21469 if (SDValue Shorter =
21470 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21471 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21472 ST->getMemOperand());
21473
21474 // If we're storing a truncated constant, see if we can simplify it.
21475 // TODO: Move this to targetShrinkDemandedConstant?
21476 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21477 if (!Cst->isOpaque()) {
21478 const APInt &CValue = Cst->getAPIntValue();
21479 APInt NewVal = CValue & TruncDemandedBits;
21480 if (NewVal != CValue) {
21481 SDValue Shorter =
21482 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21483 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21484 ST->getMemoryVT(), ST->getMemOperand());
21485 }
21486 }
21487 }
21488
21489 // If this is a load followed by a store to the same location, then the store
21490 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21491 // TODO: Add big-endian truncate support with test coverage.
21492 // TODO: Can relax for unordered atomics (see D66309)
21493 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21494 ? peekThroughTruncates(Value)
21495 : Value;
21496 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21497 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21498 ST->isUnindexed() && ST->isSimple() &&
21499 Ld->getAddressSpace() == ST->getAddressSpace() &&
21500 // There can't be any side effects between the load and store, such as
21501 // a call or store.
21502 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21503 // The store is dead, remove it.
21504 return Chain;
21505 }
21506 }
21507
21508 // Try scalarizing vector stores of loads where we only change one element
21509 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21510 return NewST;
21511
21512 // TODO: Can relax for unordered atomics (see D66309)
21513 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21514 if (ST->isUnindexed() && ST->isSimple() &&
21515 ST1->isUnindexed() && ST1->isSimple()) {
21516 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21517 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21518 ST->getAddressSpace() == ST1->getAddressSpace()) {
21519 // If this is a store followed by a store with the same value to the
21520 // same location, then the store is dead/noop.
21521 return Chain;
21522 }
21523
21524 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21525 !ST1->getBasePtr().isUndef() &&
21526 ST->getAddressSpace() == ST1->getAddressSpace()) {
21527 // If we consider two stores where the smaller one has a scalable
21528 // vector type and the bigger one has a fixed type, then we cannot
21529 // allow removal of the scalable store, because its final size is
21530 // not known until runtime.
21531 if (ST->getMemoryVT().isScalableVector() ||
21532 ST1->getMemoryVT().isScalableVector()) {
21533 if (ST1->getBasePtr() == Ptr &&
21534 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21535 ST->getMemoryVT().getStoreSize())) {
21536 CombineTo(ST1, ST1->getChain());
21537 return SDValue(N, 0);
21538 }
21539 } else {
21540 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21541 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21542 // If the preceding store writes to a subset of the current store's
21543 // location and no other node is chained to that store, we can
21544 // effectively drop the preceding store. Do not remove stores to undef as they
21545 // may be used as data sinks.
21546 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21547 ChainBase,
21548 ST1->getMemoryVT().getFixedSizeInBits())) {
21549 CombineTo(ST1, ST1->getChain());
21550 return SDValue(N, 0);
21551 }
21552 }
21553 }
21554 }
21555 }
21556
21557 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21558 // truncating store. We can do this even if this is already a truncstore.
21559 if ((Value.getOpcode() == ISD::FP_ROUND ||
21560 Value.getOpcode() == ISD::TRUNCATE) &&
21561 Value->hasOneUse() && ST->isUnindexed() &&
21562 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21563 ST->getMemoryVT(), LegalOperations)) {
21564 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21565 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21566 }
21567
21568 // Always perform this optimization before types are legal. If the target
21569 // prefers, also try this after legalization to catch stores that were created
21570 // by intrinsics or other nodes.
21571 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21572 while (true) {
21573 // There can be multiple store sequences on the same chain.
21574 // Keep trying to merge store sequences until we are unable to do so
21575 // or until we merge the last store on the chain.
21576 bool Changed = mergeConsecutiveStores(ST);
21577 if (!Changed) break;
21578 // Return N as merge only uses CombineTo and no worklist clean
21579 // up is necessary.
21580 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21581 return SDValue(N, 0);
21582 }
21583 }
21584
21585 // Try transforming N to an indexed store.
21586 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21587 return SDValue(N, 0);
21588
21589 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21590 //
21591 // Make sure to do this only after attempting to merge stores in order to
21592 // avoid changing the types of some subset of stores due to visit order,
21593 // preventing their merging.
21594 if (isa<ConstantFPSDNode>(ST->getValue())) {
21595 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21596 return NewSt;
21597 }
21598
21599 if (SDValue NewSt = splitMergedValStore(ST))
21600 return NewSt;
21601
21602 return ReduceLoadOpStoreWidth(N);
21603}
21604
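// Walk up the chain from a LIFETIME_END node and remove stores that are
// provably contained within the object whose lifetime is ending, since such
// stores can never be observed.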
21605SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21606 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21607 if (!LifetimeEnd->hasOffset())
21608 return SDValue();
21609
21610 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21611 LifetimeEnd->getOffset(), false);
21612
21613 // We walk up the chains to find stores.
21614 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21615 while (!Chains.empty()) {
21616 SDValue Chain = Chains.pop_back_val();
21617 if (!Chain.hasOneUse())
21618 continue;
21619 switch (Chain.getOpcode()) {
21620 case ISD::TokenFactor:
21621 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21622 Chains.push_back(Chain.getOperand(--Nops));
21623 break;
21624 case ISD::LIFETIME_START:
21625 case ISD::LIFETIME_END:
21626 // We can forward past any lifetime start/end that can be proven not to
21627 // alias the node.
21628 if (!mayAlias(Chain.getNode(), N))
21629 Chains.push_back(Chain.getOperand(0));
21630 break;
21631 case ISD::STORE: {
21632 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21633 // TODO: Can relax for unordered atomics (see D66309)
21634 if (!ST->isSimple() || ST->isIndexed())
21635 continue;
21636 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21637 // The bounds of a scalable store are not known until runtime, so this
21638 // store cannot be elided.
21639 if (StoreSize.isScalable())
21640 continue;
21641 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21642 // If we store purely within object bounds just before its lifetime ends,
21643 // we can remove the store.
21644 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21645 StoreSize.getFixedValue() * 8)) {
21646 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21647 dbgs() << "\nwithin LIFETIME_END of : ";
21648 LifetimeEndBase.dump(); dbgs() << "\n");
21649 CombineTo(ST, ST->getChain());
21650 return SDValue(N, 0);
21651 }
21652 }
21653 }
21654 }
21655 return SDValue();
21656}
21657
21658/// For the instruction sequence of store below, F and I values
21659/// are bundled together as an i64 value before being stored into memory.
21660 /// Sometimes it is more efficient to generate separate stores for F and I,
21661/// which can remove the bitwise instructions or sink them to colder places.
21662///
21663/// (store (or (zext (bitcast F to i32) to i64),
21664/// (shl (zext I to i64), 32)), addr) -->
21665/// (store F, addr) and (store I, addr+4)
21666///
21667/// Similarly, splitting for other merged store can also be beneficial, like:
21668/// For pair of {i32, i32}, i64 store --> two i32 stores.
21669/// For pair of {i32, i16}, i64 store --> two i32 stores.
21670/// For pair of {i16, i16}, i32 store --> two i16 stores.
21671/// For pair of {i16, i8}, i32 store --> two i16 stores.
21672/// For pair of {i8, i8}, i16 store --> two i8 stores.
21673///
21674/// We allow each target to determine specifically which kind of splitting is
21675/// supported.
21676///
21677 /// The store patterns are commonly seen in the simple code snippet below
21678 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21679/// void goo(const std::pair<int, float> &);
21680/// hoo() {
21681/// ...
21682/// goo(std::make_pair(tmp, ftmp));
21683/// ...
21684/// }
21685///
21686SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21687 if (OptLevel == CodeGenOptLevel::None)
21688 return SDValue();
21689
21690 // Can't change the number of memory accesses for a volatile store or break
21691 // atomicity for an atomic one.
21692 if (!ST->isSimple())
21693 return SDValue();
21694
21695 SDValue Val = ST->getValue();
21696 SDLoc DL(ST);
21697
21698 // Match OR operand.
21699 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21700 return SDValue();
21701
21702 // Match SHL operand and get Lower and Higher parts of Val.
21703 SDValue Op1 = Val.getOperand(0);
21704 SDValue Op2 = Val.getOperand(1);
21705 SDValue Lo, Hi;
21706 if (Op1.getOpcode() != ISD::SHL) {
21707 std::swap(Op1, Op2);
21708 if (Op1.getOpcode() != ISD::SHL)
21709 return SDValue();
21710 }
21711 Lo = Op2;
21712 Hi = Op1.getOperand(0);
21713 if (!Op1.hasOneUse())
21714 return SDValue();
21715
21716 // Match shift amount to HalfValBitSize.
21717 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21718 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21719 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21720 return SDValue();
21721
21722 // Lo and Hi must be zero-extended from integer types no wider than
21723 // HalfValBitSize (e.g. from i32 to i64).
21724 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21725 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21726 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21727 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21728 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21729 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21730 return SDValue();
21731
21732 // Use the EVT of low and high parts before bitcast as the input
21733 // of target query.
21734 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21735 ? Lo.getOperand(0).getValueType()
21736 : Lo.getValueType();
21737 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21738 ? Hi.getOperand(0).getValueType()
21739 : Hi.getValueType();
21740 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21741 return SDValue();
21742
21743 // Start to split store.
21744 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21745 AAMDNodes AAInfo = ST->getAAInfo();
21746
21747 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21748 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21749 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21750 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21751
21752 SDValue Chain = ST->getChain();
21753 SDValue Ptr = ST->getBasePtr();
21754 // Lower value store.
21755 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21756 ST->getOriginalAlign(), MMOFlags, AAInfo);
21757 Ptr =
21758 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21759 // Higher value store.
21760 SDValue St1 = DAG.getStore(
21761 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21762 ST->getOriginalAlign(), MMOFlags, AAInfo);
21763 return St1;
21764}
21765
21766// Merge an insertion into an existing shuffle:
21767// (insert_vector_elt (vector_shuffle X, Y, Mask),
21768 // (extract_vector_elt X, N), InsIndex)
21769// --> (vector_shuffle X, Y, NewMask)
21770 // and variations where shuffle operands may be CONCAT_VECTORS.
21771 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21772 SmallVectorImpl<int> &NewMask, SDValue Elt,
21773 unsigned InsIndex) {
21774 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21775 !isa<ConstantSDNode>(Elt.getOperand(1)))
21776 return false;
21777
21778 // Vec's operand 0 is using indices from 0 to N-1 and
21779 // operand 1 from N to 2N - 1, where N is the number of
21780 // elements in the vectors.
21781 SDValue InsertVal0 = Elt.getOperand(0);
21782 int ElementOffset = -1;
21783
21784 // We explore the inputs of the shuffle in order to see if we find the
21785 // source of the extract_vector_elt. If so, we can use it to modify the
21786 // shuffle rather than perform an insert_vector_elt.
21787 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
21788 ArgWorkList.emplace_back(Mask.size(), Y);
21789 ArgWorkList.emplace_back(0, X);
21790
21791 while (!ArgWorkList.empty()) {
21792 int ArgOffset;
21793 SDValue ArgVal;
21794 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21795
21796 if (ArgVal == InsertVal0) {
21797 ElementOffset = ArgOffset;
21798 break;
21799 }
21800
21801 // Peek through concat_vector.
21802 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21803 int CurrentArgOffset =
21804 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21805 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21806 for (SDValue Op : reverse(ArgVal->ops())) {
21807 CurrentArgOffset -= Step;
21808 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21809 }
21810
21811 // Make sure we went through all the elements and did not screw up index
21812 // computation.
21813 assert(CurrentArgOffset == ArgOffset);
21814 }
21815 }
21816
21817 // If we failed to find a match, see if we can replace an UNDEF shuffle
21818 // operand.
21819 if (ElementOffset == -1) {
21820 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21821 return false;
21822 ElementOffset = Mask.size();
21823 Y = InsertVal0;
21824 }
21825
21826 NewMask.assign(Mask.begin(), Mask.end());
21827 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21828 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21829 "NewMask[InsIndex] is out of bound");
21830 return true;
21831}
21832
21833// Merge an insertion into an existing shuffle:
21834// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21835// InsIndex)
21836// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21837// CONCAT_VECTORS.
21838SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21839 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21840 "Expected extract_vector_elt");
21841 SDValue InsertVal = N->getOperand(1);
21842 SDValue Vec = N->getOperand(0);
21843
21844 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21845 if (!SVN || !Vec.hasOneUse())
21846 return SDValue();
21847
21848 ArrayRef<int> Mask = SVN->getMask();
21849 SDValue X = Vec.getOperand(0);
21850 SDValue Y = Vec.getOperand(1);
21851
21852 SmallVector<int, 16> NewMask(Mask);
21853 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21854 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21855 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21856 if (LegalShuffle)
21857 return LegalShuffle;
21858 }
21859
21860 return SDValue();
21861}
21862
21863// Convert a disguised subvector insertion into a shuffle:
21864// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21865// bitcast(shuffle (bitcast V), (extended X), Mask)
21866// Note: We do not use an insert_subvector node because that requires a
21867// legal subvector type.
21868SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21869 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21870 "Expected extract_vector_elt");
21871 SDValue InsertVal = N->getOperand(1);
21872
21873 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21874 !InsertVal.getOperand(0).getValueType().isVector())
21875 return SDValue();
21876
21877 SDValue SubVec = InsertVal.getOperand(0);
21878 SDValue DestVec = N->getOperand(0);
21879 EVT SubVecVT = SubVec.getValueType();
21880 EVT VT = DestVec.getValueType();
21881 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21882 // If the source only has a single vector element, the cost of adding
21883 // it to a vector is likely to exceed the cost of an insert_vector_elt.
21884 if (NumSrcElts == 1)
21885 return SDValue();
21886 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21887 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21888
21889 // Step 1: Create a shuffle mask that implements this insert operation. The
21890 // vector that we are inserting into will be operand 0 of the shuffle, so
21891 // those elements are just 'i'. The inserted subvector is in the first
21892 // positions of operand 1 of the shuffle. Example:
21893 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21894 SmallVector<int, 16> Mask(NumMaskVals);
21895 for (unsigned i = 0; i != NumMaskVals; ++i) {
21896 if (i / NumSrcElts == InsIndex)
21897 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21898 else
21899 Mask[i] = i;
21900 }
21901
21902 // Bail out if the target can not handle the shuffle we want to create.
21903 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21904 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21905 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21906 return SDValue();
21907
21908 // Step 2: Create a wide vector from the inserted source vector by appending
21909 // undefined elements. This is the same size as our destination vector.
21910 SDLoc DL(N);
21911 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21912 ConcatOps[0] = SubVec;
21913 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21914
21915 // Step 3: Shuffle in the padded subvector.
21916 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21917 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21918 AddToWorklist(PaddedSubV.getNode());
21919 AddToWorklist(DestVecBC.getNode());
21920 AddToWorklist(Shuf.getNode());
21921 return DAG.getBitcast(VT, Shuf);
21922}
21923
21924// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21925 // possible and the new load will be quick. We use more loads but fewer shuffles
21926// and inserts.
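// For example (illustrative sketch, v4i32 with i32 elements):
//   insert(shuffle(load <4 x i32> p, <u,0,1,2>), load i32 (p - 4), 0)
//     --> load <4 x i32> (p - 4)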
21927SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21928 EVT VT = N->getValueType(0);
21929
21930 // InsIndex is expected to be the first or last lane.
21931 if (!VT.isFixedLengthVector() ||
21932 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21933 return SDValue();
21934
21935 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21936 // depending on the InsIndex.
21937 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21938 SDValue Scalar = N->getOperand(1);
21939 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21940 return InsIndex == P.index() || P.value() < 0 ||
21941 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21942 (InsIndex == VT.getVectorNumElements() - 1 &&
21943 P.value() == (int)P.index() + 1);
21944 }))
21945 return SDValue();
21946
21947 // We optionally skip over an extend so long as both loads are extended in the
21948 // same way from the same type.
21949 unsigned Extend = 0;
21950 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21951 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21952 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21953 Extend = Scalar.getOpcode();
21954 Scalar = Scalar.getOperand(0);
21955 }
21956
21957 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21958 if (!ScalarLoad)
21959 return SDValue();
21960
21961 SDValue Vec = Shuffle->getOperand(0);
21962 if (Extend) {
21963 if (Vec.getOpcode() != Extend)
21964 return SDValue();
21965 Vec = Vec.getOperand(0);
21966 }
21967 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21968 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21969 return SDValue();
21970
21971 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21972 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21973 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21974 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21975 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21976 return SDValue();
21977
21978 // Check that the offset between the pointers allows producing a single
21979 // contiguous load.
21980 if (InsIndex == 0) {
21981 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21982 -1))
21983 return SDValue();
21984 } else {
21985 if (!DAG.areNonVolatileConsecutiveLoads(
21986 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21987 return SDValue();
21988 }
21989
21990 // And that the new unaligned load will be fast.
21991 unsigned IsFast = 0;
21992 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21993 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21994 Vec.getValueType(), VecLoad->getAddressSpace(),
21995 NewAlign, VecLoad->getMemOperand()->getFlags(),
21996 &IsFast) ||
21997 !IsFast)
21998 return SDValue();
21999
22000 // Calculate the new Ptr and create the new load.
22001 SDLoc DL(N);
22002 SDValue Ptr = ScalarLoad->getBasePtr();
22003 if (InsIndex != 0)
22004 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22005 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22006 MachinePointerInfo PtrInfo =
22007 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22008 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22009
22010 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22011 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22012 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22013 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22014 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22015}
22016
22017SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22018 SDValue InVec = N->getOperand(0);
22019 SDValue InVal = N->getOperand(1);
22020 SDValue EltNo = N->getOperand(2);
22021 SDLoc DL(N);
22022
22023 EVT VT = InVec.getValueType();
22024 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22025
22026 // Insert into out-of-bounds element is undefined.
22027 if (IndexC && VT.isFixedLengthVector() &&
22028 IndexC->getZExtValue() >= VT.getVectorNumElements())
22029 return DAG.getUNDEF(VT);
22030
22031 // Remove redundant insertions:
22032 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22033 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22034 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22035 return InVec;
22036
22037 if (!IndexC) {
22038 // If this is variable insert to undef vector, it might be better to splat:
22039 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22040 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22041 return DAG.getSplat(VT, DL, InVal);
22042 return SDValue();
22043 }
22044
22045 if (VT.isScalableVector())
22046 return SDValue();
22047
22048 unsigned NumElts = VT.getVectorNumElements();
22049
22050 // We must know which element is being inserted for folds below here.
22051 unsigned Elt = IndexC->getZExtValue();
22052
22053 // Handle <1 x ???> vector insertion special cases.
22054 if (NumElts == 1) {
22055 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22056 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22057 InVal.getOperand(0).getValueType() == VT &&
22058 isNullConstant(InVal.getOperand(1)))
22059 return InVal.getOperand(0);
22060 }
22061
22062 // Canonicalize insert_vector_elt dag nodes.
22063 // Example:
22064 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22065 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22066 //
22067 // Do this only if the child insert_vector node has one use; also
22068 // do this only if indices are both constants and Idx1 < Idx0.
22069 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22070 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22071 unsigned OtherElt = InVec.getConstantOperandVal(2);
22072 if (Elt < OtherElt) {
22073 // Swap nodes.
22074 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22075 InVec.getOperand(0), InVal, EltNo);
22076 AddToWorklist(NewOp.getNode());
22077 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22078 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22079 }
22080 }
22081
22082 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22083 return Shuf;
22084
22085 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22086 return Shuf;
22087
22088 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22089 return Shuf;
22090
22091 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
22092 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22093 // vXi1 vector - we don't need to recurse.
22094 if (NumElts == 1)
22095 return DAG.getBuildVector(VT, DL, {InVal});
22096
22097 // If we haven't already collected the element, insert into the op list.
22098 EVT MaxEltVT = InVal.getValueType();
22099 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22100 unsigned Idx) {
22101 if (!Ops[Idx]) {
22102 Ops[Idx] = Elt;
22103 if (VT.isInteger()) {
22104 EVT EltVT = Elt.getValueType();
22105 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22106 }
22107 }
22108 };
22109
22110 // Ensure all the operands are the same value type, fill any missing
22111 // operands with UNDEF and create the BUILD_VECTOR.
22112 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22113 assert(Ops.size() == NumElts && "Unexpected vector size");
22114 for (SDValue &Op : Ops) {
22115 if (Op)
22116 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22117 else
22118 Op = DAG.getUNDEF(MaxEltVT);
22119 }
22120 return DAG.getBuildVector(VT, DL, Ops);
22121 };
22122
22123 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22124 Ops[Elt] = InVal;
22125
22126 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22127 for (SDValue CurVec = InVec; CurVec;) {
22128 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22129 if (CurVec.isUndef())
22130 return CanonicalizeBuildVector(Ops);
22131
22132 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22133 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22134 for (unsigned I = 0; I != NumElts; ++I)
22135 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22136 return CanonicalizeBuildVector(Ops);
22137 }
22138
22139 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22140 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22141 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22142 return CanonicalizeBuildVector(Ops);
22143 }
22144
22145 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22146 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22147 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22148 if (CurIdx->getAPIntValue().ult(NumElts)) {
22149 unsigned Idx = CurIdx->getZExtValue();
22150 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22151
22152 // Found entire BUILD_VECTOR.
22153 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22154 return CanonicalizeBuildVector(Ops);
22155
22156 CurVec = CurVec->getOperand(0);
22157 continue;
22158 }
22159
22160 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22161 // update the shuffle mask (and second operand if we started with unary
22162 // shuffle) and create a new legal shuffle.
22163 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22164 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22165 SDValue LHS = SVN->getOperand(0);
22166 SDValue RHS = SVN->getOperand(1);
22167 SmallVector<int, 16> Mask(SVN->getMask());
22168 bool Merged = true;
22169 for (auto I : enumerate(Ops)) {
22170 SDValue &Op = I.value();
22171 if (Op) {
22172 SmallVector<int, 16> NewMask;
22173 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22174 Merged = false;
22175 break;
22176 }
22177 Mask = std::move(NewMask);
22178 }
22179 }
22180 if (Merged)
22181 if (SDValue NewShuffle =
22182 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22183 return NewShuffle;
22184 }
22185
22186 // If all insertions are zero value, try to convert to AND mask.
22187 // TODO: Do this for -1 with OR mask?
22188 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22189 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22190 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22191 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22192 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22193 SmallVector<SDValue, 8> Mask(NumElts);
22194 for (unsigned I = 0; I != NumElts; ++I)
22195 Mask[I] = Ops[I] ? Zero : AllOnes;
22196 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22197 DAG.getBuildVector(VT, DL, Mask));
22198 }
22199
22200 // Failed to find a match in the chain - bail.
22201 break;
22202 }
22203
22204 // See if we can fill in the missing constant elements as zeros.
22205 // TODO: Should we do this for any constant?
22206 APInt DemandedZeroElts = APInt::getZero(NumElts);
22207 for (unsigned I = 0; I != NumElts; ++I)
22208 if (!Ops[I])
22209 DemandedZeroElts.setBit(I);
22210
22211 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22212 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22213 : DAG.getConstantFP(0, DL, MaxEltVT);
22214 for (unsigned I = 0; I != NumElts; ++I)
22215 if (!Ops[I])
22216 Ops[I] = Zero;
22217
22218 return CanonicalizeBuildVector(Ops);
22219 }
22220 }
22221
22222 return SDValue();
22223}
22224
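// Replace an extract_vector_elt of a loaded vector with a narrow scalar load
// of just the addressed element, provided the target allows the narrower
// access and the original memory ordering can be preserved.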
22225SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22226 SDValue EltNo,
22227 LoadSDNode *OriginalLoad) {
22228 assert(OriginalLoad->isSimple());
22229
22230 EVT ResultVT = EVE->getValueType(0);
22231 EVT VecEltVT = InVecVT.getVectorElementType();
22232
22233 // If the vector element type is not a multiple of a byte then we are unable
22234 // to correctly compute an address to load only the extracted element as a
22235 // scalar.
22236 if (!VecEltVT.isByteSized())
22237 return SDValue();
22238
22239 ISD::LoadExtType ExtTy =
22240 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22241 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22242 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22243 return SDValue();
22244
22245 Align Alignment = OriginalLoad->getAlign();
22246 MachinePointerInfo MPI;
22247 SDLoc DL(EVE);
22248 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22249 int Elt = ConstEltNo->getZExtValue();
22250 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22251 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22252 Alignment = commonAlignment(Alignment, PtrOff);
22253 } else {
22254 // Discard the pointer info except the address space because the memory
22255 // operand can't represent this new access since the offset is variable.
22256 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22257 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22258 }
22259
22260 unsigned IsFast = 0;
22261 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22262 OriginalLoad->getAddressSpace(), Alignment,
22263 OriginalLoad->getMemOperand()->getFlags(),
22264 &IsFast) ||
22265 !IsFast)
22266 return SDValue();
22267
22268 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22269 InVecVT, EltNo);
22270
22271 // We are replacing a vector load with a scalar load. The new load must have
22272 // identical memory op ordering to the original.
22273 SDValue Load;
22274 if (ResultVT.bitsGT(VecEltVT)) {
22275 // If the result type of vextract is wider than the load, then issue an
22276 // extending load instead.
22277 ISD::LoadExtType ExtType =
22278 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22279 : ISD::EXTLOAD;
22280 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22281 NewPtr, MPI, VecEltVT, Alignment,
22282 OriginalLoad->getMemOperand()->getFlags(),
22283 OriginalLoad->getAAInfo());
22284 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22285 } else {
22286 // The result type is narrower or the same width as the vector element
22287 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22288 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22289 OriginalLoad->getAAInfo());
22290 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22291 if (ResultVT.bitsLT(VecEltVT))
22292 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22293 else
22294 Load = DAG.getBitcast(ResultVT, Load);
22295 }
22296 ++OpsNarrowed;
22297 return Load;
22298}
22299
22300/// Transform a vector binary operation into a scalar binary operation by moving
22301 /// the math/logic after an extract element of a vector.
22302 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22303 const SDLoc &DL, bool LegalOperations) {
22304 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22305 SDValue Vec = ExtElt->getOperand(0);
22306 SDValue Index = ExtElt->getOperand(1);
22307 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22308 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22309 Vec->getNumValues() != 1)
22310 return SDValue();
22311
22312 // Targets may want to avoid this to prevent an expensive register transfer.
22313 if (!TLI.shouldScalarizeBinop(Vec))
22314 return SDValue();
22315
22316 // Extracting an element of a vector constant is constant-folded, so this
22317 // transform is just replacing a vector op with a scalar op while moving the
22318 // extract.
22319 SDValue Op0 = Vec.getOperand(0);
22320 SDValue Op1 = Vec.getOperand(1);
22321 APInt SplatVal;
22322 if (isAnyConstantBuildVector(Op0, true) ||
22323 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22324 isAnyConstantBuildVector(Op1, true) ||
22325 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22326 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22327 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22328 EVT VT = ExtElt->getValueType(0);
22329 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22330 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22331 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22332 }
22333
22334 return SDValue();
22335}
22336
22337 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22338 // recursively analyse all of its users and try to model them as
22339// bit sequence extractions. If all of them agree on the new, narrower element
22340// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22341// new element type, do so now.
22342// This is mainly useful to recover from legalization that scalarized
22343// the vector as wide elements, but tries to rebuild it with narrower elements.
22344//
22345// Some more nodes could be modelled if that helps cover interesting patterns.
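// For example (illustrative sketch): an i64 element extracted from a v2i64
// whose only uses are truncations and right-shifts down to i32 pieces can be
// rebuilt as i32 extracts from the same bits viewed as v4i32.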
22346bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22347 SDNode *N) {
22348 // We perform this optimization post type-legalization because
22349 // the type-legalizer often scalarizes integer-promoted vectors.
22350 // Performing this optimization earlier may cause legalization cycles.
22351 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22352 return false;
22353
22354 // TODO: Add support for big-endian.
22355 if (DAG.getDataLayout().isBigEndian())
22356 return false;
22357
22358 SDValue VecOp = N->getOperand(0);
22359 EVT VecVT = VecOp.getValueType();
22360 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22361
22362 // We must start with a constant extraction index.
22363 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22364 if (!IndexC)
22365 return false;
22366
22367 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22368 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22369
22370 // TODO: deal with the case of implicit anyext of the extraction.
22371 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22372 EVT ScalarVT = N->getValueType(0);
22373 if (VecVT.getScalarType() != ScalarVT)
22374 return false;
22375
22376 // TODO: deal with the cases other than everything being integer-typed.
22377 if (!ScalarVT.isScalarInteger())
22378 return false;
22379
22380 struct Entry {
22381 SDNode *Producer;
22382
22383 // Which bits of VecOp does it contain?
22384 unsigned BitPos;
22385 int NumBits;
22386 // NOTE: the actual width of \p Producer may be wider than NumBits!
22387
22388 Entry(Entry &&) = default;
22389 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22390 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22391
22392 Entry() = delete;
22393 Entry(const Entry &) = delete;
22394 Entry &operator=(const Entry &) = delete;
22395 Entry &operator=(Entry &&) = delete;
22396 };
22397 SmallVector<Entry, 32> Worklist;
22398 SmallVector<Entry, 32> Leafs;
22399
22400 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22401 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22402 /*NumBits=*/VecEltBitWidth);
22403
22404 while (!Worklist.empty()) {
22405 Entry E = Worklist.pop_back_val();
22406 // Does the node not even use any of the VecOp bits?
22407 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22408 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22409 return false; // Let's allow the other combines to clean this up first.
22410 // Did we fail to model any of the users of the Producer?
22411 bool ProducerIsLeaf = false;
22412 // Look at each user of this Producer.
22413 for (SDNode *User : E.Producer->uses()) {
22414 switch (User->getOpcode()) {
22415 // TODO: support ISD::BITCAST
22416 // TODO: support ISD::ANY_EXTEND
22417 // TODO: support ISD::ZERO_EXTEND
22418 // TODO: support ISD::SIGN_EXTEND
22419 case ISD::TRUNCATE:
22420 // Truncation simply means we keep position, but extract less bits.
22421 Worklist.emplace_back(User, E.BitPos,
22422 /*NumBits=*/User->getValueSizeInBits(0));
22423 break;
22424 // TODO: support ISD::SRA
22425 // TODO: support ISD::SHL
22426 case ISD::SRL:
22427 // We should be shifting the Producer by a constant amount.
22428 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22429 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22430 // Logical right-shift means that we start extraction later,
22431 // but stop it at the same position we did previously.
22432 unsigned ShAmt = ShAmtC->getZExtValue();
22433 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22434 break;
22435 }
22436 [[fallthrough]];
22437 default:
22438 // We can not model this user of the Producer.
22439 // Which means the current Producer will be an ISD::EXTRACT_VECTOR_ELT.
22440 ProducerIsLeaf = true;
22441 // Profitability check: all users that we can not model
22442 // must be ISD::BUILD_VECTOR's.
22443 if (User->getOpcode() != ISD::BUILD_VECTOR)
22444 return false;
22445 break;
22446 }
22447 }
22448 if (ProducerIsLeaf)
22449 Leafs.emplace_back(std::move(E));
22450 }
22451
22452 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22453
22454 // If we are still at the same element granularity, give up.
22455 if (NewVecEltBitWidth == VecEltBitWidth)
22456 return false;
22457
22458 // The vector width must be a multiple of the new element width.
22459 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22460 return false;
22461
22462 // All leafs must agree on the new element width.
22463 // All leafs must not expect any "padding" bits on top of that width.
22464 // All leafs must start extraction from a multiple of that width.
22465 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22466 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22467 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22468 E.BitPos % NewVecEltBitWidth == 0;
22469 }))
22470 return false;
22471
22472 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22473 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22474 VecVT.getSizeInBits() / NewVecEltBitWidth);
22475
22476 if (LegalTypes &&
22477 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22478 return false;
22479
22480 if (LegalOperations &&
22481 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22482 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22483 return false;
22484
22485 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22486 for (const Entry &E : Leafs) {
22487 SDLoc DL(E.Producer);
22488 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22489 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22490 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22491 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22492 DAG.getVectorIdxConstant(NewIndex, DL));
22493 CombineTo(E.Producer, V);
22494 }
22495
22496 return true;
22497}
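// [Editorial illustration; not part of the upstream source. Node values and
// types below are hypothetical, and assume a little-endian target where
// v4i32 is legal.] A sketch of what the combine above can do:
//   t0: i64 = extract_vector_elt t1:v2i64, Constant:i64<0>
//   t2: i32 = truncate t0
//   t4: i32 = truncate (srl t0, Constant:i64<32>)
// With both truncates feeding only BUILD_VECTOR users, the two 32-bit leaf
// extractions are rewritten as narrower element extracts:
//   t5: v4i32 = bitcast t1
//   t2: i32 = extract_vector_elt t5, Constant:i64<0>
//   t4: i32 = extract_vector_elt t5, Constant:i64<1>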
22498
22499SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22500 SDValue VecOp = N->getOperand(0);
22501 SDValue Index = N->getOperand(1);
22502 EVT ScalarVT = N->getValueType(0);
22503 EVT VecVT = VecOp.getValueType();
22504 if (VecOp.isUndef())
22505 return DAG.getUNDEF(ScalarVT);
22506
22507 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22508 //
22509 // This only really matters if the index is non-constant since other combines
22510 // on the constant elements already work.
22511 SDLoc DL(N);
22512 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22513 Index == VecOp.getOperand(2)) {
22514 SDValue Elt = VecOp.getOperand(1);
22515 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22516 }
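// [Editorial illustration; not part of the upstream source; values are
// hypothetical.] For example:
//   t0: v4i32 = insert_vector_elt t1, t2, t3
//   t4: i32 = extract_vector_elt t0, t3
// folds directly to t2, even when the index t3 is not a constant.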
22517
22518 // (vextract (scalar_to_vector val), 0) -> val
22519 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22520 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22521 if (DAG.isKnownNeverZero(Index))
22522 return DAG.getUNDEF(ScalarVT);
22523
22524 // Check if the result type doesn't match the inserted element type.
22525 // The inserted element and extracted element may have mismatched bitwidth.
22526 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22527 SDValue InOp = VecOp.getOperand(0);
22528 if (InOp.getValueType() != ScalarVT) {
22529 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22530 if (InOp.getValueType().bitsGT(ScalarVT))
22531 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22532 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22533 }
22534 return InOp;
22535 }
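// [Editorial illustration; not part of the upstream source; values are
// hypothetical.] For example:
//   t0: v4i32 = scalar_to_vector t1:i32
//   t2: i32 = extract_vector_elt t0, Constant:i64<0>
// folds to t1, while extracting a provably non-zero index yields UNDEF,
// since only element 0 of SCALAR_TO_VECTOR is defined.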
22536
22537 // extract_vector_elt of out-of-bounds element -> UNDEF
22538 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22539 if (IndexC && VecVT.isFixedLengthVector() &&
22540 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22541 return DAG.getUNDEF(ScalarVT);
22542
22543 // extract_vector_elt (build_vector x, y), 1 -> y
22544 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22545 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22546 TLI.isTypeLegal(VecVT)) {
22547 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22548 VecVT.isFixedLengthVector()) &&
22549 "BUILD_VECTOR used for scalable vectors");
22550 unsigned IndexVal =
22551 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22552 SDValue Elt = VecOp.getOperand(IndexVal);
22553 EVT InEltVT = Elt.getValueType();
22554
22555 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22556 isNullConstant(Elt)) {
22557 // Sometimes build_vector's scalar input types do not match result type.
22558 if (ScalarVT == InEltVT)
22559 return Elt;
22560
22561 // TODO: It may be useful to truncate if free if the build_vector
22562 // implicitly converts.
22563 }
22564 }
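// [Editorial illustration; not part of the upstream source; values are
// hypothetical.] For example:
//   t0: v2i32 = BUILD_VECTOR t1, t2
//   t3: i32 = extract_vector_elt t0, Constant:i64<1>
// folds to t2 when the build_vector is single-use (or the target prefers
// build_vector sources) and the scalar types already match.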
22565
22566 if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
22567 return BO;
22568
22569 if (VecVT.isScalableVector())
22570 return SDValue();
22571
22572 // All the code from this point onwards assumes fixed width vectors, but it's
22573 // possible that some of the combinations could be made to work for scalable
22574 // vectors too.
22575 unsigned NumElts = VecVT.getVectorNumElements();
22576 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22577
22578 // See if the extracted element is constant, in which case fold it if it's
22579 // a legal fp immediate.
22580 if (IndexC && ScalarVT.isFloatingPoint()) {
22581 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22582 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22583 if (KnownElt.isConstant()) {
22584 APFloat CstFP =
22585 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22586 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22587 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22588 }
22589 }
22590
22591 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22592 // there are regressions on multiple targets without it. We can end up with a
22593 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22594 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22595 VecOp.hasOneUse()) {
22596 // The vector index of the LSBs of the source depends on the endianness.
22597 bool IsLE = DAG.getDataLayout().isLittleEndian();
22598 unsigned ExtractIndex = IndexC->getZExtValue();
22599 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22600 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22601 SDValue BCSrc = VecOp.getOperand(0);
22602 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22603 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22604
22605 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22606 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22607 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22608 // trunc i64 X to i32
22609 SDValue X = BCSrc.getOperand(0);
22610 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22611 "Extract element and scalar to vector can't change element type "
22612 "from FP to integer.");
22613 unsigned XBitWidth = X.getValueSizeInBits();
22614 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22615
22616 // An extract element return value type can be wider than its vector
22617 // operand element type. In that case, the high bits are undefined, so
22618 // it's possible that we may need to extend rather than truncate.
22619 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22620 assert(XBitWidth % VecEltBitWidth == 0 &&
22621 "Scalar bitwidth must be a multiple of vector element bitwidth");
22622 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22623 }
22624 }
22625 }
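// [Editorial illustration; not part of the upstream source; values are
// hypothetical, little-endian assumed.] For example:
//   t0: v2i32 = bitcast t1:i64
//   t2: i32 = extract_vector_elt t0, Constant:i64<0>
// becomes (i32 (truncate t1)); on a big-endian target the low bits live in
// element NumElts - 1 instead.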
22626
22627 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22628 // We only perform this optimization before the op legalization phase because
22629 // we may introduce new vector instructions which are not backed by TD
22630 // patterns (for example, on AVX, extracting elements from a wide vector
22631 // without using extract_subvector). However, if we can find an underlying
22632 // scalar value, then we can always use that.
22633 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22634 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22635 // Find the new index to extract from.
22636 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22637
22638 // Extracting an undef index is undef.
22639 if (OrigElt == -1)
22640 return DAG.getUNDEF(ScalarVT);
22641
22642 // Select the right vector half to extract from.
22643 SDValue SVInVec;
22644 if (OrigElt < (int)NumElts) {
22645 SVInVec = VecOp.getOperand(0);
22646 } else {
22647 SVInVec = VecOp.getOperand(1);
22648 OrigElt -= NumElts;
22649 }
22650
22651 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22652 SDValue InOp = SVInVec.getOperand(OrigElt);
22653 if (InOp.getValueType() != ScalarVT) {
22654 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22655 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22656 }
22657
22658 return InOp;
22659 }
22660
22661 // FIXME: We should handle recursing on other vector shuffles and
22662 // scalar_to_vector here as well.
22663
22664 if (!LegalOperations ||
22665 // FIXME: Should really be just isOperationLegalOrCustom.
22666 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22667 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT))
22668 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22669 DAG.getVectorIdxConstant(OrigElt, DL));
22670 }
22671 }
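// [Editorial illustration; not part of the upstream source; values are
// hypothetical.] For example:
//   t0: v4i32 = vector_shuffle<2,u,6,4> t1, t2
//   t3: i32 = extract_vector_elt t0, Constant:i64<2>
// becomes (extract_vector_elt t2, Constant:i64<2>) when the target allows
// it, since mask element 2 selects lane 6, i.e. lane 2 of the second input.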
22672
22673 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22674 // simplify it based on the (valid) extraction indices.
22675 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22676 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22677 Use->getOperand(0) == VecOp &&
22678 isa<ConstantSDNode>(Use->getOperand(1));
22679 })) {
22680 APInt DemandedElts = APInt::getZero(NumElts);
22681 for (SDNode *Use : VecOp->uses()) {
22682 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22683 if (CstElt->getAPIntValue().ult(NumElts))
22684 DemandedElts.setBit(CstElt->getZExtValue());
22685 }
22686 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22687 // We simplified the vector operand of this extract element. If this
22688 // extract is not dead, visit it again so it is folded properly.
22689 if (N->getOpcode() != ISD::DELETED_NODE)
22690 AddToWorklist(N);
22691 return SDValue(N, 0);
22692 }
22693 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22694 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22695 // We simplified the vector operand of this extract element. If this
22696 // extract is not dead, visit it again so it is folded properly.
22697 if (N->getOpcode() != ISD::DELETED_NODE)
22698 AddToWorklist(N);
22699 return SDValue(N, 0);
22700 }
22701 }
22702
22703 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22704 return SDValue(N, 0);
22705
22706 // Everything under here is trying to match an extract of a loaded value.
22707 // If the result of load has to be truncated, then it's not necessarily
22708 // profitable.
22709 bool BCNumEltsChanged = false;
22710 EVT ExtVT = VecVT.getVectorElementType();
22711 EVT LVT = ExtVT;
22712 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22713 return SDValue();
22714
22715 if (VecOp.getOpcode() == ISD::BITCAST) {
22716 // Don't duplicate a load with other uses.
22717 if (!VecOp.hasOneUse())
22718 return SDValue();
22719
22720 EVT BCVT = VecOp.getOperand(0).getValueType();
22721 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22722 return SDValue();
22723 if (NumElts != BCVT.getVectorNumElements())
22724 BCNumEltsChanged = true;
22725 VecOp = VecOp.getOperand(0);
22726 ExtVT = BCVT.getVectorElementType();
22727 }
22728
22729 // extract (vector load $addr), i --> load $addr + i * size
22730 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22731 ISD::isNormalLoad(VecOp.getNode()) &&
22732 !Index->hasPredecessor(VecOp.getNode())) {
22733 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22734 if (VecLoad && VecLoad->isSimple())
22735 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22736 }
22737
22738 // Perform only after legalization to ensure build_vector / vector_shuffle
22739 // optimizations have already been done.
22740 if (!LegalOperations || !IndexC)
22741 return SDValue();
22742
22743 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22744 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22745 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22746 int Elt = IndexC->getZExtValue();
22747 LoadSDNode *LN0 = nullptr;
22748 if (ISD::isNormalLoad(VecOp.getNode())) {
22749 LN0 = cast<LoadSDNode>(VecOp);
22750 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22751 VecOp.getOperand(0).getValueType() == ExtVT &&
22752 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22753 // Don't duplicate a load with other uses.
22754 if (!VecOp.hasOneUse())
22755 return SDValue();
22756
22757 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22758 }
22759 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22760 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22761 // =>
22762 // (load $addr+1*size)
22763
22764 // Don't duplicate a load with other uses.
22765 if (!VecOp.hasOneUse())
22766 return SDValue();
22767
22768 // If the bit convert changed the number of elements, it is unsafe
22769 // to examine the mask.
22770 if (BCNumEltsChanged)
22771 return SDValue();
22772
22773 // Select the input vector, guarding against out of range extract vector.
22774 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22775 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22776
22777 if (VecOp.getOpcode() == ISD::BITCAST) {
22778 // Don't duplicate a load with other uses.
22779 if (!VecOp.hasOneUse())
22780 return SDValue();
22781
22782 VecOp = VecOp.getOperand(0);
22783 }
22784 if (ISD::isNormalLoad(VecOp.getNode())) {
22785 LN0 = cast<LoadSDNode>(VecOp);
22786 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22787 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22788 }
22789 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22790 VecVT.getVectorElementType() == ScalarVT &&
22791 (!LegalTypes ||
22792 TLI.isTypeLegal(
22793 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22794 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22795 // -> extract_vector_elt a, 0
22796 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22797 // -> extract_vector_elt a, 1
22798 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22799 // -> extract_vector_elt b, 0
22800 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22801 // -> extract_vector_elt b, 1
22802 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22803 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22804 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22805 Index.getValueType());
22806
22807 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22808 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22809 ConcatVT.getVectorElementType(),
22810 ConcatOp, NewIdx);
22811 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22812 }
22813
22814 // Make sure we found a non-volatile load and the extractelement is
22815 // the only use.
22816 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22817 return SDValue();
22818
22819 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22820 if (Elt == -1)
22821 return DAG.getUNDEF(LVT);
22822
22823 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22824}
22825
22826// Simplify (build_vec (ext )) to (bitcast (build_vec ))
22827SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22828 // We perform this optimization post type-legalization because
22829 // the type-legalizer often scalarizes integer-promoted vectors.
22830 // Performing this optimization before may create bit-casts which
22831 // will be type-legalized to complex code sequences.
22832 // We perform this optimization only before the operation legalizer because we
22833 // may introduce illegal operations.
22834 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22835 return SDValue();
22836
22837 unsigned NumInScalars = N->getNumOperands();
22838 SDLoc DL(N);
22839 EVT VT = N->getValueType(0);
22840
22841 // Check to see if this is a BUILD_VECTOR of a bunch of values
22842 // which come from any_extend or zero_extend nodes. If so, we can create
22843 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22844 // optimizations. We do not handle sign-extend because we can't fill the sign
22845 // using shuffles.
22846 EVT SourceType = MVT::Other;
22847 bool AllAnyExt = true;
22848
22849 for (unsigned i = 0; i != NumInScalars; ++i) {
22850 SDValue In = N->getOperand(i);
22851 // Ignore undef inputs.
22852 if (In.isUndef()) continue;
22853
22854 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22855 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22856
22857 // Abort if the element is not an extension.
22858 if (!ZeroExt && !AnyExt) {
22859 SourceType = MVT::Other;
22860 break;
22861 }
22862
22863 // The input is a ZeroExt or AnyExt. Check the original type.
22864 EVT InTy = In.getOperand(0).getValueType();
22865
22866 // Check that all of the widened source types are the same.
22867 if (SourceType == MVT::Other)
22868 // First time.
22869 SourceType = InTy;
22870 else if (InTy != SourceType) {
22871 // Multiple incoming types. Abort.
22872 SourceType = MVT::Other;
22873 break;
22874 }
22875
22876 // Check if all of the extends are ANY_EXTENDs.
22877 AllAnyExt &= AnyExt;
22878 }
22879
22880 // In order to have valid types, all of the inputs must be extended from the
22881 // same source type and all of the inputs must be any or zero extend.
22882 // Scalar sizes must be a power of two.
22883 EVT OutScalarTy = VT.getScalarType();
22884 bool ValidTypes =
22885 SourceType != MVT::Other &&
22886 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22887 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22888
22889 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22890 // turn into a single shuffle instruction.
22891 if (!ValidTypes)
22892 return SDValue();
22893
22894 // If we already have a splat buildvector, then don't fold it if it means
22895 // introducing zeros.
22896 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22897 return SDValue();
22898
22899 bool isLE = DAG.getDataLayout().isLittleEndian();
22900 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22901 assert(ElemRatio > 1 && "Invalid element size ratio");
22902 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22903 DAG.getConstant(0, DL, SourceType);
22904
22905 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22906 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22907
22908 // Populate the new build_vector
22909 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22910 SDValue Cast = N->getOperand(i);
22911 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22912 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22913 Cast.isUndef()) && "Invalid cast opcode");
22914 SDValue In;
22915 if (Cast.isUndef())
22916 In = DAG.getUNDEF(SourceType);
22917 else
22918 In = Cast->getOperand(0);
22919 unsigned Index = isLE ? (i * ElemRatio) :
22920 (i * ElemRatio + (ElemRatio - 1));
22921
22922 assert(Index < Ops.size() && "Invalid index");
22923 Ops[Index] = In;
22924 }
22925
22926 // The type of the new BUILD_VECTOR node.
22927 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22928 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22929 "Invalid vector size");
22930 // Check if the new vector type is legal.
22931 if (!isTypeLegal(VecVT) ||
22932 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22933 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22934 return SDValue();
22935
22936 // Make the new BUILD_VECTOR.
22937 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22938
22939 // The new BUILD_VECTOR node has the potential to be further optimized.
22940 AddToWorklist(BV.getNode());
22941 // Bitcast to the desired type.
22942 return DAG.getBitcast(VT, BV);
22943}
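// [Editorial illustration; not part of the upstream source; values are
// hypothetical, little-endian assumed, and v4i8 is assumed legal.]
//   t2: v2i16 = BUILD_VECTOR (zero_extend t0:i8), (zero_extend t1:i8)
// is rebuilt after type legalization as
//   t3: v4i8 = BUILD_VECTOR t0, Constant:i8<0>, t1, Constant:i8<0>
//   t4: v2i16 = bitcast t3
// which later combines can often turn into a single shuffle.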
22944
22945// Simplify (build_vec (trunc $1)
22946// (trunc (srl $1 half-width))
22947// (trunc (srl $1 (2 * half-width))))
22948// to (bitcast $1)
22949SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22950 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22951
22952 EVT VT = N->getValueType(0);
22953
22954 // Don't run this before LegalizeTypes if VT is legal.
22955 // Targets may have other preferences.
22956 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22957 return SDValue();
22958
22959 // Only for little endian
22960 if (!DAG.getDataLayout().isLittleEndian())
22961 return SDValue();
22962
22963 SDLoc DL(N);
22964 EVT OutScalarTy = VT.getScalarType();
22965 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22966
22967 // Only for power of two types to be sure that bitcast works well
22968 if (!isPowerOf2_64(ScalarTypeBitsize))
22969 return SDValue();
22970
22971 unsigned NumInScalars = N->getNumOperands();
22972
22973 // Look through bitcasts
22974 auto PeekThroughBitcast = [](SDValue Op) {
22975 if (Op.getOpcode() == ISD::BITCAST)
22976 return Op.getOperand(0);
22977 return Op;
22978 };
22979
22980 // The source value where all the parts are extracted.
22981 SDValue Src;
22982 for (unsigned i = 0; i != NumInScalars; ++i) {
22983 SDValue In = PeekThroughBitcast(N->getOperand(i));
22984 // Ignore undef inputs.
22985 if (In.isUndef()) continue;
22986
22987 if (In.getOpcode() != ISD::TRUNCATE)
22988 return SDValue();
22989
22990 In = PeekThroughBitcast(In.getOperand(0));
22991
22992 if (In.getOpcode() != ISD::SRL) {
22993 // For now only build_vec without shuffling, handle shifts here in the
22994 // future.
22995 if (i != 0)
22996 return SDValue();
22997
22998 Src = In;
22999 } else {
23000 // In is SRL
23001 SDValue part = PeekThroughBitcast(In.getOperand(0));
23002
23003 if (!Src) {
23004 Src = part;
23005 } else if (Src != part) {
23006 // Vector parts do not stem from the same variable
23007 return SDValue();
23008 }
23009
23010 SDValue ShiftAmtVal = In.getOperand(1);
23011 if (!isa<ConstantSDNode>(ShiftAmtVal))
23012 return SDValue();
23013
23014 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23015
23016 // The extracted value is not extracted at the right position
23017 if (ShiftAmt != i * ScalarTypeBitsize)
23018 return SDValue();
23019 }
23020 }
23021
23022 // Only cast if the size is the same
23023 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23024 return SDValue();
23025
23026 return DAG.getBitcast(VT, Src);
23027}
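// [Editorial illustration; not part of the upstream source; values are
// hypothetical, little-endian assumed.]
//   t1: v4i16 = BUILD_VECTOR (trunc t0:i64),
//                            (trunc (srl t0, Constant:i64<16>)),
//                            (trunc (srl t0, Constant:i64<32>)),
//                            (trunc (srl t0, Constant:i64<48>))
// becomes (v4i16 (bitcast t0)), since element i extracts the i-th 16-bit
// chunk of the same 64-bit source value.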
23028
23029SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23030 ArrayRef<int> VectorMask,
23031 SDValue VecIn1, SDValue VecIn2,
23032 unsigned LeftIdx, bool DidSplitVec) {
23033 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23034
23035 EVT VT = N->getValueType(0);
23036 EVT InVT1 = VecIn1.getValueType();
23037 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23038
23039 unsigned NumElems = VT.getVectorNumElements();
23040 unsigned ShuffleNumElems = NumElems;
23041
23042 // If we artificially split a vector in two already, then the offsets in the
23043 // operands will all be based off of VecIn1, even those in VecIn2.
23044 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23045
23046 uint64_t VTSize = VT.getFixedSizeInBits();
23047 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23048 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23049
23050 assert(InVT2Size <= InVT1Size &&
23051 "Inputs must be sorted to be in non-increasing vector size order.");
23052
23053 // We can't generate a shuffle node with mismatched input and output types.
23054 // Try to make the types match the type of the output.
23055 if (InVT1 != VT || InVT2 != VT) {
23056 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23057 // If the output vector length is a multiple of both input lengths,
23058 // we can concatenate them and pad the rest with undefs.
23059 unsigned NumConcats = VTSize / InVT1Size;
23060 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23061 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23062 ConcatOps[0] = VecIn1;
23063 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23064 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23065 VecIn2 = SDValue();
23066 } else if (InVT1Size == VTSize * 2) {
23067 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23068 return SDValue();
23069
23070 if (!VecIn2.getNode()) {
23071 // If we only have one input vector, and it's twice the size of the
23072 // output, split it in two.
23073 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23074 DAG.getVectorIdxConstant(NumElems, DL));
23075 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23076 // Since we now have shorter input vectors, adjust the offset of the
23077 // second vector's start.
23078 Vec2Offset = NumElems;
23079 } else {
23080 assert(InVT2Size <= InVT1Size &&
23081 "Second input is not going to be larger than the first one.");
23082
23083 // VecIn1 is wider than the output, and we have another, possibly
23084 // smaller input. Pad the smaller input with undefs, shuffle at the
23085 // input vector width, and extract the output.
23086 // The shuffle type is different than VT, so check legality again.
23087 if (LegalOperations &&
23088 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23089 return SDValue();
23090
23091 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23092 // lower it back into a BUILD_VECTOR. So if the inserted type is
23093 // illegal, don't even try.
23094 if (InVT1 != InVT2) {
23095 if (!TLI.isTypeLegal(InVT2))
23096 return SDValue();
23097 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23098 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23099 }
23100 ShuffleNumElems = NumElems * 2;
23101 }
23102 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23103 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23104 ConcatOps[0] = VecIn2;
23105 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23106 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23107 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23108 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23109 return SDValue();
23110 // If the dest vector has fewer than two elements, then using a shuffle and
23111 // extracting from larger regs will cost even more.
23112 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23113 return SDValue();
23114 assert(InVT2Size <= InVT1Size &&
23115 "Second input is not going to be larger than the first one.");
23116
23117 // VecIn1 is wider than the output, and we have another, possibly
23118 // smaller input. Pad the smaller input with undefs, shuffle at the
23119 // input vector width, and extract the output.
23120 // The shuffle type is different than VT, so check legality again.
23121 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23122 return SDValue();
23123
23124 if (InVT1 != InVT2) {
23125 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23126 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23127 }
23128 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23129 } else {
23130 // TODO: Support cases where the length mismatch isn't exactly by a
23131 // factor of 2.
23132 // TODO: Move this check upwards, so that if we have bad type
23133 // mismatches, we don't create any DAG nodes.
23134 return SDValue();
23135 }
23136 }
23137
23138 // Initialize mask to undef.
23139 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23140
23141 // Only need to run up to the number of elements actually used, not the
23142 // total number of elements in the shuffle - if we are shuffling a wider
23143 // vector, the high lanes should be set to undef.
23144 for (unsigned i = 0; i != NumElems; ++i) {
23145 if (VectorMask[i] <= 0)
23146 continue;
23147
23148 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23149 if (VectorMask[i] == (int)LeftIdx) {
23150 Mask[i] = ExtIndex;
23151 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23152 Mask[i] = Vec2Offset + ExtIndex;
23153 }
23154 }
23155
23156 // The type the input vectors may have changed above.
23157 InVT1 = VecIn1.getValueType();
23158
23159 // If we already have a VecIn2, it should have the same type as VecIn1.
23160 // If we don't, get an undef/zero vector of the appropriate type.
23161 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23162 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23163
23164 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23165 if (ShuffleNumElems > NumElems)
23166 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23167
23168 return Shuffle;
23169}
23170
23171 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23172 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23173
23174 // First, determine where the build vector is not undef.
23175 // TODO: We could extend this to handle zero elements as well as undefs.
23176 int NumBVOps = BV->getNumOperands();
23177 int ZextElt = -1;
23178 for (int i = 0; i != NumBVOps; ++i) {
23179 SDValue Op = BV->getOperand(i);
23180 if (Op.isUndef())
23181 continue;
23182 if (ZextElt == -1)
23183 ZextElt = i;
23184 else
23185 return SDValue();
23186 }
23187 // Bail out if there's no non-undef element.
23188 if (ZextElt == -1)
23189 return SDValue();
23190
23191 // The build vector contains some number of undef elements and exactly
23192 // one other element. That other element must be a zero-extended scalar
23193 // extracted from a vector at a constant index to turn this into a shuffle.
23194 // Also, require that the build vector does not implicitly truncate/extend
23195 // its elements.
23196 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23197 EVT VT = BV->getValueType(0);
23198 SDValue Zext = BV->getOperand(ZextElt);
23199 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23200 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23201 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23202 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23203 return SDValue();
23204
23205 // The zero-extend must be a multiple of the source size, and we must be
23206 // building a vector of the same size as the source of the extract element.
23207 SDValue Extract = Zext.getOperand(0);
23208 unsigned DestSize = Zext.getValueSizeInBits();
23209 unsigned SrcSize = Extract.getValueSizeInBits();
23210 if (DestSize % SrcSize != 0 ||
23211 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23212 return SDValue();
23213
23214 // Create a shuffle mask that will combine the extracted element with zeros
23215 // and undefs.
23216 int ZextRatio = DestSize / SrcSize;
23217 int NumMaskElts = NumBVOps * ZextRatio;
23218 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23219 for (int i = 0; i != NumMaskElts; ++i) {
23220 if (i / ZextRatio == ZextElt) {
23221 // The low bits of the (potentially translated) extracted element map to
23222 // the source vector. The high bits map to zero. We will use a zero vector
23223 // as the 2nd source operand of the shuffle, so use the 1st element of
23224 // that vector (mask value is number-of-elements) for the high bits.
23225 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23226 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23227 : NumMaskElts;
23228 }
23229
23230 // Undef elements of the build vector remain undef because we initialize
23231 // the shuffle mask with -1.
23232 }
23233
23234 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23235 // bitcast (shuffle V, ZeroVec, VectorMask)
23236 SDLoc DL(BV);
23237 EVT VecVT = Extract.getOperand(0).getValueType();
23238 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23239 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23240 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23241 ZeroVec, ShufMask, DAG);
23242 if (!Shuf)
23243 return SDValue();
23244 return DAG.getBitcast(VT, Shuf);
23245}
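// [Editorial illustration; not part of the upstream source; values are
// hypothetical, little-endian assumed.]
//   t1: i16 = extract_vector_elt t0:v4i16, Constant:i64<1>
//   t2: i32 = zero_extend t1
//   t3: v2i32 = BUILD_VECTOR t2, undef
// becomes, when the target can build the shuffle legally,
//   t4: v4i16 = vector_shuffle<1,4,u,u> t0, <zero vector>
//   t5: v2i32 = bitcast t4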
23246
23247// FIXME: promote to STLExtras.
23248template <typename R, typename T>
23249static auto getFirstIndexOf(R &&Range, const T &Val) {
23250 auto I = find(Range, Val);
23251 if (I == Range.end())
23252 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23253 return std::distance(Range.begin(), I);
23254}
23255
23256// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23257// operations. If the types of the vectors we're extracting from allow it,
23258// turn this into a vector_shuffle node.
23259SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23260 SDLoc DL(N);
23261 EVT VT = N->getValueType(0);
23262
23263 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23264 if (!isTypeLegal(VT))
23265 return SDValue();
23266
23267 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23268 return V;
23269
23270 // May only combine to shuffle after legalize if shuffle is legal.
23271 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23272 return SDValue();
23273
23274 bool UsesZeroVector = false;
23275 unsigned NumElems = N->getNumOperands();
23276
23277 // Record, for each element of the newly built vector, which input vector
23278 // that element comes from. -1 stands for undef, 0 for the zero vector,
23279 // and positive values for the input vectors.
23280 // VectorMask maps each element to its vector number, and VecIn maps vector
23281 // numbers to their initial SDValues.
23282
23283 SmallVector<int, 8> VectorMask(NumElems, -1);
23284 SmallVector<SDValue, 8> VecIn;
23285 VecIn.push_back(SDValue());
23286
23287 for (unsigned i = 0; i != NumElems; ++i) {
23288 SDValue Op = N->getOperand(i);
23289
23290 if (Op.isUndef())
23291 continue;
23292
23293 // See if we can use a blend with a zero vector.
23294 // TODO: Should we generalize this to a blend with an arbitrary constant
23295 // vector?
23296 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23297 UsesZeroVector = true;
23298 VectorMask[i] = 0;
23299 continue;
23300 }
23301
23302 // Not an undef or zero. If the input is something other than an
23303 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23304 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23305 !isa<ConstantSDNode>(Op.getOperand(1)))
23306 return SDValue();
23307 SDValue ExtractedFromVec = Op.getOperand(0);
23308
23309 if (ExtractedFromVec.getValueType().isScalableVector())
23310 return SDValue();
23311
23312 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23313 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23314 return SDValue();
23315
23316 // All inputs must have the same element type as the output.
23317 if (VT.getVectorElementType() !=
23318 ExtractedFromVec.getValueType().getVectorElementType())
23319 return SDValue();
23320
23321 // Have we seen this input vector before?
23322 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23323 // a map back from SDValues to numbers isn't worth it.
23324 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23325 if (Idx == -1) { // A new source vector?
23326 Idx = VecIn.size();
23327 VecIn.push_back(ExtractedFromVec);
23328 }
23329
23330 VectorMask[i] = Idx;
23331 }
23332
23333 // If we didn't find at least one input vector, bail out.
23334 if (VecIn.size() < 2)
23335 return SDValue();
23336
23337 // If all the Operands of BUILD_VECTOR extract from the same
23338 // vector, then split the vector efficiently based on the maximum
23339 // vector access index and adjust the VectorMask and
23340 // VecIn accordingly.
23341 bool DidSplitVec = false;
23342 if (VecIn.size() == 2) {
23343 unsigned MaxIndex = 0;
23344 unsigned NearestPow2 = 0;
23345 SDValue Vec = VecIn.back();
23346 EVT InVT = Vec.getValueType();
23347 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23348
23349 for (unsigned i = 0; i < NumElems; i++) {
23350 if (VectorMask[i] <= 0)
23351 continue;
23352 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23353 IndexVec[i] = Index;
23354 MaxIndex = std::max(MaxIndex, Index);
23355 }
23356
23357 NearestPow2 = PowerOf2Ceil(MaxIndex);
23358 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23359 NumElems * 2 < NearestPow2) {
23360 unsigned SplitSize = NearestPow2 / 2;
23361 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23362 InVT.getVectorElementType(), SplitSize);
23363 if (TLI.isTypeLegal(SplitVT) &&
23364 SplitSize + SplitVT.getVectorNumElements() <=
23365 InVT.getVectorNumElements()) {
23366 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23367 DAG.getVectorIdxConstant(SplitSize, DL));
23368 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23369 DAG.getVectorIdxConstant(0, DL));
23370 VecIn.pop_back();
23371 VecIn.push_back(VecIn1);
23372 VecIn.push_back(VecIn2);
23373 DidSplitVec = true;
23374
23375 for (unsigned i = 0; i < NumElems; i++) {
23376 if (VectorMask[i] <= 0)
23377 continue;
23378 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23379 }
23380 }
23381 }
23382 }
23383
23384 // Sort input vectors by decreasing vector element count,
23385 // while preserving the relative order of equally-sized vectors.
23386 // Note that we keep the first "implicit" zero vector as-is.
23387 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23388 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23389 [](const SDValue &a, const SDValue &b) {
23390 return a.getValueType().getVectorNumElements() >
23391 b.getValueType().getVectorNumElements();
23392 });
23393
23394 // We now also need to rebuild the VectorMask, because it referenced element
23395 // order in VecIn, and we just sorted them.
23396 for (int &SourceVectorIndex : VectorMask) {
23397 if (SourceVectorIndex <= 0)
23398 continue;
23399 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23400 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23401 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23402 SourceVectorIndex = Idx;
23403 }
23404
23405 VecIn = std::move(SortedVecIn);
23406
23407 // TODO: Should this fire if some of the input vectors have an illegal type
23408 // (like it does now), or should we let legalization run its course first?
23409
23410 // Shuffle phase:
23411 // Take pairs of vectors, and shuffle them so that the result has elements
23412 // from these vectors in the correct places.
23413 // For example, given:
23414 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23415 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23416 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23417 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23418 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23419 // We will generate:
23420 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23421 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23422 SmallVector<SDValue, 4> Shuffles;
23423 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23424 unsigned LeftIdx = 2 * In + 1;
23425 SDValue VecLeft = VecIn[LeftIdx];
23426 SDValue VecRight =
23427 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23428
23429 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23430 VecRight, LeftIdx, DidSplitVec))
23431 Shuffles.push_back(Shuffle);
23432 else
23433 return SDValue();
23434 }
23435
23436 // If we need the zero vector as an "ingredient" in the blend tree, add it
23437 // to the list of shuffles.
23438 if (UsesZeroVector)
23439 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23440 : DAG.getConstantFP(0.0, DL, VT));
23441
23442 // If we only have one shuffle, we're done.
23443 if (Shuffles.size() == 1)
23444 return Shuffles[0];
23445
23446 // Update the vector mask to point to the post-shuffle vectors.
23447 for (int &Vec : VectorMask)
23448 if (Vec == 0)
23449 Vec = Shuffles.size() - 1;
23450 else
23451 Vec = (Vec - 1) / 2;
23452
23453 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23454 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23455 // generate:
23456 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23457 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23458 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23459 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23460 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23461 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23462 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23463
23464 // Make sure the initial size of the shuffle list is even.
23465 if (Shuffles.size() % 2)
23466 Shuffles.push_back(DAG.getUNDEF(VT));
23467
23468 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23469 if (CurSize % 2) {
23470 Shuffles[CurSize] = DAG.getUNDEF(VT);
23471 CurSize++;
23472 }
23473 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23474 int Left = 2 * In;
23475 int Right = 2 * In + 1;
23476 SmallVector<int, 8> Mask(NumElems, -1);
23477 SDValue L = Shuffles[Left];
23478 ArrayRef<int> LMask;
23479 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23480 L.use_empty() && L.getOperand(1).isUndef() &&
23481 L.getOperand(0).getValueType() == L.getValueType();
23482 if (IsLeftShuffle) {
23483 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23484 L = L.getOperand(0);
23485 }
23486 SDValue R = Shuffles[Right];
23487 ArrayRef<int> RMask;
23488 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23489 R.use_empty() && R.getOperand(1).isUndef() &&
23490 R.getOperand(0).getValueType() == R.getValueType();
23491 if (IsRightShuffle) {
23492 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23493 R = R.getOperand(0);
23494 }
23495 for (unsigned I = 0; I != NumElems; ++I) {
23496 if (VectorMask[I] == Left) {
23497 Mask[I] = I;
23498 if (IsLeftShuffle)
23499 Mask[I] = LMask[I];
23500 VectorMask[I] = In;
23501 } else if (VectorMask[I] == Right) {
23502 Mask[I] = I + NumElems;
23503 if (IsRightShuffle)
23504 Mask[I] = RMask[I] + NumElems;
23505 VectorMask[I] = In;
23506 }
23507 }
23508
23509 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23510 }
23511 }
23512 return Shuffles[0];
23513}
23514
23515// Try to turn a build vector of zero extends of extract vector elts into a
23516 // vector zero extend and possibly an extract subvector.
23517// TODO: Support sign extend?
23518// TODO: Allow undef elements?
23519SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23520 if (LegalOperations)
23521 return SDValue();
23522
23523 EVT VT = N->getValueType(0);
23524
23525 bool FoundZeroExtend = false;
23526 SDValue Op0 = N->getOperand(0);
23527 auto checkElem = [&](SDValue Op) -> int64_t {
23528 unsigned Opc = Op.getOpcode();
23529 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23530 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23531 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23532 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23533 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23534 return C->getZExtValue();
23535 return -1;
23536 };
23537
23538 // Make sure the first element matches
23539 // (zext (extract_vector_elt X, C))
23540 // Offset must be a constant multiple of the
23541 // known-minimum vector length of the result type.
23542 int64_t Offset = checkElem(Op0);
23543 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23544 return SDValue();
23545
23546 unsigned NumElems = N->getNumOperands();
23547 SDValue In = Op0.getOperand(0).getOperand(0);
23548 EVT InSVT = In.getValueType().getScalarType();
23549 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23550
23551 // Don't create an illegal input type after type legalization.
23552 if (LegalTypes && !TLI.isTypeLegal(InVT))
23553 return SDValue();
23554
23555 // Ensure all the elements come from the same vector and are adjacent.
23556 for (unsigned i = 1; i != NumElems; ++i) {
23557 if ((Offset + i) != checkElem(N->getOperand(i)))
23558 return SDValue();
23559 }
23560
23561 SDLoc DL(N);
23562 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23563 Op0.getOperand(0).getOperand(1));
23564 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23565 VT, In);
23566}
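// [Editorial illustration; not part of the upstream source; values are
// hypothetical, and the combine runs before operation legalization.]
//   t1: v4i16 = BUILD_VECTOR (zext (extract_vector_elt t0:v8i8, 4)),
//                            (zext (extract_vector_elt t0, 5)),
//                            (zext (extract_vector_elt t0, 6)),
//                            (zext (extract_vector_elt t0, 7))
// becomes
//   t2: v4i8 = extract_subvector t0, Constant:i64<4>
//   t3: v4i16 = zero_extend t2
// since the elements are adjacent and the starting offset (4) is a multiple
// of the result's element count.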
23567
23568 // If this is a very simple BUILD_VECTOR with its first element being a ZERO_EXTEND,
23569 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
23570 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23571 // This pattern can appear during legalization.
23572//
23573// NOTE: This can be generalized to allow more than a single
23574 // non-constant-zero op, UNDEF's, and to be KnownBits-based.
23575SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23576 // Don't run this after legalization. Targets may have other preferences.
23577 if (Level >= AfterLegalizeDAG)
23578 return SDValue();
23579
23580 // FIXME: support big-endian.
23581 if (DAG.getDataLayout().isBigEndian())
23582 return SDValue();
23583
23584 EVT VT = N->getValueType(0);
23585 EVT OpVT = N->getOperand(0).getValueType();
23586 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23587
23588 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23589
23590 if (!TLI.isTypeLegal(OpIntVT) ||
23591 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23592 return SDValue();
23593
23594 unsigned EltBitwidth = VT.getScalarSizeInBits();
23595 // NOTE: the actual width of operands may be wider than that!
23596
23597 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23598 // active bits they all have? We'll want to truncate them all to that width.
23599 unsigned ActiveBits = 0;
23600 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23601 for (auto I : enumerate(N->ops())) {
23602 SDValue Op = I.value();
23603 // FIXME: support UNDEF elements?
23604 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23605 unsigned OpActiveBits =
23606 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23607 if (OpActiveBits == 0) {
23608 KnownZeroOps.setBit(I.index());
23609 continue;
23610 }
23611 // Profitability check: don't allow non-zero constant operands.
23612 return SDValue();
23613 }
23614 // Profitability check: there must only be a single non-zero operand,
23615 // and it must be the first operand of the BUILD_VECTOR.
23616 if (I.index() != 0)
23617 return SDValue();
23618 // The operand must be a zero-extension itself.
23619 // FIXME: this could be generalized to known leading zeros check.
23620 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23621 return SDValue();
23622 unsigned CurrActiveBits =
23623 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23624 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23625 ActiveBits = CurrActiveBits;
23626 // We want to at least halve the element size.
23627 if (2 * ActiveBits > EltBitwidth)
23628 return SDValue();
23629 }
23630
23631 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23632 if (ActiveBits == 0)
23633 return SDValue();
23634
23635 // We have EltBitwidth bits; the *minimal* chunk size is ActiveBits.
23636 // Into how many chunks can we split our element width?
23637 EVT NewScalarIntVT, NewIntVT;
23638 std::optional<unsigned> Factor;
23639 // We can split the element into at least two chunks, but not into more
23640 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23641 // for which the element width is a multiple of it,
23642 // and the resulting types/operations on that chunk width are legal.
23643 assert(2 * ActiveBits <= EltBitwidth &&
23644 "We know that half or less bits of the element are active.");
23645 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23646 if (EltBitwidth % Scale != 0)
23647 continue;
23648 unsigned ChunkBitwidth = EltBitwidth / Scale;
23649 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23650 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23651 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23652 Scale * N->getNumOperands());
23653 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23654 (LegalOperations &&
23655 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23656 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23657 continue;
23658 Factor = Scale;
23659 break;
23660 }
23661 if (!Factor)
23662 return SDValue();
23663
23664 SDLoc DL(N);
23665 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23666
23667 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23668 SmallVector<SDValue, 16> NewOps;
23669 NewOps.reserve(NewIntVT.getVectorNumElements());
23670 for (auto I : enumerate(N->ops())) {
23671 SDValue Op = I.value();
23672 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23673 unsigned SrcOpIdx = I.index();
23674 if (KnownZeroOps[SrcOpIdx]) {
23675 NewOps.append(*Factor, ZeroOp);
23676 continue;
23677 }
23678 Op = DAG.getBitcast(OpIntVT, Op);
23679 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23680 NewOps.emplace_back(Op);
23681 NewOps.append(*Factor - 1, ZeroOp);
23682 }
23683 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23684 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23685 NewBV = DAG.getBitcast(VT, NewBV);
23686 return NewBV;
23687}
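// [Editorial illustration; not part of the upstream source; values are
// hypothetical, little-endian assumed, with i32 and v4i32 legal.]
//   t1: v2i64 = BUILD_VECTOR (zero_extend t0:i32), Constant:i64<0>
// becomes
//   t2: i32 = truncate (zero_extend t0)   // later folds back to t0
//   t3: v4i32 = BUILD_VECTOR t2, Constant:i32<0>,
//                            Constant:i32<0>, Constant:i32<0>
//   t4: v2i64 = bitcast t3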
23688
23689SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23690 EVT VT = N->getValueType(0);
23691
23692 // A vector built entirely of undefs is undef.
23693 if (ISD::allOperandsUndef(N))
23694 return DAG.getUNDEF(VT);
23695
23696 // If this is a splat of a bitcast from another vector, change to a
23697 // concat_vector.
23698 // For example:
23699 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23700 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23701 //
23702 // If X is a build_vector itself, the concat can become a larger build_vector.
23703 // TODO: Maybe this is useful for non-splat too?
23704 if (!LegalOperations) {
23705 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23706 // Only change build_vector to a concat_vector if the splat value type is
23707 // same as the vector element type.
23708 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23709 Splat = peekThroughBitcasts(Splat);
23710 EVT SrcVT = Splat.getValueType();
23711 if (SrcVT.isVector()) {
23712 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23713 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23714 SrcVT.getVectorElementType(), NumElts);
23715 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23716 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23717 SDValue Concat =
23718 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23719 return DAG.getBitcast(VT, Concat);
23720 }
23721 }
23722 }
23723 }
23724
23725 // Check if we can express BUILD VECTOR via subvector extract.
23726 if (!LegalTypes && (N->getNumOperands() > 1)) {
23727 SDValue Op0 = N->getOperand(0);
23728 auto checkElem = [&](SDValue Op) -> uint64_t {
23729 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23730 (Op0.getOperand(0) == Op.getOperand(0)))
23731 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23732 return CNode->getZExtValue();
23733 return -1;
23734 };
23735
23736 int Offset = checkElem(Op0);
23737 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23738 if (Offset + i != checkElem(N->getOperand(i))) {
23739 Offset = -1;
23740 break;
23741 }
23742 }
23743
23744 if ((Offset == 0) &&
23745 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23746 return Op0.getOperand(0);
23747 if ((Offset != -1) &&
23748 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23749 0)) // IDX must be multiple of output size.
23750 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23751 Op0.getOperand(0), Op0.getOperand(1));
23752 }
23753
23754 if (SDValue V = convertBuildVecZextToZext(N))
23755 return V;
23756
23757 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23758 return V;
23759
23760 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23761 return V;
23762
23763 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23764 return V;
23765
23766 if (SDValue V = reduceBuildVecToShuffle(N))
23767 return V;
23768
23769 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23770 // Do this late as some of the above may replace the splat.
23771 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
23772 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23773 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23774 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23775 }
23776
23777 return SDValue();
23778}
23779
23780 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23781 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23782 EVT OpVT = N->getOperand(0).getValueType();
23783
23784 // If the operands are legal vectors, leave them alone.
23785 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23786 return SDValue();
23787
23788 SDLoc DL(N);
23789 EVT VT = N->getValueType(0);
23790 SmallVector<SDValue, 8> Ops;
23791 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23792
23793 // Keep track of what we encounter.
23794 bool AnyInteger = false;
23795 bool AnyFP = false;
23796 for (const SDValue &Op : N->ops()) {
23797 if (ISD::BITCAST == Op.getOpcode() &&
23798 !Op.getOperand(0).getValueType().isVector())
23799 Ops.push_back(Op.getOperand(0));
23800 else if (ISD::UNDEF == Op.getOpcode())
23801 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23802 else
23803 return SDValue();
23804
23805 // Note whether we encounter an integer or floating point scalar.
23806 // If it's neither, bail out; it could be something weird like x86mmx.
23807 EVT LastOpVT = Ops.back().getValueType();
23808 if (LastOpVT.isFloatingPoint())
23809 AnyFP = true;
23810 else if (LastOpVT.isInteger())
23811 AnyInteger = true;
23812 else
23813 return SDValue();
23814 }
23815
23816 // If any of the operands is a floating point scalar bitcast to a vector,
23817 // use floating point types throughout, and bitcast everything.
23818 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23819 if (AnyFP) {
23820 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23821 if (AnyInteger) {
23822 for (SDValue &Op : Ops) {
23823 if (Op.getValueType() == SVT)
23824 continue;
23825 if (Op.isUndef())
23826 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23827 else
23828 Op = DAG.getBitcast(SVT, Op);
23829 }
23830 }
23831 }
23832
23833 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23834 VT.getSizeInBits() / SVT.getSizeInBits());
23835 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23836}
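// [Editorial illustration; not part of the upstream source; values are
// hypothetical, and v2i16 is assumed not to be a legal type.]
//   t2: v4i16 = concat_vectors (v2i16 (bitcast t0:i32)),
//                              (v2i16 (bitcast t1:i32))
// becomes
//   t3: v2i32 = BUILD_VECTOR t0, t1
//   t4: v4i16 = bitcast t3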
23837
23838// Attempt to merge nested concat_vectors/undefs.
23839// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23840// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23841 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23842 SelectionDAG &DAG) {
23843 EVT VT = N->getValueType(0);
23844
23845 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23846 EVT SubVT;
23847 SDValue FirstConcat;
23848 for (const SDValue &Op : N->ops()) {
23849 if (Op.isUndef())
23850 continue;
23851 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23852 return SDValue();
23853 if (!FirstConcat) {
23854 SubVT = Op.getOperand(0).getValueType();
23855 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23856 return SDValue();
23857 FirstConcat = Op;
23858 continue;
23859 }
23860 if (SubVT != Op.getOperand(0).getValueType())
23861 return SDValue();
23862 }
23863 assert(FirstConcat && "Concat of all-undefs found");
23864
23865 SmallVector<SDValue> ConcatOps;
23866 for (const SDValue &Op : N->ops()) {
23867 if (Op.isUndef()) {
23868 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23869 continue;
23870 }
23871 ConcatOps.append(Op->op_begin(), Op->op_end());
23872 }
23873 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23874}
23875
23876// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23877// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23878// most two distinct vectors the same size as the result, attempt to turn this
23879// into a legal shuffle.
23880 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23881 EVT VT = N->getValueType(0);
23882 EVT OpVT = N->getOperand(0).getValueType();
23883
23884 // We currently can't generate an appropriate shuffle for a scalable vector.
23885 if (VT.isScalableVector())
23886 return SDValue();
23887
23888 int NumElts = VT.getVectorNumElements();
23889 int NumOpElts = OpVT.getVectorNumElements();
23890
23891 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23892 SmallVector<int, 8> Mask;
23893
23894 for (SDValue Op : N->ops()) {
23895 Op = peekThroughBitcasts(Op);
23896
23897 // UNDEF nodes convert to UNDEF shuffle mask values.
23898 if (Op.isUndef()) {
23899 Mask.append((unsigned)NumOpElts, -1);
23900 continue;
23901 }
23902
23903 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23904 return SDValue();
23905
23906 // What vector are we extracting the subvector from and at what index?
23907 SDValue ExtVec = Op.getOperand(0);
23908 int ExtIdx = Op.getConstantOperandVal(1);
23909
23910 // We want the EVT of the original extraction to correctly scale the
23911 // extraction index.
23912 EVT ExtVT = ExtVec.getValueType();
23913 ExtVec = peekThroughBitcasts(ExtVec);
23914
23915 // UNDEF nodes convert to UNDEF shuffle mask values.
23916 if (ExtVec.isUndef()) {
23917 Mask.append((unsigned)NumOpElts, -1);
23918 continue;
23919 }
23920
23921 // Ensure that we are extracting a subvector from a vector the same
23922 // size as the result.
23923 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23924 return SDValue();
23925
23926 // Scale the subvector index to account for any bitcast.
23927 int NumExtElts = ExtVT.getVectorNumElements();
23928 if (0 == (NumExtElts % NumElts))
23929 ExtIdx /= (NumExtElts / NumElts);
23930 else if (0 == (NumElts % NumExtElts))
23931 ExtIdx *= (NumElts / NumExtElts);
23932 else
23933 return SDValue();
23934
23935 // At most we can reference 2 inputs in the final shuffle.
23936 if (SV0.isUndef() || SV0 == ExtVec) {
23937 SV0 = ExtVec;
23938 for (int i = 0; i != NumOpElts; ++i)
23939 Mask.push_back(i + ExtIdx);
23940 } else if (SV1.isUndef() || SV1 == ExtVec) {
23941 SV1 = ExtVec;
23942 for (int i = 0; i != NumOpElts; ++i)
23943 Mask.push_back(i + ExtIdx + NumElts);
23944 } else {
23945 return SDValue();
23946 }
23947 }
23948
23949 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23950 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23951 DAG.getBitcast(VT, SV1), Mask, DAG);
23952}
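// [Editorial illustration; not part of the upstream source; values are
// hypothetical, and the resulting shuffle is assumed legal.]
//   t2: v4i32 = concat_vectors (v2i32 (extract_subvector t0:v4i32, 2)),
//                              (v2i32 (extract_subvector t1:v4i32, 0))
// becomes
//   t3: v4i32 = vector_shuffle<2,3,4,5> t0, t1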
23953
23954 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23955 unsigned CastOpcode = N->getOperand(0).getOpcode();
23956 switch (CastOpcode) {
23957 case ISD::SINT_TO_FP:
23958 case ISD::UINT_TO_FP:
23959 case ISD::FP_TO_SINT:
23960 case ISD::FP_TO_UINT:
23961 // TODO: Allow more opcodes?
23962 // case ISD::BITCAST:
23963 // case ISD::TRUNCATE:
23964 // case ISD::ZERO_EXTEND:
23965 // case ISD::SIGN_EXTEND:
23966 // case ISD::FP_EXTEND:
23967 break;
23968 default:
23969 return SDValue();
23970 }
23971
23972 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23973 if (!SrcVT.isVector())
23974 return SDValue();
23975
23976 // All operands of the concat must be the same kind of cast from the same
23977 // source type.
23978 SmallVector<SDValue, 4> SrcOps;
23979 for (SDValue Op : N->ops()) {
23980 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23981 Op.getOperand(0).getValueType() != SrcVT)
23982 return SDValue();
23983 SrcOps.push_back(Op.getOperand(0));
23984 }
23985
23986 // The wider cast must be supported by the target. This is unusual because
23987 // the type used to query operation legality depends on the opcode. In
23988 // addition, check the other type in the cast to make sure this is really legal.
23989 EVT VT = N->getValueType(0);
23990 EVT SrcEltVT = SrcVT.getVectorElementType();
23991 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23992 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23993 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23994 switch (CastOpcode) {
23995 case ISD::SINT_TO_FP:
23996 case ISD::UINT_TO_FP:
23997 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23998 !TLI.isTypeLegal(VT))
23999 return SDValue();
24000 break;
24001 case ISD::FP_TO_SINT:
24002 case ISD::FP_TO_UINT:
24003 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24004 !TLI.isTypeLegal(ConcatSrcVT))
24005 return SDValue();
24006 break;
24007 default:
24008 llvm_unreachable("Unexpected cast opcode");
24009 }
24010
24011 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24012 SDLoc DL(N);
24013 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24014 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24015}
24016
24017// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24018// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24019// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
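// A sketch of the intended rewrite, using hypothetical v4i32 operands:
//   concat_vectors (shuffle X, undef, <3,2,1,0>), X
//   --> shuffle (concat_vectors X, undef), undef, <3,2,1,0,0,1,2,3>
// The inner shuffle's mask is reused as-is, the plain operand X receives an
// identity mask, and both shuffle operands are padded with UNDEF up to the
// concatenated type, subject to the legality checks below.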
24020 static SDValue combineConcatVectorOfShuffleAndItsOperands(
24021 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24022 bool LegalOperations) {
24023 EVT VT = N->getValueType(0);
24024 EVT OpVT = N->getOperand(0).getValueType();
24025 if (VT.isScalableVector())
24026 return SDValue();
24027
24028 // For now, only allow simple 2-operand concatenations.
24029 if (N->getNumOperands() != 2)
24030 return SDValue();
24031
24032 // Don't create illegal types/shuffles when not allowed to.
24033 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24034 (LegalOperations &&
24035 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24036 return SDValue();
24037
24038 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24039 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24040 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24041 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24042 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24043 ShuffleVectorSDNode *SVN = nullptr;
24044 for (SDValue Op : N->ops()) {
24045 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24046 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24047 all_of(N->ops(), [CurSVN](SDValue Op) {
24048 // FIXME: can we allow UNDEF operands?
24049 return !Op.isUndef() &&
24050 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24051 })) {
24052 SVN = CurSVN;
24053 break;
24054 }
24055 }
24056 if (!SVN)
24057 return SDValue();
24058
24059 // We are going to pad the shuffle operands, so any index that was picking
24060 // from the second operand must be adjusted.
24061 SmallVector<int, 16> AdjustedMask;
24062 AdjustedMask.reserve(SVN->getMask().size());
24063 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24064 append_range(AdjustedMask, SVN->getMask());
24065
24066 // Identity masks for the operands of the (padded) shuffle.
24067 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24068 MutableArrayRef<int> FirstShufOpIdentityMask =
24069 MutableArrayRef<int>(IdentityMask)
24070 .take_front(OpVT.getVectorNumElements());
24071 MutableArrayRef<int> SecondShufOpIdentityMask =
24072 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24073 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24074 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24075 OpVT.getVectorNumElements());
24076
24077 // New combined shuffle mask.
24078 SmallVector<int, 32> Mask;
24079 Mask.reserve(VT.getVectorNumElements());
24080 for (SDValue Op : N->ops()) {
24081 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24082 if (Op.getNode() == SVN) {
24083 append_range(Mask, AdjustedMask);
24084 continue;
24085 }
24086 if (Op == SVN->getOperand(0)) {
24087 append_range(Mask, FirstShufOpIdentityMask);
24088 continue;
24089 }
24090 if (Op == SVN->getOperand(1)) {
24091 append_range(Mask, SecondShufOpIdentityMask);
24092 continue;
24093 }
24094 llvm_unreachable("Unexpected operand!");
24095 }
24096
24097 // Don't create illegal shuffle masks.
24098 if (!TLI.isShuffleMaskLegal(Mask, VT))
24099 return SDValue();
24100
24101 // Pad the shuffle operands with UNDEF.
24102 SDLoc dl(N);
24103 std::array<SDValue, 2> ShufOps;
24104 for (auto I : zip(SVN->ops(), ShufOps)) {
24105 SDValue ShufOp = std::get<0>(I);
24106 SDValue &NewShufOp = std::get<1>(I);
24107 if (ShufOp.isUndef())
24108 NewShufOp = DAG.getUNDEF(VT);
24109 else {
24110 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24111 DAG.getUNDEF(OpVT));
24112 ShufOpParts[0] = ShufOp;
24113 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24114 }
24115 }
24116 // Finally, create the new wide shuffle.
24117 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24118}
24119
24120SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24121 // If we only have one input vector, we don't need to do any concatenation.
24122 if (N->getNumOperands() == 1)
24123 return N->getOperand(0);
24124
24125 // Check if all of the operands are undefs.
24126 EVT VT = N->getValueType(0);
24127 if (ISD::allOperandsUndef(N))
24128 return DAG.getUNDEF(VT);
24129
24130 // Optimize concat_vectors where all but the first of the vectors are undef.
24131 if (all_of(drop_begin(N->ops()),
24132 [](const SDValue &Op) { return Op.isUndef(); })) {
24133 SDValue In = N->getOperand(0);
24134 assert(In.getValueType().isVector() && "Must concat vectors");
24135
24136 // If the input is a concat_vectors, just make a larger concat by padding
24137 // with smaller undefs.
24138 //
24139 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24140 // here could cause an infinite loop. That legalizing happens when LegalDAG
24141 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24142 // scalable.
24143 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24144 !(LegalDAG && In.getValueType().isScalableVector())) {
24145 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24146 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
24147 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24148 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24149 }
24150
24151 SDValue Scalar = peekThroughOneUseBitcasts(In);
24152
24153 // concat_vectors(scalar_to_vector(scalar), undef) ->
24154 // scalar_to_vector(scalar)
24155 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24156 Scalar.hasOneUse()) {
24157 EVT SVT = Scalar.getValueType().getVectorElementType();
24158 if (SVT == Scalar.getOperand(0).getValueType())
24159 Scalar = Scalar.getOperand(0);
24160 }
24161
24162 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24163 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24164 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24165 // look through the trunc so we can still do the transform:
24166 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24167 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24168 !TLI.isTypeLegal(Scalar.getValueType()) &&
24169 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24170 Scalar = Scalar->getOperand(0);
24171
24172 EVT SclTy = Scalar.getValueType();
24173
24174 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24175 return SDValue();
24176
24177 // Bail out if the vector size is not a multiple of the scalar size.
24178 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24179 return SDValue();
24180
24181 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24182 if (VNTNumElms < 2)
24183 return SDValue();
24184
24185 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24186 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24187 return SDValue();
24188
24189 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24190 return DAG.getBitcast(VT, Res);
24191 }
24192 }
24193
24194 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24195 // We have already tested above for an UNDEF only concatenation.
24196 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24197 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24198 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24199 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24200 };
24201 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24202 SmallVector<SDValue, 8> Opnds;
24203 EVT SVT = VT.getScalarType();
24204
24205 EVT MinVT = SVT;
24206 if (!SVT.isFloatingPoint()) {
24207 // If the BUILD_VECTORs are built from integers, they may have different
24208 // operand types. Get the smallest type and truncate all operands to it.
24209 bool FoundMinVT = false;
24210 for (const SDValue &Op : N->ops())
24211 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24212 EVT OpSVT = Op.getOperand(0).getValueType();
24213 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24214 FoundMinVT = true;
24215 }
24216 assert(FoundMinVT && "Concat vector type mismatch");
24217 }
24218
24219 for (const SDValue &Op : N->ops()) {
24220 EVT OpVT = Op.getValueType();
24221 unsigned NumElts = OpVT.getVectorNumElements();
24222
24223 if (ISD::UNDEF == Op.getOpcode())
24224 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24225
24226 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24227 if (SVT.isFloatingPoint()) {
24228 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24229 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24230 } else {
24231 for (unsigned i = 0; i != NumElts; ++i)
24232 Opnds.push_back(
24233 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24234 }
24235 }
24236 }
24237
24238 assert(VT.getVectorNumElements() == Opnds.size() &&
24239 "Concat vector type mismatch");
24240 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24241 }
24242
24243 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24244 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24245 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24246 return V;
24247
24248 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24249 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24250 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24251 return V;
24252
24253 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24254 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24255 return V;
24256 }
24257
24258 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24259 return V;
24260
24261 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24262 N, DAG, TLI, LegalTypes, LegalOperations))
24263 return V;
24264
24265 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24266 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24267 // operands and look for a CONCAT operations that place the incoming vectors
24268 // at the exact same location.
24269 //
24270 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
24271 SDValue SingleSource = SDValue();
24272 unsigned PartNumElem =
24273 N->getOperand(0).getValueType().getVectorMinNumElements();
24274
24275 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24276 SDValue Op = N->getOperand(i);
24277
24278 if (Op.isUndef())
24279 continue;
24280
24281 // Check if this is the identity extract:
24282 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24283 return SDValue();
24284
24285 // Find the single incoming vector for the extract_subvector.
24286 if (SingleSource.getNode()) {
24287 if (Op.getOperand(0) != SingleSource)
24288 return SDValue();
24289 } else {
24290 SingleSource = Op.getOperand(0);
24291
24292 // Check the source type is the same as the type of the result.
24293 // If not, this concat may extend the vector, so we cannot
24294 // optimize it away.
24295 if (SingleSource.getValueType() != N->getValueType(0))
24296 return SDValue();
24297 }
24298
24299 // Check that we are reading from the identity index.
24300 unsigned IdentityIndex = i * PartNumElem;
24301 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24302 return SDValue();
24303 }
24304
24305 if (SingleSource.getNode())
24306 return SingleSource;
24307
24308 return SDValue();
24309}
24310
24311// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24312// if the subvector can be sourced for free.
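// Illustrative cases (hypothetical types):
//   getSubVectorSrc(insert_subvector(Base, Sub:v4i32, 4), 4, v4i32) == Sub
//   getSubVectorSrc(concat_vectors(A:v4i32, B:v4i32), 4, v4i32)     == B
// In both cases the requested v4i32 subvector is already available as an
// existing operand, so no new extraction is needed.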
24313 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24314 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24315 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24316 return V.getOperand(1);
24317 }
24318 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24319 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24320 V.getOperand(0).getValueType() == SubVT &&
24321 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24322 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24323 return V.getOperand(SubIdx);
24324 }
24325 return SDValue();
24326}
24327
24328 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24329 SelectionDAG &DAG,
24330 bool LegalOperations) {
24331 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24332 SDValue BinOp = Extract->getOperand(0);
24333 unsigned BinOpcode = BinOp.getOpcode();
24334 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24335 return SDValue();
24336
24337 EVT VecVT = BinOp.getValueType();
24338 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24339 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24340 return SDValue();
24341
24342 SDValue Index = Extract->getOperand(1);
24343 EVT SubVT = Extract->getValueType(0);
24344 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24345 return SDValue();
24346
24347 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24348 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24349
24350 // TODO: We could handle the case where only 1 operand is being inserted by
24351 // creating an extract of the other operand, but that requires checking
24352 // number of uses and/or costs.
24353 if (!Sub0 || !Sub1)
24354 return SDValue();
24355
24356 // We are inserting both operands of the wide binop only to extract back
24357 // to the narrow vector size. Eliminate all of the insert/extract:
24358 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24359 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24360 BinOp->getFlags());
24361}
24362
24363/// If we are extracting a subvector produced by a wide binary operator try
24364/// to use a narrow binary operator and/or avoid concatenation and extraction.
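/// For instance (hypothetical types), extracting the high half of a wide
/// bitwise op whose operands were concatenations:
///   v4i32 extract_subvector (v8i32 and (concat X0, X1), (concat Y0, Y1)), 4
///   --> v4i32 and X1, Y1
/// or, when the target considers the extraction cheap and there are no other
/// uses:
///   v4i32 extract_subvector (v8i32 add A, B), 4
///   --> v4i32 add (extract_subvector A, 4), (extract_subvector B, 4)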
24365 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24366 bool LegalOperations) {
24367 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24368 // some of these bailouts with other transforms.
24369
24370 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24371 return V;
24372
24373 // The extract index must be a constant, so we can map it to a concat operand.
24374 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24375 if (!ExtractIndexC)
24376 return SDValue();
24377
24378 // We are looking for an optionally bitcasted wide vector binary operator
24379 // feeding an extract subvector.
24380 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24381 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24382 unsigned BOpcode = BinOp.getOpcode();
24383 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24384 return SDValue();
24385
24386 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24387 // reduced to the unary fneg when it is visited, and we probably want to deal
24388 // with fneg in a target-specific way.
24389 if (BOpcode == ISD::FSUB) {
24390 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24391 if (C && C->getValueAPF().isNegZero())
24392 return SDValue();
24393 }
24394
24395 // The binop must be a vector type, so we can extract some fraction of it.
24396 EVT WideBVT = BinOp.getValueType();
24397 // The optimisations below currently assume we are dealing with fixed length
24398 // vectors. It is possible to add support for scalable vectors, but at the
24399 // moment we've done no analysis to prove whether they are profitable or not.
24400 if (!WideBVT.isFixedLengthVector())
24401 return SDValue();
24402
24403 EVT VT = Extract->getValueType(0);
24404 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24405 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24406 "Extract index is not a multiple of the vector length.");
24407
24408 // Bail out if this is not a proper multiple width extraction.
24409 unsigned WideWidth = WideBVT.getSizeInBits();
24410 unsigned NarrowWidth = VT.getSizeInBits();
24411 if (WideWidth % NarrowWidth != 0)
24412 return SDValue();
24413
24414 // Bail out if we are extracting a fraction of a single operation. This can
24415 // occur because we potentially looked through a bitcast of the binop.
24416 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24417 unsigned WideNumElts = WideBVT.getVectorNumElements();
24418 if (WideNumElts % NarrowingRatio != 0)
24419 return SDValue();
24420
24421 // Bail out if the target does not support a narrower version of the binop.
24422 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24423 WideNumElts / NarrowingRatio);
24424 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24425 LegalOperations))
24426 return SDValue();
24427
24428 // If extraction is cheap, we don't need to look at the binop operands
24429 // for concat ops. The narrow binop alone makes this transform profitable.
24430 // We can't just reuse the original extract index operand because we may have
24431 // bitcasted.
24432 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24433 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24434 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24435 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24436 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24437 SDLoc DL(Extract);
24438 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24439 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24440 BinOp.getOperand(0), NewExtIndex);
24441 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24442 BinOp.getOperand(1), NewExtIndex);
24443 SDValue NarrowBinOp =
24444 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24445 return DAG.getBitcast(VT, NarrowBinOp);
24446 }
24447
24448 // Only handle the case where we are doubling and then halving. A larger ratio
24449 // may require more than two narrow binops to replace the wide binop.
24450 if (NarrowingRatio != 2)
24451 return SDValue();
24452
24453 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24454 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24455 // flavors, but no other 256-bit integer support. This could be extended to
24456 // handle any binop, but that may require fixing/adding other folds to avoid
24457 // codegen regressions.
24458 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24459 return SDValue();
24460
24461 // We need at least one concatenation operation of a binop operand to make
24462 // this transform worthwhile. The concat must double the input vector sizes.
24463 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24464 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24465 return V.getOperand(ConcatOpNum);
24466 return SDValue();
24467 };
24468 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24469 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24470
24471 if (SubVecL || SubVecR) {
24472 // If a binop operand was not the result of a concat, we must extract a
24473 // half-sized operand for our new narrow binop:
24474 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24475 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24476 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24477 SDLoc DL(Extract);
24478 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24479 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24480 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24481 BinOp.getOperand(0), IndexC);
24482
24483 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24484 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24485 BinOp.getOperand(1), IndexC);
24486
24487 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24488 return DAG.getBitcast(VT, NarrowBinOp);
24489 }
24490
24491 return SDValue();
24492}
24493
24494/// If we are extracting a subvector from a wide vector load, convert to a
24495/// narrow load to eliminate the extraction:
24496/// (extract_subvector (load wide vector)) --> (load narrow vector)
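/// For example (hypothetical types, little-endian, target permitting):
///   v4i32 extract_subvector (v8i32 load [base]), 4
///   --> v4i32 load [base + 16]
/// since the extracted half starts VT.getStoreSize() * (Index / NumElts)
/// = 16 * (4 / 4) = 16 bytes into the wide load.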
24497 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24498 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24499 if (DAG.getDataLayout().isBigEndian())
24500 return SDValue();
24501
24502 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24503 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24504 return SDValue();
24505
24506 // Allow targets to opt-out.
24507 EVT VT = Extract->getValueType(0);
24508
24509 // We can only create byte sized loads.
24510 if (!VT.isByteSized())
24511 return SDValue();
24512
24513 unsigned Index = Extract->getConstantOperandVal(1);
24514 unsigned NumElts = VT.getVectorMinNumElements();
24515 // A fixed length vector being extracted from a scalable vector
24516 // may not be any *smaller* than the scalable one.
24517 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24518 return SDValue();
24519
24520 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24521 // multiple of the minimum number of elements in the result type.
24522 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24523 "multiple of the result's element count");
24524
24525 // It's fine to use TypeSize here as we know the offset will not be negative.
24526 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24527
24528 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24529 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24530 return SDValue();
24531
24532 // The narrow load will be offset from the base address of the old load if
24533 // we are extracting from something besides index 0 (little-endian).
24534 SDLoc DL(Extract);
24535
24536 // TODO: Use "BaseIndexOffset" to make this more effective.
24537 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24538
24539 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24540 MachineFunction &MF = DAG.getMachineFunction();
24541 MachineMemOperand *MMO;
24542 if (Offset.isScalable()) {
24543 MachinePointerInfo MPI =
24544 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24545 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24546 } else
24547 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24548 StoreSize);
24549
24550 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24551 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24552 return NewLd;
24553}
24554
24555/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24556/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24557/// EXTRACT_SUBVECTOR(Op?, ?),
24558/// Mask'))
24559/// iff it is legal and profitable to do so. Notably, the trimmed mask
24560/// (containing only the elements that are extracted)
24561/// must reference at most two subvectors.
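/// Illustration with hypothetical types: extracting the high v4i32 half of a
/// v8i32 shuffle of A and B whose relevant mask slice is <2,3,8,9>:
///   extract_subvector (shuffle A, B, <...,2,3,8,9>), 4
///   --> shuffle (extract_subvector A, 0), (extract_subvector B, 0), <2,3,4,5>
/// Only two subvectors (the low halves of A and B) are demanded, so the
/// trimmed mask can be rebased onto two narrow extracts.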
24562 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24563 SelectionDAG &DAG,
24564 const TargetLowering &TLI,
24565 bool LegalOperations) {
24566 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24567 "Must only be called on EXTRACT_SUBVECTOR's");
24568
24569 SDValue N0 = N->getOperand(0);
24570
24571 // Only deal with non-scalable vectors.
24572 EVT NarrowVT = N->getValueType(0);
24573 EVT WideVT = N0.getValueType();
24574 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24575 return SDValue();
24576
24577 // The operand must be a shufflevector.
24578 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24579 if (!WideShuffleVector)
24580 return SDValue();
24581
24582 // The old shuffle needs to go away.
24583 if (!WideShuffleVector->hasOneUse())
24584 return SDValue();
24585
24586 // And the narrow shufflevector that we'll form must be legal.
24587 if (LegalOperations &&
24588 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24589 return SDValue();
24590
24591 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24592 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24593 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24594 "Extract index is not a multiple of the output vector length.");
24595
24596 int WideNumElts = WideVT.getVectorNumElements();
24597
24598 SmallVector<int, 16> NewMask;
24599 NewMask.reserve(NumEltsExtracted);
24600 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24601 DemandedSubvectors;
24602
24603 // Try to decode the wide mask into narrow mask from at most two subvectors.
24604 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24605 NumEltsExtracted)) {
24606 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24607 "Out-of-bounds shuffle mask?");
24608
24609 if (M < 0) {
24610 // Does not depend on operands, does not require adjustment.
24611 NewMask.emplace_back(M);
24612 continue;
24613 }
24614
24615 // From which operand of the shuffle does this shuffle mask element pick?
24616 int WideShufOpIdx = M / WideNumElts;
24617 // Which element of that operand is picked?
24618 int OpEltIdx = M % WideNumElts;
24619
24620 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24621 "Shuffle mask vector decomposition failure.");
24622
24623 // And which NumEltsExtracted-sized subvector of that operand is that?
24624 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24625 // And which element within that subvector of that operand is that?
24626 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24627
24628 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24629 "Shuffle mask subvector decomposition failure.");
24630
24631 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24632 WideShufOpIdx * WideNumElts) == M &&
24633 "Shuffle mask full decomposition failure.");
24634
24635 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24636
24637 if (Op.isUndef()) {
24638 // Picking from an undef operand. Let's adjust mask instead.
24639 NewMask.emplace_back(-1);
24640 continue;
24641 }
24642
24643 const std::pair<SDValue, int> DemandedSubvector =
24644 std::make_pair(Op, OpSubvecIdx);
24645
24646 if (DemandedSubvectors.insert(DemandedSubvector)) {
24647 if (DemandedSubvectors.size() > 2)
24648 return SDValue(); // We can't handle more than two subvectors.
24649 // How many elements into the WideVT does this subvector start?
24650 int Index = NumEltsExtracted * OpSubvecIdx;
24651 // Bail out if the extraction isn't going to be cheap.
24652 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24653 return SDValue();
24654 }
24655
24656 // Ok, but from which operand of the new shuffle will this element pick?
24657 int NewOpIdx =
24658 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24659 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24660
24661 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24662 NewMask.emplace_back(AdjM);
24663 }
24664 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24665 assert(DemandedSubvectors.size() <= 2 &&
24666 "Should have ended up demanding at most two subvectors.");
24667
24668 // Did we discover that the shuffle does not actually depend on operands?
24669 if (DemandedSubvectors.empty())
24670 return DAG.getUNDEF(NarrowVT);
24671
24672 // Profitability check: only deal with extractions from the first subvector
24673 // unless the mask becomes an identity mask.
24674 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24675 any_of(NewMask, [](int M) { return M < 0; }))
24676 for (auto &DemandedSubvector : DemandedSubvectors)
24677 if (DemandedSubvector.second != 0)
24678 return SDValue();
24679
24680 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24681 // operand[s]/index[es], so there is no point in checking for its legality.
24682
24683 // Do not turn a legal shuffle into an illegal one.
24684 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24685 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24686 return SDValue();
24687
24688 SDLoc DL(N);
24689
24690 SmallVector<SDValue, 2> NewOps;
24691 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24692 &DemandedSubvector : DemandedSubvectors) {
24693 // How many elements into the WideVT does this subvector start?
24694 int Index = NumEltsExtracted * DemandedSubvector.second;
24695 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24696 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24697 DemandedSubvector.first, IndexC));
24698 }
24699 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24700 "Should end up with either one or two ops");
24701
24702 // If we ended up with only one operand, pad with an undef.
24703 if (NewOps.size() == 1)
24704 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24705
24706 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24707}
24708
24709SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24710 EVT NVT = N->getValueType(0);
24711 SDValue V = N->getOperand(0);
24712 uint64_t ExtIdx = N->getConstantOperandVal(1);
24713 SDLoc DL(N);
24714
24715 // Extract from UNDEF is UNDEF.
24716 if (V.isUndef())
24717 return DAG.getUNDEF(NVT);
24718
24719 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24720 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24721 return NarrowLoad;
24722
24723 // Combine an extract of an extract into a single extract_subvector.
24724 // ext (ext X, C), 0 --> ext X, C
24725 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24726 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24727 V.getConstantOperandVal(1)) &&
24728 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24729 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24730 V.getOperand(1));
24731 }
24732 }
24733
24734 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24735 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24736 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24737 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24738 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24739
24740 // extract_subvector(insert_subvector(x,y,c1),c2)
24741 // --> extract_subvector(y,c2-c1)
24742 // iff we're just extracting from the inserted subvector.
24743 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24744 SDValue InsSub = V.getOperand(1);
24745 EVT InsSubVT = InsSub.getValueType();
24746 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24747 unsigned InsIdx = V.getConstantOperandVal(2);
24748 unsigned NumSubElts = NVT.getVectorMinNumElements();
24749 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24750 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24751 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24752 V.getValueType().isFixedLengthVector())
24753 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24754 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24755 }
24756
24757 // Try to move vector bitcast after extract_subv by scaling extraction index:
24758 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24759 if (V.getOpcode() == ISD::BITCAST &&
24760 V.getOperand(0).getValueType().isVector() &&
24761 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24762 SDValue SrcOp = V.getOperand(0);
24763 EVT SrcVT = SrcOp.getValueType();
24764 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24765 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24766 if ((SrcNumElts % DestNumElts) == 0) {
24767 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24768 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24769 EVT NewExtVT =
24770 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24771 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24772 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24773 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24774 V.getOperand(0), NewIndex);
24775 return DAG.getBitcast(NVT, NewExtract);
24776 }
24777 }
24778 if ((DestNumElts % SrcNumElts) == 0) {
24779 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24780 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24781 ElementCount NewExtEC =
24782 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24783 EVT ScalarVT = SrcVT.getScalarType();
24784 if ((ExtIdx % DestSrcRatio) == 0) {
24785 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24786 EVT NewExtVT =
24787 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24788 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24789 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24790 SDValue NewExtract =
24791 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24792 V.getOperand(0), NewIndex);
24793 return DAG.getBitcast(NVT, NewExtract);
24794 }
24795 if (NewExtEC.isScalar() &&
24796 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24797 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24798 SDValue NewExtract =
24799 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24800 V.getOperand(0), NewIndex);
24801 return DAG.getBitcast(NVT, NewExtract);
24802 }
24803 }
24804 }
24805 }
24806 }
24807
24808 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24809 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24810 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24811 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24812 "Concat and extract subvector do not change element type");
24813 assert((ExtIdx % ExtNumElts) == 0 &&
24814 "Extract index is not a multiple of the input vector length.");
24815
24816 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24817 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24818
24819 // If the concatenated source types match this extract, it's a direct
24820 // simplification:
24821 // extract_subvec (concat V1, V2, ...), i --> Vi
24822 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24823 return V.getOperand(ConcatOpIdx);
24824
24825 // If the concatenated source vectors are a multiple length of this extract,
24826 // then extract a fraction of one of those source vectors directly from a
24827 // concat operand. Example:
24828 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24829 // v2i8 extract_subvec v8i8 Y, 6
24830 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24831 ConcatSrcNumElts % ExtNumElts == 0) {
24832 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24833 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24834 "Trying to extract from >1 concat operand?");
24835 assert(NewExtIdx % ExtNumElts == 0 &&
24836 "Extract index is not a multiple of the input vector length.");
24837 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24838 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24839 V.getOperand(ConcatOpIdx), NewIndexC);
24840 }
24841 }
24842
24843 if (SDValue V =
24844 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24845 return V;
24846
24847 V = peekThroughBitcasts(V);
24848
24849 // If the input is a build vector, try to make a smaller build vector.
24850 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24851 EVT InVT = V.getValueType();
24852 unsigned ExtractSize = NVT.getSizeInBits();
24853 unsigned EltSize = InVT.getScalarSizeInBits();
24854 // Only do this if we won't split any elements.
24855 if (ExtractSize % EltSize == 0) {
24856 unsigned NumElems = ExtractSize / EltSize;
24857 EVT EltVT = InVT.getVectorElementType();
24858 EVT ExtractVT =
24859 NumElems == 1 ? EltVT
24860 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24861 if ((Level < AfterLegalizeDAG ||
24862 (NumElems == 1 ||
24863 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24864 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24865 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24866
24867 if (NumElems == 1) {
24868 SDValue Src = V->getOperand(IdxVal);
24869 if (EltVT != Src.getValueType())
24870 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24871 return DAG.getBitcast(NVT, Src);
24872 }
24873
24874 // Extract the pieces from the original build_vector.
24875 SDValue BuildVec =
24876 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24877 return DAG.getBitcast(NVT, BuildVec);
24878 }
24879 }
24880 }
24881
24882 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24883 // Handle only simple case where vector being inserted and vector
24884 // being extracted are of same size.
24885 EVT SmallVT = V.getOperand(1).getValueType();
24886 if (!NVT.bitsEq(SmallVT))
24887 return SDValue();
24888
24889 // Combine:
24890 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24891 // Into:
24892 // indices are equal or bit offsets are equal => V1
24893 // otherwise => (extract_subvec V1, ExtIdx)
24894 uint64_t InsIdx = V.getConstantOperandVal(2);
24895 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24896 ExtIdx * NVT.getScalarSizeInBits()) {
24897 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24898 return SDValue();
24899
24900 return DAG.getBitcast(NVT, V.getOperand(1));
24901 }
24902 return DAG.getNode(
24903 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24904 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24905 N->getOperand(1));
24906 }
24907
24908 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24909 return NarrowBOp;
24910
24911 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24912 return SDValue(N, 0);
24913
24914 return SDValue();
24915}
24916
24917/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24918/// followed by concatenation. Narrow vector ops may have better performance
24919/// than wide ops, and this can unlock further narrowing of other vector ops.
24920/// Targets can invert this transform later if it is not profitable.
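/// For example (hypothetical v8i32/v4i32 types):
///   shuffle (concat X, undef), (concat Y, undef), <0,1,8,9,2,3,10,11>
///   --> concat (shuffle X, Y, <0,1,4,5>), (shuffle X, Y, <2,3,6,7>)
/// Mask elements that referenced operand 1 are offset down by HalfNumElts so
/// that they index Y directly in the narrow shuffles.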
24921 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24922 SelectionDAG &DAG) {
24923 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24924 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24925 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24926 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24927 return SDValue();
24928
24929 // Split the wide shuffle mask into halves. Any mask element that is accessing
24930 // operand 1 is offset down to account for narrowing of the vectors.
24931 ArrayRef<int> Mask = Shuf->getMask();
24932 EVT VT = Shuf->getValueType(0);
24933 unsigned NumElts = VT.getVectorNumElements();
24934 unsigned HalfNumElts = NumElts / 2;
24935 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24936 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24937 for (unsigned i = 0; i != NumElts; ++i) {
24938 if (Mask[i] == -1)
24939 continue;
24940 // If we reference the upper (undef) subvector then the element is undef.
24941 if ((Mask[i] % NumElts) >= HalfNumElts)
24942 continue;
24943 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24944 if (i < HalfNumElts)
24945 Mask0[i] = M;
24946 else
24947 Mask1[i - HalfNumElts] = M;
24948 }
24949
24950 // Ask the target if this is a valid transform.
24951 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24952 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24953 HalfNumElts);
24954 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24955 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24956 return SDValue();
24957
24958 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24959 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24960 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24961 SDLoc DL(Shuf);
24962 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24963 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24964 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24965}
24966
24967// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24968// or turn a shuffle of a single concat into simpler shuffle then concat.
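// For example (A, B, C, D are hypothetical v4i32 vectors):
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11>
//   --> concat B, C
// Each NumElemsPerConcat-sized slice of the mask is an identity copy of a
// single concat operand, so the shuffle reduces to a plain concatenation.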
24969 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24970 EVT VT = N->getValueType(0);
24971 unsigned NumElts = VT.getVectorNumElements();
24972
24973 SDValue N0 = N->getOperand(0);
24974 SDValue N1 = N->getOperand(1);
24975 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24976 ArrayRef<int> Mask = SVN->getMask();
24977
24978 SmallVector<SDValue, 4> Ops;
24979 EVT ConcatVT = N0.getOperand(0).getValueType();
24980 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24981 unsigned NumConcats = NumElts / NumElemsPerConcat;
24982
24983 auto IsUndefMaskElt = [](int i) { return i == -1; };
24984
24985 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24986 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24987 // half vector elements.
24988 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24989 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24990 IsUndefMaskElt)) {
24991 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24992 N0.getOperand(1),
24993 Mask.slice(0, NumElemsPerConcat));
24994 N1 = DAG.getUNDEF(ConcatVT);
24995 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24996 }
24997
24998 // Look at every vector that's inserted. We're looking for exact
24999 // subvector-sized copies from a concatenated vector
25000 for (unsigned I = 0; I != NumConcats; ++I) {
25001 unsigned Begin = I * NumElemsPerConcat;
25002 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25003
25004 // Make sure we're dealing with a copy.
25005 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25006 Ops.push_back(DAG.getUNDEF(ConcatVT));
25007 continue;
25008 }
25009
25010 int OpIdx = -1;
25011 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25012 if (IsUndefMaskElt(SubMask[i]))
25013 continue;
25014 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25015 return SDValue();
25016 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25017 if (0 <= OpIdx && EltOpIdx != OpIdx)
25018 return SDValue();
25019 OpIdx = EltOpIdx;
25020 }
25021 assert(0 <= OpIdx && "Unknown concat_vectors op");
25022
25023 if (OpIdx < (int)N0.getNumOperands())
25024 Ops.push_back(N0.getOperand(OpIdx));
25025 else
25026 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25027 }
25028
25029 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25030}
25031
25032// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25033// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25034//
25035// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25036// a simplification in some sense, but it isn't appropriate in general: some
25037// BUILD_VECTORs are substantially cheaper than others. The general case
25038// of a BUILD_VECTOR requires inserting each element individually (or
25039// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25040// all constants is a single constant pool load. A BUILD_VECTOR where each
25041// element is identical is a splat. A BUILD_VECTOR where most of the operands
25042// are undef lowers to a small number of element insertions.
25043//
25044// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25045// We don't fold shuffles where one side is a non-zero constant, and we don't
25046// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25047// non-constant operands. This seems to work out reasonably well in practice.
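// For example (A, B, C, D, E are hypothetical non-constant scalars):
//   shuffle (build_vector A, B, C, D), (scalar_to_vector E), <0,2,4,-1>
//   --> build_vector A, C, E, undef
// subject to the one-use and duplicate-operand restrictions checked below.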
25048 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25049 SelectionDAG &DAG,
25050 const TargetLowering &TLI) {
25051 EVT VT = SVN->getValueType(0);
25052 unsigned NumElts = VT.getVectorNumElements();
25053 SDValue N0 = SVN->getOperand(0);
25054 SDValue N1 = SVN->getOperand(1);
25055
25056 if (!N0->hasOneUse())
25057 return SDValue();
25058
25059 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
25060 // discussed above.
25061 if (!N1.isUndef()) {
25062 if (!N1->hasOneUse())
25063 return SDValue();
25064
25065 bool N0AnyConst = isAnyConstantBuildVector(N0);
25066 bool N1AnyConst = isAnyConstantBuildVector(N1);
25067 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25068 return SDValue();
25069 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25070 return SDValue();
25071 }
25072
25073 // If both inputs are splats of the same value then we can safely merge this
25074 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25075 bool IsSplat = false;
25076 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25077 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25078 if (BV0 && BV1)
25079 if (SDValue Splat0 = BV0->getSplatValue())
25080 IsSplat = (Splat0 == BV1->getSplatValue());
25081
25082 SmallVector<SDValue, 8> Ops;
25083 SmallSet<SDValue, 16> DuplicateOps;
25084 for (int M : SVN->getMask()) {
25085 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25086 if (M >= 0) {
25087 int Idx = M < (int)NumElts ? M : M - NumElts;
25088 SDValue &S = (M < (int)NumElts ? N0 : N1);
25089 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25090 Op = S.getOperand(Idx);
25091 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25092 SDValue Op0 = S.getOperand(0);
25093 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25094 } else {
25095 // Operand can't be combined - bail out.
25096 return SDValue();
25097 }
25098 }
25099
25100 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25101 // generating a splat; semantically, this is fine, but it's likely to
25102 // generate low-quality code if the target can't reconstruct an appropriate
25103 // shuffle.
25104 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25105 if (!IsSplat && !DuplicateOps.insert(Op).second)
25106 return SDValue();
25107
25108 Ops.push_back(Op);
25109 }
25110
25111 // BUILD_VECTOR requires all inputs to be of the same type, find the
25112 // maximum type and extend them all.
25113 EVT SVT = VT.getScalarType();
25114 if (SVT.isInteger())
25115 for (SDValue &Op : Ops)
25116 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25117 if (SVT != VT.getScalarType())
25118 for (SDValue &Op : Ops)
25119 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25120 : (TLI.isZExtFree(Op.getValueType(), SVT)
25121 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25122 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25123 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25124}
25125
25126// Match shuffles that can be converted to *_vector_extend_in_reg.
25127// This is often generated during legalization.
25128// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25129// and returns the EVT to which the extension should be performed.
25130// NOTE: this assumes that the src is the first operand of the shuffle.
25131 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25132 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25133 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25134 bool LegalOperations) {
25135 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25136
25137 // TODO Add support for big-endian when we have a test case.
25138 if (!VT.isInteger() || IsBigEndian)
25139 return std::nullopt;
25140
25141 unsigned NumElts = VT.getVectorNumElements();
25142 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25143
25144 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
25145 // power-of-2 extensions as they are the most likely.
25146 // FIXME: should try Scale == NumElts case too,
25147 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25148 // The vector width must be a multiple of Scale.
25149 if (NumElts % Scale != 0)
25150 continue;
25151
25152 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25153 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25154
25155 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25156 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25157 continue;
25158
25159 if (Match(Scale))
25160 return OutVT;
25161 }
25162
25163 return std::nullopt;
25164}
25165
25166// Match shuffles that can be converted to any_vector_extend_in_reg.
25167// This is often generated during legalization.
25168// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25169 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25170 SelectionDAG &DAG,
25171 const TargetLowering &TLI,
25172 bool LegalOperations) {
25173 EVT VT = SVN->getValueType(0);
25174 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25175
25176 // TODO Add support for big-endian when we have a test case.
25177 if (!VT.isInteger() || IsBigEndian)
25178 return SDValue();
25179
25180 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25181 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25182 Mask = SVN->getMask()](unsigned Scale) {
25183 for (unsigned i = 0; i != NumElts; ++i) {
25184 if (Mask[i] < 0)
25185 continue;
25186 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25187 continue;
25188 return false;
25189 }
25190 return true;
25191 };
25192
25193 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25194 SDValue N0 = SVN->getOperand(0);
25195 // Never create an illegal type. Only create unsupported operations if we
25196 // are pre-legalization.
25197 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25198 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25199 if (!OutVT)
25200 return SDValue();
25201 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25202}
25203
25204// Match shuffles that can be converted to zero_extend_vector_inreg.
25205// This is often generated during legalization.
25206// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25207 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25208 SelectionDAG &DAG,
25209 const TargetLowering &TLI,
25210 bool LegalOperations) {
25211 bool LegalTypes = true;
25212 EVT VT = SVN->getValueType(0);
25213 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25214 unsigned NumElts = VT.getVectorNumElements();
25215 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25216
25217 // TODO: add support for big-endian when we have a test case.
25218 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25219 if (!VT.isInteger() || IsBigEndian)
25220 return SDValue();
25221
25222 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25223 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25224 for (int &Indice : Mask) {
25225 if (Indice < 0)
25226 continue;
25227 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25228 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25229 Fn(Indice, OpIdx, OpEltIdx);
25230 }
25231 };
25232
25233 // Which elements of which operand does this shuffle demand?
25234 std::array<APInt, 2> OpsDemandedElts;
25235 for (APInt &OpDemandedElts : OpsDemandedElts)
25236 OpDemandedElts = APInt::getZero(NumElts);
25237 ForEachDecomposedIndice(
25238 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25239 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25240 });
25241
25242 // Element-wise(!), which of these demanded elements are known to be zero?
25243 std::array<APInt, 2> OpsKnownZeroElts;
25244 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25245 std::get<2>(I) =
25246 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25247
25248 // Manifest zeroable element knowledge in the shuffle mask.
25249 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25250 // this is a local invention, but it won't leak into DAG.
25251 // FIXME: should we not manifest them, but just check when matching?
25252 bool HadZeroableElts = false;
25253 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25254 int &Indice, int OpIdx, int OpEltIdx) {
25255 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25256 Indice = -2; // Zeroable element.
25257 HadZeroableElts = true;
25258 }
25259 });
25260
25261 // Don't proceed unless we've refined at least one zeroable mask index.
25262 // If we didn't, then we are still trying to match the same shuffle mask
25263 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25264 // and evidently failed. Proceeding will lead to endless combine loops.
25265 if (!HadZeroableElts)
25266 return SDValue();
25267
25268 // The shuffle may be more fine-grained than we want. Widen elements first.
25269 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25270 SmallVector<int, 16> ScaledMask;
25271 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25272 assert(Mask.size() >= ScaledMask.size() &&
25273 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25274 int Prescale = Mask.size() / ScaledMask.size();
25275
25276 NumElts = ScaledMask.size();
25277 EltSizeInBits *= Prescale;
25278
25279 EVT PrescaledVT = EVT::getVectorVT(
25280 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25281 NumElts);
25282
25283 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25284 return SDValue();
25285
25286 // For example,
25287 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25288 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25289 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25290 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25291 "Unexpected mask scaling factor.");
25292 ArrayRef<int> Mask = ScaledMask;
25293 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25294 SrcElt != NumSrcElts; ++SrcElt) {
25295 // Analyze the shuffle mask in Scale-sized chunks.
25296 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25297 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25298 Mask = Mask.drop_front(MaskChunk.size());
25299 // The first index in this chunk must be SrcElt, but not zero!
25300 // FIXME: undef should be fine, but that results in more-defined result.
25301 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25302 return false;
25303 // The rest of the indices in this chunk must be zeros.
25304 // FIXME: undef should be fine, but that results in more-defined result.
25305 if (!all_of(MaskChunk.drop_front(1),
25306 [](int Indice) { return Indice == -2; }))
25307 return false;
25308 }
25309 assert(Mask.empty() && "Did not process the whole mask?");
25310 return true;
25311 };
25312
25313 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25314 for (bool Commuted : {false, true}) {
25315 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25316 if (Commuted)
25317 ShuffleVectorSDNode::commuteMask(ScaledMask);
25318 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25319 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25320 LegalOperations);
25321 if (OutVT)
25322 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25323 DAG.getBitcast(PrescaledVT, Op)));
25324 }
25325 return SDValue();
25326}
25327
25328// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25329// each source element of a large type into the lowest elements of a smaller
25330// destination type. This is often generated during legalization.
25331// If the source node itself was a '*_extend_vector_inreg' node then we should
25332// then be able to remove it.
25333 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25334 SelectionDAG &DAG) {
25335 EVT VT = SVN->getValueType(0);
25336 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25337
25338 // TODO Add support for big-endian when we have a test case.
25339 if (!VT.isInteger() || IsBigEndian)
25340 return SDValue();
25341
25342 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25343
25344 unsigned Opcode = N0.getOpcode();
25345 if (!ISD::isExtVecInRegOpcode(Opcode))
25346 return SDValue();
25347
25348 SDValue N00 = N0.getOperand(0);
25349 ArrayRef<int> Mask = SVN->getMask();
25350 unsigned NumElts = VT.getVectorNumElements();
25351 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25352 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25353 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25354
25355 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25356 return SDValue();
25357 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25358
25359 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25360 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25361 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25362 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25363 for (unsigned i = 0; i != NumElts; ++i) {
25364 if (Mask[i] < 0)
25365 continue;
25366 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25367 continue;
25368 return false;
25369 }
25370 return true;
25371 };
25372
25373 // At the moment we just handle the case where we've truncated back to the
25374 // same size as before the extension.
25375 // TODO: handle more extension/truncation cases as cases arise.
25376 if (EltSizeInBits != ExtSrcSizeInBits)
25377 return SDValue();
25378
25379 // We can remove *extend_vector_inreg only if the truncation happens at
25380 // the same scale as the extension.
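// For example, with VT == v8i16 and N0 == (v4i32 any_extend_vector_inreg(v8i16 X)),
// ExtScale == 2, so a shuffle mask of <0,2,4,6,u,u,u,u> over the v8i16 bitcast of
// N0 selects exactly the original low halves and folds back to X.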
25381 if (isTruncate(ExtScale))
25382 return DAG.getBitcast(VT, N00);
25383
25384 return SDValue();
25385}
25386
25387// Combine shuffles of splat-shuffles of the form:
25388// shuffle (shuffle V, undef, splat-mask), undef, M
25389// If splat-mask contains undef elements, we need to be careful about
25390 // introducing undefs in the folded mask which are not the result of composing
25391// the masks of the shuffles.
25392 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25393 SelectionDAG &DAG) {
25394 EVT VT = Shuf->getValueType(0);
25395 unsigned NumElts = VT.getVectorNumElements();
25396
25397 if (!Shuf->getOperand(1).isUndef())
25398 return SDValue();
25399
25400 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25401 // in disguise, with all demanded elements being identical.
25402 // FIXME: this can be done per-operand.
25403 if (!Shuf->isSplat()) {
25404 APInt DemandedElts(NumElts, 0);
25405 for (int Idx : Shuf->getMask()) {
25406 if (Idx < 0)
25407 continue; // Ignore sentinel indices.
25408 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
25409 DemandedElts.setBit(Idx);
25410 }
25411 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25412 APInt UndefElts;
25413 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25414 // Even if all demanded elements are splat, some of them could be undef.
25415 // Which lowest demanded element is *not* known-undef?
25416 std::optional<unsigned> MinNonUndefIdx;
25417 for (int Idx : Shuf->getMask()) {
25418 if (Idx < 0 || UndefElts[Idx])
25419 continue; // Ignore sentinel indices, and undef elements.
25420 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25421 }
25422 if (!MinNonUndefIdx)
25423 return DAG.getUNDEF(VT); // All undef - result is undef.
25424 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25425 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25426 Shuf->getMask().end());
25427 for (int &Idx : SplatMask) {
25428 if (Idx < 0)
25429 continue; // Passthrough sentinel indices.
25430 // Otherwise, just pick the lowest demanded non-undef element.
25431 // Or sentinel undef, if we know we'd pick a known-undef element.
25432 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25433 }
25434 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25435 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25436 Shuf->getOperand(1), SplatMask);
25437 }
25438 }
25439
25440 // If the inner operand is a known splat with no undefs, just return that directly.
25441 // TODO: Create DemandedElts mask from Shuf's mask.
25442 // TODO: Allow undef elements and merge with the shuffle code below.
25443 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25444 return Shuf->getOperand(0);
25445
25446 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25447 if (!Splat || !Splat->isSplat())
25448 return SDValue();
25449
25450 ArrayRef<int> ShufMask = Shuf->getMask();
25451 ArrayRef<int> SplatMask = Splat->getMask();
25452 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25453
25454 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25455 // every undef mask element in the splat-shuffle has a corresponding undef
25456 // element in the user-shuffle's mask or if the composition of mask elements
25457 // would result in undef.
25458 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25459 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25460 // In this case it is not legal to simplify to the splat-shuffle because we
25461 // may be exposing to the users of the shuffle an undef element at index 1
25462 // which was not there before the combine.
25463 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25464 // In this case the composition of masks yields SplatMask, so it's ok to
25465 // simplify to the splat-shuffle.
25466 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25467 // In this case the composed mask includes all undef elements of SplatMask
25468 // and in addition sets element zero to undef. It is safe to simplify to
25469 // the splat-shuffle.
25470 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25471 ArrayRef<int> SplatMask) {
25472 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25473 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25474 SplatMask[UserMask[i]] != -1)
25475 return false;
25476 return true;
25477 };
25478 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25479 return Shuf->getOperand(0);
25480
25481 // Create a new shuffle with a mask that is composed of the two shuffles'
25482 // masks.
25483 SmallVector<int, 32> NewMask;
25484 for (int Idx : ShufMask)
25485 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25486
25487 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25488 Splat->getOperand(0), Splat->getOperand(1),
25489 NewMask);
25490}
25491
25492 // Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
25493 // the mask can be widened to the larger element type.
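// For example, a v4i32 shuffle<0,1,6,7> of two v4i32 bitcasts of v2i64 vectors
// has Factor == 2 and widens to a v2i64 shuffle<0,3> of the original v2i64
// operands (assuming the widened mask is legal for the target).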
25494 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25495 SelectionDAG &DAG,
25496 const TargetLowering &TLI,
25497 bool LegalOperations) {
25498 SDValue Op0 = SVN->getOperand(0);
25499 SDValue Op1 = SVN->getOperand(1);
25500 EVT VT = SVN->getValueType(0);
25501 if (Op0.getOpcode() != ISD::BITCAST)
25502 return SDValue();
25503 EVT InVT = Op0.getOperand(0).getValueType();
25504 if (!InVT.isVector() ||
25505 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25506 Op1.getOperand(0).getValueType() != InVT)))
25507 return SDValue();
25508 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25509 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25510 return SDValue();
25511
25512 int VTLanes = VT.getVectorNumElements();
25513 int InLanes = InVT.getVectorNumElements();
25514 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25515 (LegalOperations &&
25516 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25517 return SDValue();
25518 int Factor = VTLanes / InLanes;
25519
25520 // Check that each group of lanes in the mask is either undef or makes a valid
25521 // mask for the wider lane type.
25522 ArrayRef<int> Mask = SVN->getMask();
25523 SmallVector<int> NewMask;
25524 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25525 return SDValue();
25526
25527 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25528 return SDValue();
25529
25530 // Create the new shuffle with the new mask and bitcast it back to the
25531 // original type.
25532 SDLoc DL(SVN);
25533 Op0 = Op0.getOperand(0);
25534 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25535 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25536 return DAG.getBitcast(VT, NewShuf);
25537}
25538
25539/// Combine shuffle of shuffle of the form:
25540/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
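/// For example, with OuterMask = <1,1,3,3> and InnerMask = <u,0,u,0>, every
/// defined lane traces back to inner element 0, so the combined mask is the
/// splat <0,0,0,0>.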
25541 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25542 SelectionDAG &DAG) {
25543 if (!OuterShuf->getOperand(1).isUndef())
25544 return SDValue();
25545 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25546 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25547 return SDValue();
25548
25549 ArrayRef<int> OuterMask = OuterShuf->getMask();
25550 ArrayRef<int> InnerMask = InnerShuf->getMask();
25551 unsigned NumElts = OuterMask.size();
25552 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25553 SmallVector<int, 32> CombinedMask(NumElts, -1);
25554 int SplatIndex = -1;
25555 for (unsigned i = 0; i != NumElts; ++i) {
25556 // Undef lanes remain undef.
25557 int OuterMaskElt = OuterMask[i];
25558 if (OuterMaskElt == -1)
25559 continue;
25560
25561 // Peek through the shuffle masks to get the underlying source element.
25562 int InnerMaskElt = InnerMask[OuterMaskElt];
25563 if (InnerMaskElt == -1)
25564 continue;
25565
25566 // Initialize the splatted element.
25567 if (SplatIndex == -1)
25568 SplatIndex = InnerMaskElt;
25569
25570 // Non-matching index - this is not a splat.
25571 if (SplatIndex != InnerMaskElt)
25572 return SDValue();
25573
25574 CombinedMask[i] = InnerMaskElt;
25575 }
25576 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25577 getSplatIndex(CombinedMask) != -1) &&
25578 "Expected a splat mask");
25579
25580 // TODO: The transform may be a win even if the mask is not legal.
25581 EVT VT = OuterShuf->getValueType(0);
25582 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25583 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25584 return SDValue();
25585
25586 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25587 InnerShuf->getOperand(1), CombinedMask);
25588}
25589
25590/// If the shuffle mask is taking exactly one element from the first vector
25591/// operand and passing through all other elements from the second vector
25592/// operand, return the index of the mask element that is choosing an element
25593/// from the first operand. Otherwise, return -1.
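/// For example, with 4-element vectors, Mask = <4,5,3,7> returns 2: only result
/// element 2 (Mask[2] == 3) takes a value from operand 0, while every other
/// element passes through the same lane of operand 1.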
25594 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25595 int MaskSize = Mask.size();
25596 int EltFromOp0 = -1;
25597 // TODO: This does not match if there are undef elements in the shuffle mask.
25598 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25599 // removing an instruction (a shuffle), but losing the knowledge that some
25600 // vector lanes are not needed.
25601 for (int i = 0; i != MaskSize; ++i) {
25602 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25603 // We're looking for a shuffle of exactly one element from operand 0.
25604 if (EltFromOp0 != -1)
25605 return -1;
25606 EltFromOp0 = i;
25607 } else if (Mask[i] != i + MaskSize) {
25608 // Nothing from operand 1 can change lanes.
25609 return -1;
25610 }
25611 }
25612 return EltFromOp0;
25613}
25614
25615/// If a shuffle inserts exactly one element from a source vector operand into
25616/// another vector operand and we can access the specified element as a scalar,
25617/// then we can eliminate the shuffle.
25618 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25619 SelectionDAG &DAG) {
25620 // First, check if we are taking one element of a vector and shuffling that
25621 // element into another vector.
25622 ArrayRef<int> Mask = Shuf->getMask();
25623 SmallVector<int, 16> CommutedMask(Mask);
25624 SDValue Op0 = Shuf->getOperand(0);
25625 SDValue Op1 = Shuf->getOperand(1);
25626 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25627 if (ShufOp0Index == -1) {
25628 // Commute mask and check again.
25629 ShuffleVectorSDNode::commuteMask(CommutedMask);
25630 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25631 if (ShufOp0Index == -1)
25632 return SDValue();
25633 // Commute operands to match the commuted shuffle mask.
25634 std::swap(Op0, Op1);
25635 Mask = CommutedMask;
25636 }
25637
25638 // The shuffle inserts exactly one element from operand 0 into operand 1.
25639 // Now see if we can access that element as a scalar via a real insert element
25640 // instruction.
25641 // TODO: We can try harder to locate the element as a scalar. Examples: it
25642 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25643 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25644 "Shuffle mask value must be from operand 0");
25645 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25646 return SDValue();
25647
25648 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25649 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25650 return SDValue();
25651
25652 // There's an existing insertelement with constant insertion index, so we
25653 // don't need to check the legality/profitability of a replacement operation
25654 // that differs at most in the constant value. The target should be able to
25655 // lower any of those in a similar way. If not, legalization will expand this
25656 // to a scalar-to-vector plus shuffle.
25657 //
25658 // Note that the shuffle may move the scalar from the position that the insert
25659 // element used. Therefore, our new insert element occurs at the shuffle's
25660 // mask index value, not the insert's index value.
25661 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25662 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25663 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25664 Op1, Op0.getOperand(1), NewInsIndex);
25665}
25666
25667/// If we have a unary shuffle of a shuffle, see if it can be folded away
25668/// completely. This has the potential to lose undef knowledge because the first
25669/// shuffle may not have an undef mask element where the second one does. So
25670/// only call this after doing simplifications based on demanded elements.
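/// For example, with Mask0 = <0,0,2,2> and Mask = <1,1,3,3>, every element of
/// the outer shuffle picks the same underlying element that the inner shuffle
/// already produced, so the outer shuffle can be replaced by its operand.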
25671 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25672 // shuf (shuf0 X, Y, Mask0), undef, Mask
25673 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25674 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25675 return SDValue();
25676
25677 ArrayRef<int> Mask = Shuf->getMask();
25678 ArrayRef<int> Mask0 = Shuf0->getMask();
25679 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25680 // Ignore undef elements.
25681 if (Mask[i] == -1)
25682 continue;
25683 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25684
25685 // Is the element of the shuffle operand chosen by this shuffle the same as
25686 // the element chosen by the shuffle operand itself?
25687 if (Mask0[Mask[i]] != Mask0[i])
25688 return SDValue();
25689 }
25690 // Every element of this shuffle is identical to the result of the previous
25691 // shuffle, so we can replace this value.
25692 return Shuf->getOperand(0);
25693}
25694
25695SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25696 EVT VT = N->getValueType(0);
25697 unsigned NumElts = VT.getVectorNumElements();
25698
25699 SDValue N0 = N->getOperand(0);
25700 SDValue N1 = N->getOperand(1);
25701
25702 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25703
25704 // Canonicalize shuffle undef, undef -> undef
25705 if (N0.isUndef() && N1.isUndef())
25706 return DAG.getUNDEF(VT);
25707
25708 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25709
25710 // Canonicalize shuffle v, v -> v, undef
25711 if (N0 == N1)
25712 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25713 createUnaryMask(SVN->getMask(), NumElts));
25714
25715 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25716 if (N0.isUndef())
25717 return DAG.getCommutedVectorShuffle(*SVN);
25718
25719 // Remove references to rhs if it is undef
25720 if (N1.isUndef()) {
25721 bool Changed = false;
25722 SmallVector<int, 8> NewMask;
25723 for (unsigned i = 0; i != NumElts; ++i) {
25724 int Idx = SVN->getMaskElt(i);
25725 if (Idx >= (int)NumElts) {
25726 Idx = -1;
25727 Changed = true;
25728 }
25729 NewMask.push_back(Idx);
25730 }
25731 if (Changed)
25732 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25733 }
25734
25735 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25736 return InsElt;
25737
25738 // A shuffle of a single vector that is a splatted value can always be folded.
25739 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25740 return V;
25741
25742 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25743 return V;
25744
25745 // If it is a splat, check if the argument vector is another splat or a
25746 // build_vector.
25747 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25748 int SplatIndex = SVN->getSplatIndex();
25749 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25750 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25751 // splat (vector_bo L, R), Index -->
25752 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25753 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25754 SDLoc DL(N);
25755 EVT EltVT = VT.getScalarType();
25756 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25757 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25758 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25759 SDValue NewBO =
25760 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25761 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25762 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25763 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25764 }
25765
25766 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25767 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25768 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25769 N0.hasOneUse()) {
25770 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25771 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25772
25773 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25774 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25775 if (Idx->getAPIntValue() == SplatIndex)
25776 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25777
25778 // Look through a bitcast if LE and splatting lane 0, through to a
25779 // scalar_to_vector or a build_vector.
25780 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25781 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25782 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25783 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25784 EVT N00VT = N0.getOperand(0).getValueType();
25785 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25786 VT.isInteger() && N00VT.isInteger()) {
25787 EVT InVT =
25788 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25789 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25790 SDLoc(N), InVT);
25791 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25792 }
25793 }
25794 }
25795
25796 // If this is a bit convert that changes the element type of the vector but
25797 // not the number of vector elements, look through it. Be careful not to
25798 // look through conversions that change things like v4f32 to v2f64.
25799 SDNode *V = N0.getNode();
25800 if (V->getOpcode() == ISD::BITCAST) {
25801 SDValue ConvInput = V->getOperand(0);
25802 if (ConvInput.getValueType().isVector() &&
25803 ConvInput.getValueType().getVectorNumElements() == NumElts)
25804 V = ConvInput.getNode();
25805 }
25806
25807 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25808 assert(V->getNumOperands() == NumElts &&
25809 "BUILD_VECTOR has wrong number of operands");
25810 SDValue Base;
25811 bool AllSame = true;
25812 for (unsigned i = 0; i != NumElts; ++i) {
25813 if (!V->getOperand(i).isUndef()) {
25814 Base = V->getOperand(i);
25815 break;
25816 }
25817 }
25818 // Splat of <u, u, u, u>, return <u, u, u, u>
25819 if (!Base.getNode())
25820 return N0;
25821 for (unsigned i = 0; i != NumElts; ++i) {
25822 if (V->getOperand(i) != Base) {
25823 AllSame = false;
25824 break;
25825 }
25826 }
25827 // Splat of <x, x, x, x>, return <x, x, x, x>
25828 if (AllSame)
25829 return N0;
25830
25831 // Canonicalize any other splat as a build_vector.
25832 SDValue Splatted = V->getOperand(SplatIndex);
25833 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25834 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25835
25836 // We may have jumped through bitcasts, so the type of the
25837 // BUILD_VECTOR may not match the type of the shuffle.
25838 if (V->getValueType(0) != VT)
25839 NewBV = DAG.getBitcast(VT, NewBV);
25840 return NewBV;
25841 }
25842 }
25843
25844 // Simplify source operands based on shuffle mask.
25845 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25846 return SDValue(N, 0);
25847
25848 // This is intentionally placed after demanded elements simplification because
25849 // it could eliminate knowledge of undef elements created by this shuffle.
25850 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25851 return ShufOp;
25852
25853 // Match shuffles that can be converted to any_vector_extend_in_reg.
25854 if (SDValue V =
25855 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25856 return V;
25857
25858 // Combine "truncate_vector_in_reg" style shuffles.
25859 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25860 return V;
25861
25862 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25863 Level < AfterLegalizeVectorOps &&
25864 (N1.isUndef() ||
25865 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25866 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25867 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25868 return V;
25869 }
25870
25871 // A shuffle of a concat of the same narrow vector can be reduced to use
25872 // only low-half elements of a concat with undef:
25873 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
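// For example, for v4i32 with X == v2i32: Mask = <2,0,3,1> over concat(X,X)
// becomes Mask' = <0,0,1,1> over concat(X,undef), assuming the new mask is
// legal for the target.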
25874 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25875 N0.getNumOperands() == 2 &&
25876 N0.getOperand(0) == N0.getOperand(1)) {
25877 int HalfNumElts = (int)NumElts / 2;
25878 SmallVector<int, 8> NewMask;
25879 for (unsigned i = 0; i != NumElts; ++i) {
25880 int Idx = SVN->getMaskElt(i);
25881 if (Idx >= HalfNumElts) {
25882 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25883 Idx -= HalfNumElts;
25884 }
25885 NewMask.push_back(Idx);
25886 }
25887 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25888 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25889 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25890 N0.getOperand(0), UndefVec);
25891 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25892 }
25893 }
25894
25895 // See if we can replace a shuffle with an insert_subvector.
25896 // e.g. v2i32 into v8i32:
25897 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25898 // --> insert_subvector(lhs,rhs1,4).
25899 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25900 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25901 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25902 // Ensure RHS subvectors are legal.
25903 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25904 EVT SubVT = RHS.getOperand(0).getValueType();
25905 int NumSubVecs = RHS.getNumOperands();
25906 int NumSubElts = SubVT.getVectorNumElements();
25907 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25908 if (!TLI.isTypeLegal(SubVT))
25909 return SDValue();
25910
25912 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25912 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25913 return SDValue();
25914
25915 // Search [NumSubElts] spans for RHS sequence.
25916 // TODO: Can we avoid nested loops to increase performance?
25917 SmallVector<int> InsertionMask(NumElts);
25918 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25919 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25920 // Reset mask to identity.
25921 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25922
25923 // Add subvector insertion.
25924 std::iota(InsertionMask.begin() + SubIdx,
25925 InsertionMask.begin() + SubIdx + NumSubElts,
25926 NumElts + (SubVec * NumSubElts));
25927
25928 // See if the shuffle mask matches the reference insertion mask.
25929 bool MatchingShuffle = true;
25930 for (int i = 0; i != (int)NumElts; ++i) {
25931 int ExpectIdx = InsertionMask[i];
25932 int ActualIdx = Mask[i];
25933 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25934 MatchingShuffle = false;
25935 break;
25936 }
25937 }
25938
25939 if (MatchingShuffle)
25940 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25941 RHS.getOperand(SubVec),
25942 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25943 }
25944 }
25945 return SDValue();
25946 };
25947 ArrayRef<int> Mask = SVN->getMask();
25948 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25949 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25950 return InsertN1;
25951 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25952 SmallVector<int> CommuteMask(Mask);
25953 ShuffleVectorSDNode::commuteMask(CommuteMask);
25954 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25955 return InsertN0;
25956 }
25957 }
25958
25959 // If we're not performing a select/blend shuffle, see if we can convert the
25960 // shuffle into an AND node, where all the out-of-lane elements are known zero.
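// For example, shuffle X, Y, <0,1,7,3> where element 3 of Y is known to be zero
// is equivalent to AND X, <-1,-1,0,-1>.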
25961 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25962 bool IsInLaneMask = true;
25963 ArrayRef<int> Mask = SVN->getMask();
25964 SmallVector<int, 16> ClearMask(NumElts, -1);
25965 APInt DemandedLHS = APInt::getZero(NumElts);
25966 APInt DemandedRHS = APInt::getZero(NumElts);
25967 for (int I = 0; I != (int)NumElts; ++I) {
25968 int M = Mask[I];
25969 if (M < 0)
25970 continue;
25971 ClearMask[I] = M == I ? I : (I + NumElts);
25972 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25973 if (M != I) {
25974 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25975 Demanded.setBit(M % NumElts);
25976 }
25977 }
25978 // TODO: Should we try to mask with N1 as well?
25979 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25980 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25981 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25982 SDLoc DL(N);
25983 EVT IntVT = VT.changeVectorElementTypeToInteger();
25984 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25985 // Transform the type to a legal type so that the buildvector constant
25986 // elements are not illegal. Make sure that the result is no smaller than
25987 // the original type, in case the value is split into two (e.g. i64->i32).
25988 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25989 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25990 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25991 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25992 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25993 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25994 for (int I = 0; I != (int)NumElts; ++I)
25995 if (0 <= Mask[I])
25996 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25997
25998 // See if a clear mask is legal instead of going via
25999 // XformToShuffleWithZero which loses UNDEF mask elements.
26000 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26001 return DAG.getBitcast(
26002 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26003 DAG.getConstant(0, DL, IntVT), ClearMask));
26004
26005 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26006 return DAG.getBitcast(
26007 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26008 DAG.getBuildVector(IntVT, DL, AndMask)));
26009 }
26010 }
26011 }
26012
26013 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26014 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26015 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26016 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26017 return Res;
26018
26019 // If this shuffle only has a single input that is a bitcasted shuffle,
26020 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26021 // back to their original types.
26022 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26023 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26024 TLI.isTypeLegal(VT)) {
26025
26026 SDValue BC0 = peekThroughOneUseBitcasts(N0);
26027 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26028 EVT SVT = VT.getScalarType();
26029 EVT InnerVT = BC0->getValueType(0);
26030 EVT InnerSVT = InnerVT.getScalarType();
26031
26032 // Determine which shuffle works with the smaller scalar type.
26033 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26034 EVT ScaleSVT = ScaleVT.getScalarType();
26035
26036 if (TLI.isTypeLegal(ScaleVT) &&
26037 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26038 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26039 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26040 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26041
26042 // Scale the shuffle masks to the smaller scalar type.
26043 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26044 SmallVector<int, 8> InnerMask;
26045 SmallVector<int, 8> OuterMask;
26046 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26047 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26048
26049 // Merge the shuffle masks.
26050 SmallVector<int, 8> NewMask;
26051 for (int M : OuterMask)
26052 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26053
26054 // Test for shuffle mask legality over both commutations.
26055 SDValue SV0 = BC0->getOperand(0);
26056 SDValue SV1 = BC0->getOperand(1);
26057 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26058 if (!LegalMask) {
26059 std::swap(SV0, SV1);
26060 ShuffleVectorSDNode::commuteMask(NewMask);
26061 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26062 }
26063
26064 if (LegalMask) {
26065 SV0 = DAG.getBitcast(ScaleVT, SV0);
26066 SV1 = DAG.getBitcast(ScaleVT, SV1);
26067 return DAG.getBitcast(
26068 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26069 }
26070 }
26071 }
26072 }
26073
26074 // Match shuffles of bitcasts, so long as the mask can be treated as the
26075 // larger type.
26076 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26077 return V;
26078
26079 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26080 // operand, and SV1 as the second operand.
26081 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26082 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26083 auto MergeInnerShuffle =
26084 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26085 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26086 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26087 SmallVectorImpl<int> &Mask) -> bool {
26088 // Don't try to fold splats; they're likely to simplify somehow, or they
26089 // might be free.
26090 if (OtherSVN->isSplat())
26091 return false;
26092
26093 SV0 = SV1 = SDValue();
26094 Mask.clear();
26095
26096 for (unsigned i = 0; i != NumElts; ++i) {
26097 int Idx = SVN->getMaskElt(i);
26098 if (Idx < 0) {
26099 // Propagate Undef.
26100 Mask.push_back(Idx);
26101 continue;
26102 }
26103
26104 if (Commute)
26105 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26106
26107 SDValue CurrentVec;
26108 if (Idx < (int)NumElts) {
26109 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26110 // shuffle mask to identify which vector is actually referenced.
26111 Idx = OtherSVN->getMaskElt(Idx);
26112 if (Idx < 0) {
26113 // Propagate Undef.
26114 Mask.push_back(Idx);
26115 continue;
26116 }
26117 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26118 : OtherSVN->getOperand(1);
26119 } else {
26120 // This shuffle index references an element within N1.
26121 CurrentVec = N1;
26122 }
26123
26124 // Simple case where 'CurrentVec' is UNDEF.
26125 if (CurrentVec.isUndef()) {
26126 Mask.push_back(-1);
26127 continue;
26128 }
26129
26130 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26131 // will be the first or second operand of the combined shuffle.
26132 Idx = Idx % NumElts;
26133 if (!SV0.getNode() || SV0 == CurrentVec) {
26134 // Ok. CurrentVec is the left hand side.
26135 // Update the mask accordingly.
26136 SV0 = CurrentVec;
26137 Mask.push_back(Idx);
26138 continue;
26139 }
26140 if (!SV1.getNode() || SV1 == CurrentVec) {
26141 // Ok. CurrentVec is the right hand side.
26142 // Update the mask accordingly.
26143 SV1 = CurrentVec;
26144 Mask.push_back(Idx + NumElts);
26145 continue;
26146 }
26147
26148 // Last chance - see if the vector is another shuffle and if it
26149 // uses one of the existing candidate shuffle ops.
26150 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26151 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26152 if (InnerIdx < 0) {
26153 Mask.push_back(-1);
26154 continue;
26155 }
26156 SDValue InnerVec = (InnerIdx < (int)NumElts)
26157 ? CurrentSVN->getOperand(0)
26158 : CurrentSVN->getOperand(1);
26159 if (InnerVec.isUndef()) {
26160 Mask.push_back(-1);
26161 continue;
26162 }
26163 InnerIdx %= NumElts;
26164 if (InnerVec == SV0) {
26165 Mask.push_back(InnerIdx);
26166 continue;
26167 }
26168 if (InnerVec == SV1) {
26169 Mask.push_back(InnerIdx + NumElts);
26170 continue;
26171 }
26172 }
26173
26174 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26175 return false;
26176 }
26177
26178 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26179 return true;
26180
26181 // Avoid introducing shuffles with illegal mask.
26182 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26183 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26184 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26185 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26186 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26187 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26188 if (TLI.isShuffleMaskLegal(Mask, VT))
26189 return true;
26190
26191 std::swap(SV0, SV1);
26192 ShuffleVectorSDNode::commuteMask(Mask);
26193 return TLI.isShuffleMaskLegal(Mask, VT);
26194 };
26195
26196 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26197 // Canonicalize shuffles according to rules:
26198 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26199 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26200 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26201 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26202 N->isOnlyUserOf(N1.getNode())) {
26203 // The incoming shuffle must be of the same type as the result of the
26204 // current shuffle.
26205 assert(N1->getOperand(0).getValueType() == VT &&
26206 "Shuffle types don't match");
26207
26208 SDValue SV0 = N1->getOperand(0);
26209 SDValue SV1 = N1->getOperand(1);
26210 bool HasSameOp0 = N0 == SV0;
26211 bool IsSV1Undef = SV1.isUndef();
26212 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26213 // Commute the operands of this shuffle so merging below will trigger.
26214 return DAG.getCommutedVectorShuffle(*SVN);
26215 }
26216
26217 // Canonicalize splat shuffles to the RHS to improve merging below.
26218 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26219 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26220 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26221 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26222 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26223 return DAG.getCommutedVectorShuffle(*SVN);
26224 }
26225
26226 // Try to fold according to rules:
26227 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26228 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26229 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26230 // Don't try to fold shuffles with illegal type.
26231 // Only fold if this shuffle is the only user of the other shuffle.
26232 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26233 for (int i = 0; i != 2; ++i) {
26234 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26235 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26236 // The incoming shuffle must be of the same type as the result of the
26237 // current shuffle.
26238 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26239 assert(OtherSV->getOperand(0).getValueType() == VT &&
26240 "Shuffle types don't match");
26241
26242 SDValue SV0, SV1;
26243 SmallVector<int, 4> Mask;
26244 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26245 SV0, SV1, Mask)) {
26246 // Check if all indices in Mask are Undef. In case, propagate Undef.
26247 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26248 return DAG.getUNDEF(VT);
26249
26250 return DAG.getVectorShuffle(VT, SDLoc(N),
26251 SV0 ? SV0 : DAG.getUNDEF(VT),
26252 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26253 }
26254 }
26255 }
26256
26257 // Merge shuffles through binops if we are able to merge them with at least
26258 // one other shuffle.
26259 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26260 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26261 unsigned SrcOpcode = N0.getOpcode();
26262 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26263 (N1.isUndef() ||
26264 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26265 // Get binop source ops, or just pass on the undef.
26266 SDValue Op00 = N0.getOperand(0);
26267 SDValue Op01 = N0.getOperand(1);
26268 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26269 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26270 // TODO: We might be able to relax the VT check but we don't currently
26271 // have any isBinOp() that has different result/ops VTs so play safe until
26272 // we have test coverage.
26273 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26274 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26275 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26276 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26277 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26278 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26279 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26280 SmallVectorImpl<int> &Mask, bool LeftOp,
26281 bool Commute) {
26282 SDValue InnerN = Commute ? N1 : N0;
26283 SDValue Op0 = LeftOp ? Op00 : Op01;
26284 SDValue Op1 = LeftOp ? Op10 : Op11;
26285 if (Commute)
26286 std::swap(Op0, Op1);
26287 // Only accept the merged shuffle if we don't introduce undef elements,
26288 // or the inner shuffle already contained undef elements.
26289 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26290 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26291 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26292 Mask) &&
26293 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26294 llvm::none_of(Mask, [](int M) { return M < 0; }));
26295 };
26296
26297 // Ensure we don't increase the number of shuffles - we must merge a
26298 // shuffle from at least one of the LHS and RHS ops.
26299 bool MergedLeft = false;
26300 SDValue LeftSV0, LeftSV1;
26301 SmallVector<int, 4> LeftMask;
26302 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26303 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26304 MergedLeft = true;
26305 } else {
26306 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26307 LeftSV0 = Op00, LeftSV1 = Op10;
26308 }
26309
26310 bool MergedRight = false;
26311 SDValue RightSV0, RightSV1;
26312 SmallVector<int, 4> RightMask;
26313 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26314 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26315 MergedRight = true;
26316 } else {
26317 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26318 RightSV0 = Op01, RightSV1 = Op11;
26319 }
26320
26321 if (MergedLeft || MergedRight) {
26322 SDLoc DL(N);
26323 SDValue LHS = DAG.getVectorShuffle(
26324 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26325 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26326 SDValue RHS = DAG.getVectorShuffle(
26327 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26328 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26329 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26330 }
26331 }
26332 }
26333 }
26334
26335 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26336 return V;
26337
26338 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26339 // Perform this really late, because it could eliminate knowledge
26340 // of undef elements created by this shuffle.
26341 if (Level < AfterLegalizeTypes)
26342 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26343 LegalOperations))
26344 return V;
26345
26346 return SDValue();
26347}
26348
26349SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26350 EVT VT = N->getValueType(0);
26351 if (!VT.isFixedLengthVector())
26352 return SDValue();
26353
26354 // Try to convert a scalar binop with an extracted vector element to a vector
26355 // binop. This is intended to reduce potentially expensive register moves.
26356 // TODO: Check if both operands are extracted.
26357 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26358 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26359 SDValue Scalar = N->getOperand(0);
26360 unsigned Opcode = Scalar.getOpcode();
26361 EVT VecEltVT = VT.getScalarType();
26362 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26363 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26364 Scalar.getOperand(0).getValueType() == VecEltVT &&
26365 Scalar.getOperand(1).getValueType() == VecEltVT &&
26366 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26367 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26368 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26369 // Match an extract element and get a shuffle mask equivalent.
26370 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26371
26372 for (int i : {0, 1}) {
26373 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26374 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26375 SDValue EE = Scalar.getOperand(i);
26376 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26377 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26378 EE.getOperand(0).getValueType() == VT &&
26379 isa<ConstantSDNode>(EE.getOperand(1))) {
26380 // Mask = {ExtractIndex, undef, undef....}
26381 ShufMask[0] = EE.getConstantOperandVal(1);
26382 // Make sure the shuffle is legal if we are crossing lanes.
26383 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26384 SDLoc DL(N);
26385 SDValue V[] = {EE.getOperand(0),
26386 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26387 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26388 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26389 ShufMask);
26390 }
26391 }
26392 }
26393 }
26394
26395 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26396 // with a VECTOR_SHUFFLE and possible truncate.
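// For example, (v4i32 scalar_to_vector (extract_vector_elt (v4i32 V), 2))
// becomes (v4i32 vector_shuffle V, undef, <2,u,u,u>) when that shuffle is
// legal for the target.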
26397 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26398 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26399 return SDValue();
26400
26401 // If we have an implicit truncate, truncate here if it is legal.
26402 if (VecEltVT != Scalar.getValueType() &&
26403 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26404 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26405 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26406 }
26407
26408 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26409 if (!ExtIndexC)
26410 return SDValue();
26411
26412 SDValue SrcVec = Scalar.getOperand(0);
26413 EVT SrcVT = SrcVec.getValueType();
26414 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26415 unsigned VTNumElts = VT.getVectorNumElements();
26416 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26417 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26418 SmallVector<int, 8> Mask(SrcNumElts, -1);
26419 Mask[0] = ExtIndexC->getZExtValue();
26420 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26421 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26422 if (!LegalShuffle)
26423 return SDValue();
26424
26425 // If the initial vector is the same size, the shuffle is the result.
26426 if (VT == SrcVT)
26427 return LegalShuffle;
26428
26429 // If not, shorten the shuffled vector.
26430 if (VTNumElts != SrcNumElts) {
26431 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26432 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26433 SrcVT.getVectorElementType(), VTNumElts);
26434 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26435 ZeroIdx);
26436 }
26437 }
26438
26439 return SDValue();
26440}
26441
26442SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26443 EVT VT = N->getValueType(0);
26444 SDValue N0 = N->getOperand(0);
26445 SDValue N1 = N->getOperand(1);
26446 SDValue N2 = N->getOperand(2);
26447 uint64_t InsIdx = N->getConstantOperandVal(2);
26448
26449 // If inserting an UNDEF, just return the original vector.
26450 if (N1.isUndef())
26451 return N0;
26452
26453 // If this is an insert of an extracted vector into an undef vector, we can
26454 // just use the input to the extract if the types match, and can simplify
26455 // in some cases even if they don't.
26456 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26457 N1.getOperand(1) == N2) {
26458 EVT SrcVT = N1.getOperand(0).getValueType();
26459 if (SrcVT == VT)
26460 return N1.getOperand(0);
26461 // TODO: To remove the zero check, need to adjust the offset to
26462 // a multiple of the new src type.
26463 if (isNullConstant(N2)) {
26464 if (VT.knownBitsGE(SrcVT) &&
26465 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
26466 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26467 VT, N0, N1.getOperand(0), N2);
26468 else if (VT.knownBitsLE(SrcVT) &&
26469 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
26470 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26471 VT, N1.getOperand(0), N2);
26472 }
26473 }
26474
26475 // Handle case where we've ended up inserting back into the source vector
26476 // we extracted the subvector from.
26477 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26478 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26479 N1.getOperand(1) == N2)
26480 return N0;
26481
26482 // Simplify scalar inserts into an undef vector:
26483 // insert_subvector undef, (splat X), N2 -> splat X
26484 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26485 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26486 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26487
26488 // If we are inserting a bitcast value into an undef, with the same
26489 // number of elements, just use the bitcast input of the extract.
26490 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26491 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26492 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26493 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26494 N1.getOperand(0).getOperand(1) == N2 &&
26495 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26496 VT.getVectorElementCount() &&
26497 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26498 VT.getSizeInBits()) {
26499 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26500 }
26501
26502 // If both N0 and N1 are bitcast values on which insert_subvector
26503 // would make sense, pull the bitcast through.
26504 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26505 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26506 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26507 SDValue CN0 = N0.getOperand(0);
26508 SDValue CN1 = N1.getOperand(0);
26509 EVT CN0VT = CN0.getValueType();
26510 EVT CN1VT = CN1.getValueType();
26511 if (CN0VT.isVector() && CN1VT.isVector() &&
26512 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26513 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26514 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26515 CN0.getValueType(), CN0, CN1, N2);
26516 return DAG.getBitcast(VT, NewINSERT);
26517 }
26518 }
26519
26520 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26521 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26522 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26523 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26524 N0.getOperand(1).getValueType() == N1.getValueType() &&
26525 N0.getOperand(2) == N2)
26526 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26527 N1, N2);
26528
26529 // Eliminate an intermediate insert into an undef vector:
26530 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26531 // insert_subvector undef, X, 0
26532 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26533 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26534 isNullConstant(N2))
26535 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26536 N1.getOperand(1), N2);
26537
26538 // Push subvector bitcasts to the output, adjusting the index as we go.
26539 // insert_subvector(bitcast(v), bitcast(s), c1)
26540 // -> bitcast(insert_subvector(v, s, c2))
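// For example (assuming insert_subvector is available on the wider type),
// insert_subvector (v8i32 bitcast (v4i64 V)), (v2i32 bitcast (v1i64 S)), 4
// --> bitcast (insert_subvector v4i64 V, v1i64 S, 2).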
26541 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26542 N1.getOpcode() == ISD::BITCAST) {
26543 SDValue N0Src = peekThroughBitcasts(N0);
26544 SDValue N1Src = peekThroughBitcasts(N1);
26545 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26546 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26547 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26548 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26549 EVT NewVT;
26550 SDLoc DL(N);
26551 SDValue NewIdx;
26552 LLVMContext &Ctx = *DAG.getContext();
26553 ElementCount NumElts = VT.getVectorElementCount();
26554 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26555 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26556 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26557 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26558 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26559 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26560 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26561 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26562 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26563 NumElts.divideCoefficientBy(Scale));
26564 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26565 }
26566 }
26567 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26568 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26569 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26570 return DAG.getBitcast(VT, Res);
26571 }
26572 }
26573 }
26574
26575 // Canonicalize insert_subvector dag nodes.
26576 // Example:
26577 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26578 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26579 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26580 N1.getValueType() == N0.getOperand(1).getValueType()) {
26581 unsigned OtherIdx = N0.getConstantOperandVal(2);
26582 if (InsIdx < OtherIdx) {
26583 // Swap nodes.
26584 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26585 N0.getOperand(0), N1, N2);
26586 AddToWorklist(NewOp.getNode());
26587 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26588 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26589 }
26590 }
26591
26592 // If the input vector is a concatenation, and the insert replaces
26593 // one of the pieces, we can optimize into a single concat_vectors.
26594 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26595 N0.getOperand(0).getValueType() == N1.getValueType() &&
26598 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26599 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26600 Ops[InsIdx / Factor] = N1;
26601 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26602 }
26603
26604 // Simplify source operands based on insertion.
26605 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26606 return SDValue(N, 0);
26607
26608 return SDValue();
26609}
26610
26611SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26612 SDValue N0 = N->getOperand(0);
26613
26614 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26615 if (N0->getOpcode() == ISD::FP16_TO_FP)
26616 return N0->getOperand(0);
26617
26618 return SDValue();
26619}
26620
26621SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26622 auto Op = N->getOpcode();
26624 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26625 SDValue N0 = N->getOperand(0);
26626
26627 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26628 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
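// The conversion only reads the low 16 bits of its operand, so an explicit
// 0xffff mask is redundant (unless the target asks to keep the zero-extend,
// per the check below).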
26629 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26630 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26631 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26632 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26633 }
26634 }
26635
26636 // Sometimes constants manage to survive very late in the pipeline, e.g.,
26637 // because they are wrapped inside the <1 x f16> type. Try one last time to
26638 // get rid of them.
26639 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
26640 N->getValueType(0), {N0});
26641 return Folded;
26642}
26643
26644SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26645 SDValue N0 = N->getOperand(0);
26646
26647 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26648 if (N0->getOpcode() == ISD::BF16_TO_FP)
26649 return N0->getOperand(0);
26650
26651 return SDValue();
26652}
26653
26654SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26655 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26656 return visitFP16_TO_FP(N);
26657}
26658
26659SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26660 SDValue N0 = N->getOperand(0);
26661 EVT VT = N0.getValueType();
26662 unsigned Opcode = N->getOpcode();
26663
26664 // VECREDUCE over 1-element vector is just an extract.
26665 if (VT.getVectorElementCount().isScalar()) {
26666 SDLoc dl(N);
26667 SDValue Res =
26668 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26669 DAG.getVectorIdxConstant(0, dl));
26670 if (Res.getValueType() != N->getValueType(0))
26671 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26672 return Res;
26673 }
26674
26675 // On a boolean vector an and/or reduction is the same as a umin/umax
26676 // reduction. Convert them if the latter is legal while the former isn't.
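// For example, on a vector whose elements are all known to be 0 or -1,
// VECREDUCE_AND computes the same value as VECREDUCE_UMIN, and VECREDUCE_OR
// the same as VECREDUCE_UMAX.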
26677 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26678 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26679 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26680 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26681 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26682 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26683 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26684 }
26685
26686 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26687 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26688 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26689 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26690 SDValue Vec = N0.getOperand(0);
26691 SDValue Subvec = N0.getOperand(1);
26692 if ((Opcode == ISD::VECREDUCE_OR &&
26693 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26694 (Opcode == ISD::VECREDUCE_AND &&
26695 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26696 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26697 }
26698
26699 return SDValue();
26700}
26701
26702SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26703 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26704
26705 // FSUB -> FMA combines:
26706 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26707 AddToWorklist(Fused.getNode());
26708 return Fused;
26709 }
26710 return SDValue();
26711}
26712
26713SDValue DAGCombiner::visitVPOp(SDNode *N) {
26714
26715 if (N->getOpcode() == ISD::VP_GATHER)
26716 if (SDValue SD = visitVPGATHER(N))
26717 return SD;
26718
26719 if (N->getOpcode() == ISD::VP_SCATTER)
26720 if (SDValue SD = visitVPSCATTER(N))
26721 return SD;
26722
26723 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26724 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26725 return SD;
26726
26727 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26728 if (SDValue SD = visitVP_STRIDED_STORE(N))
26729 return SD;
26730
26731 // VP operations in which all vector elements are disabled - either by
26732 // determining that the mask is all false or that the EVL is 0 - can be
26733 // eliminated.
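// For example, a VP load whose mask is constant all-false (or whose EVL is 0)
// reads no lanes and can be folded to undef plus its input chain.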
26734 bool AreAllEltsDisabled = false;
26735 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26736 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26737 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26738 AreAllEltsDisabled |=
26739 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26740
26741 // This is the only generic VP combine we support for now.
26742 if (!AreAllEltsDisabled) {
26743 switch (N->getOpcode()) {
26744 case ISD::VP_FADD:
26745 return visitVP_FADD(N);
26746 case ISD::VP_FSUB:
26747 return visitVP_FSUB(N);
26748 case ISD::VP_FMA:
26749 return visitFMA<VPMatchContext>(N);
26750 case ISD::VP_SELECT:
26751 return visitVP_SELECT(N);
26752 case ISD::VP_MUL:
26753 return visitMUL<VPMatchContext>(N);
26754 default:
26755 break;
26756 }
26757 return SDValue();
26758 }
26759
26760 // Binary operations can be replaced by UNDEF.
26761 if (ISD::isVPBinaryOp(N->getOpcode()))
26762 return DAG.getUNDEF(N->getValueType(0));
26763
26764 // VP Memory operations can be replaced by either the chain (stores) or the
26765 // chain + undef (loads).
26766 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26767 if (MemSD->writeMem())
26768 return MemSD->getChain();
26769 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26770 }
26771
26772 // Reduction operations return the start operand when no elements are active.
26773 if (ISD::isVPReduction(N->getOpcode()))
26774 return N->getOperand(0);
26775
26776 return SDValue();
26777}
26778
26779SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26780 SDValue Chain = N->getOperand(0);
26781 SDValue Ptr = N->getOperand(1);
26782 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26783
26784 // Check that the memory the FP state is written to is used only in a single
26785 // load operation.
26786 LoadSDNode *LdNode = nullptr;
26787 for (auto *U : Ptr->uses()) {
26788 if (U == N)
26789 continue;
26790 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26791 if (LdNode && LdNode != Ld)
26792 return SDValue();
26793 LdNode = Ld;
26794 continue;
26795 }
26796 return SDValue();
26797 }
26798 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26799 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26800 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26801 return SDValue();
26802
26803 // Check if the loaded value is used only in a store operation.
26804 StoreSDNode *StNode = nullptr;
26805 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26806 SDUse &U = I.getUse();
26807 if (U.getResNo() == 0) {
26808 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26809 if (StNode)
26810 return SDValue();
26811 StNode = St;
26812 } else {
26813 return SDValue();
26814 }
26815 }
26816 }
26817 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26818 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26819 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26820 return SDValue();
26821
26822 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26823 // environment.
26824 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26825 StNode->getMemOperand());
26826 CombineTo(StNode, Res, false);
26827 return Res;
26828}
26829
26830SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26831 SDValue Chain = N->getOperand(0);
26832 SDValue Ptr = N->getOperand(1);
26833 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26834
26835 // Check that the address of the FP state is also used only in a single store operation.
26836 StoreSDNode *StNode = nullptr;
26837 for (auto *U : Ptr->uses()) {
26838 if (U == N)
26839 continue;
26840 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26841 if (StNode && StNode != St)
26842 return SDValue();
26843 StNode = St;
26844 continue;
26845 }
26846 return SDValue();
26847 }
26848 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26849 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26850 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26851 return SDValue();
26852
26853 // Check if the stored value is loaded from some location and the loaded
26854 // value is used only in the store operation.
26855 SDValue StValue = StNode->getValue();
26856 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26857 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26858 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26859 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26860 return SDValue();
26861
26862 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26863 // environment.
26864 SDValue Res =
26865 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26866 LdNode->getMemOperand());
26867 return Res;
26868}
26869
26870/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26871/// with the destination vector and a zero vector.
26872/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26873/// vector_shuffle V, Zero, <0, 4, 2, 4>
26874SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26875 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26876
26877 EVT VT = N->getValueType(0);
26878 SDValue LHS = N->getOperand(0);
26879 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26880 SDLoc DL(N);
26881
26882 // Make sure we're not running after operation legalization where it
26883 // may have custom lowered the vector shuffles.
26884 if (LegalOperations)
26885 return SDValue();
26886
26887 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26888 return SDValue();
26889
26890 EVT RVT = RHS.getValueType();
26891 unsigned NumElts = RHS.getNumOperands();
26892
26893 // Attempt to create a valid clear mask, splitting the mask into
26894 // sub elements and checking to see if each is
26895 // all zeros or all ones - suitable for shuffle masking.
26896 auto BuildClearMask = [&](int Split) {
26897 int NumSubElts = NumElts * Split;
26898 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26899
26900 SmallVector<int, 8> Indices;
26901 for (int i = 0; i != NumSubElts; ++i) {
26902 int EltIdx = i / Split;
26903 int SubIdx = i % Split;
26904 SDValue Elt = RHS.getOperand(EltIdx);
26905 // X & undef --> 0 (not undef). So this lane must be converted to choose
26906 // from the zero constant vector (same as if the element had all 0-bits).
26907 if (Elt.isUndef()) {
26908 Indices.push_back(i + NumSubElts);
26909 continue;
26910 }
26911
26912 APInt Bits;
26913 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26914 Bits = Cst->getAPIntValue();
26915 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26916 Bits = CstFP->getValueAPF().bitcastToAPInt();
26917 else
26918 return SDValue();
26919
26920 // Extract the sub element from the constant bit mask.
26921 if (DAG.getDataLayout().isBigEndian())
26922 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26923 else
26924 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26925
26926 if (Bits.isAllOnes())
26927 Indices.push_back(i);
26928 else if (Bits == 0)
26929 Indices.push_back(i + NumSubElts);
26930 else
26931 return SDValue();
26932 }
26933
26934 // Let's see if the target supports this vector_shuffle.
26935 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26936 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26937 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26938 return SDValue();
26939
26940 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26941 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26942 DAG.getBitcast(ClearVT, LHS),
26943 Zero, Indices));
26944 };
26945
26946 // Determine maximum split level (byte level masking).
26947 int MaxSplit = 1;
26948 if (RVT.getScalarSizeInBits() % 8 == 0)
26949 MaxSplit = RVT.getScalarSizeInBits() / 8;
26950
26951 for (int Split = 1; Split <= MaxSplit; ++Split)
26952 if (RVT.getScalarSizeInBits() % Split == 0)
26953 if (SDValue S = BuildClearMask(Split))
26954 return S;
26955
26956 return SDValue();
26957}
26958
26959/// If a vector binop is performed on splat values, it may be profitable to
26960/// extract, scalarize, and insert/splat.
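/// For example (roughly): add (splat X), (splat Y) --> splat (add X, Y), when
/// the scalar add is legal and extracting the splatted element is cheap.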
26961static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26962 const SDLoc &DL) {
26963 SDValue N0 = N->getOperand(0);
26964 SDValue N1 = N->getOperand(1);
26965 unsigned Opcode = N->getOpcode();
26966 EVT VT = N->getValueType(0);
26967 EVT EltVT = VT.getVectorElementType();
26968 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26969
26970 // TODO: Remove/replace the extract cost check? If the elements are available
26971 // as scalars, then there may be no extract cost. Should we ask if
26972 // inserting a scalar back into a vector is cheap instead?
26973 int Index0, Index1;
26974 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26975 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26976 // Extract element from splat_vector should be free.
26977 // TODO: use DAG.isSplatValue instead?
26978 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26979 N1.getOpcode() == ISD::SPLAT_VECTOR;
26980 if (!Src0 || !Src1 || Index0 != Index1 ||
26981 Src0.getValueType().getVectorElementType() != EltVT ||
26982 Src1.getValueType().getVectorElementType() != EltVT ||
26983 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26984 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26985 return SDValue();
26986
26987 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26988 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26989 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26990 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26991
26992 // If all lanes but 1 are undefined, no need to splat the scalar result.
26993 // TODO: Keep track of undefs and use that info in the general case.
26994 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26995 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26996 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26997 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26998 // build_vec ..undef, (bo X, Y), undef...
26999 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
27000 Ops[Index0] = ScalarBO;
27001 return DAG.getBuildVector(VT, DL, Ops);
27002 }
27003
27004 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27005 return DAG.getSplat(VT, DL, ScalarBO);
27006}
27007
27008/// Visit a vector cast operation, like FP_EXTEND.
27009SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27010 EVT VT = N->getValueType(0);
27011 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27012 EVT EltVT = VT.getVectorElementType();
27013 unsigned Opcode = N->getOpcode();
27014
27015 SDValue N0 = N->getOperand(0);
27016 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27017
27018 // TODO: promoting the operation might also be good here?
27019 int Index0;
27020 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27021 if (Src0 &&
27022 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27023 TLI.isExtractVecEltCheap(VT, Index0)) &&
27024 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27025 TLI.preferScalarizeSplat(N)) {
27026 EVT SrcVT = N0.getValueType();
27027 EVT SrcEltVT = SrcVT.getVectorElementType();
27028 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27029 SDValue Elt =
27030 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27031 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27032 if (VT.isScalableVector())
27033 return DAG.getSplatVector(VT, DL, ScalarBO);
27034 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27035 return DAG.getBuildVector(VT, DL, Ops);
27036 }
27037
27038 return SDValue();
27039}
27040
27041/// Visit a binary vector operation, like ADD.
27042SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27043 EVT VT = N->getValueType(0);
27044 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27045
27046 SDValue LHS = N->getOperand(0);
27047 SDValue RHS = N->getOperand(1);
27048 unsigned Opcode = N->getOpcode();
27049 SDNodeFlags Flags = N->getFlags();
27050
27051 // Move unary shuffles with identical masks after a vector binop:
27052 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27053 // --> shuffle (VBinOp A, B), Undef, Mask
27054 // This does not require type legality checks because we are creating the
27055 // same types of operations that are in the original sequence. We do have to
27056 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27057 // though. This code is adapted from the identical transform in instcombine.
27058 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27059 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27060 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27061 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27062 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27063 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27064 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27065 RHS.getOperand(0), Flags);
27066 SDValue UndefV = LHS.getOperand(1);
27067 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27068 }
27069
27070 // Try to sink a splat shuffle after a binop with a uniform constant.
27071 // This is limited to cases where neither the shuffle nor the constant have
27072 // undefined elements because that could be poison-unsafe or inhibit
27073 // demanded elements analysis. It is further limited to not change a splat
27074 // of an inserted scalar because that may be optimized better by
27075 // load-folding or other target-specific behaviors.
27076 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27077 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27078 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27079 // binop (splat X), (splat C) --> splat (binop X, C)
27080 SDValue X = Shuf0->getOperand(0);
27081 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27082 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27083 Shuf0->getMask());
27084 }
27085 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27086 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27087 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27088 // binop (splat C), (splat X) --> splat (binop C, X)
27089 SDValue X = Shuf1->getOperand(0);
27090 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27091 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27092 Shuf1->getMask());
27093 }
27094 }
27095
27096 // The following pattern is likely to emerge with vector reduction ops. Moving
27097 // the binary operation ahead of insertion may allow using a narrower vector
27098 // instruction that has better performance than the wide version of the op:
27099 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27100 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27101 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27102 LHS.getOperand(2) == RHS.getOperand(2) &&
27103 (LHS.hasOneUse() || RHS.hasOneUse())) {
27104 SDValue X = LHS.getOperand(1);
27105 SDValue Y = RHS.getOperand(1);
27106 SDValue Z = LHS.getOperand(2);
27107 EVT NarrowVT = X.getValueType();
27108 if (NarrowVT == Y.getValueType() &&
27109 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27110 LegalOperations)) {
27111 // (binop undef, undef) may not return undef, so compute that result.
27112 SDValue VecC =
27113 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27114 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27115 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27116 }
27117 }
27118
27119 // Make sure all but the first op are undef or constant.
27120 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27121 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27122 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27123 return Op.isUndef() ||
27124 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27125 });
27126 };
27127
27128 // The following pattern is likely to emerge with vector reduction ops. Moving
27129 // the binary operation ahead of the concat may allow using a narrower vector
27130 // instruction that has better performance than the wide version of the op:
27131 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27132 // concat (VBinOp X, Y), VecC
27133 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27134 (LHS.hasOneUse() || RHS.hasOneUse())) {
27135 EVT NarrowVT = LHS.getOperand(0).getValueType();
27136 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27137 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27138 unsigned NumOperands = LHS.getNumOperands();
27139 SmallVector<SDValue, 4> ConcatOps;
27140 for (unsigned i = 0; i != NumOperands; ++i) {
27141 // This constant-folds for operands 1 and up.
27142 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27143 RHS.getOperand(i)));
27144 }
27145
27146 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27147 }
27148 }
27149
27150 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27151 return V;
27152
27153 return SDValue();
27154}
27155
27156SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27157 SDValue N2) {
27158 assert(N0.getOpcode() == ISD::SETCC &&
27159 "First argument must be a SetCC node!");
27160
27161 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27162 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27163
27164 // If we got a simplified select_cc node back from SimplifySelectCC, then
27165 // break it down into a new SETCC node, and a new SELECT node, and then return
27166 // the SELECT node, since we were called with a SELECT node.
27167 if (SCC.getNode()) {
27168 // Check to see if we got a select_cc back (to turn into setcc/select).
27169 // Otherwise, just return whatever node we got back, like fabs.
27170 if (SCC.getOpcode() == ISD::SELECT_CC) {
27171 const SDNodeFlags Flags = N0->getFlags();
27172 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27173 N0.getValueType(),
27174 SCC.getOperand(0), SCC.getOperand(1),
27175 SCC.getOperand(4), Flags);
27176 AddToWorklist(SETCC.getNode());
27177 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27178 SCC.getOperand(2), SCC.getOperand(3));
27179 SelectNode->setFlags(Flags);
27180 return SelectNode;
27181 }
27182
27183 return SCC;
27184 }
27185 return SDValue();
27186}
27187
27188/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27189/// being selected between, see if we can simplify the select. Callers of this
27190/// should assume that TheSelect is deleted if this returns true. As such, they
27191/// should return the appropriate thing (e.g. the node) back to the top-level of
27192/// the DAG combiner loop to avoid it being looked at.
27193bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27194 SDValue RHS) {
27195 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27196 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27197 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27198 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27199 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27200 SDValue Sqrt = RHS;
27201 ISD::CondCode CC;
27202 SDValue CmpLHS;
27203 const ConstantFPSDNode *Zero = nullptr;
27204
27205 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27206 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27207 CmpLHS = TheSelect->getOperand(0);
27208 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27209 } else {
27210 // SELECT or VSELECT
27211 SDValue Cmp = TheSelect->getOperand(0);
27212 if (Cmp.getOpcode() == ISD::SETCC) {
27213 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27214 CmpLHS = Cmp.getOperand(0);
27215 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27216 }
27217 }
27218 if (Zero && Zero->isZero() &&
27219 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27220 CC == ISD::SETULT || CC == ISD::SETLT)) {
27221 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27222 CombineTo(TheSelect, Sqrt);
27223 return true;
27224 }
27225 }
27226 }
27227 // Cannot simplify select with vector condition
27228 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27229
27230 // If this is a select from two identical things, try to pull the operation
27231 // through the select.
27232 if (LHS.getOpcode() != RHS.getOpcode() ||
27233 !LHS.hasOneUse() || !RHS.hasOneUse())
27234 return false;
27235
27236 // If this is a load and the token chain is identical, replace the select
27237 // of two loads with a load through a select of the address to load from.
27238 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27239 // constants have been dropped into the constant pool.
27240 if (LHS.getOpcode() == ISD::LOAD) {
27241 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27242 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27243
27244 // Token chains must be identical.
27245 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27246 // Do not let this transformation reduce the number of volatile loads.
27247 // Be conservative for atomics for the moment
27248 // TODO: This does appear to be legal for unordered atomics (see D66309)
27249 !LLD->isSimple() || !RLD->isSimple() ||
27250 // FIXME: If either is a pre/post inc/dec load,
27251 // we'd need to split out the address adjustment.
27252 LLD->isIndexed() || RLD->isIndexed() ||
27253 // If this is an EXTLOAD, the VT's must match.
27254 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27255 // If this is an EXTLOAD, the kind of extension must match.
27256 (LLD->getExtensionType() != RLD->getExtensionType() &&
27257 // The only exception is if one of the extensions is anyext.
27258 LLD->getExtensionType() != ISD::EXTLOAD &&
27259 RLD->getExtensionType() != ISD::EXTLOAD) ||
27260 // FIXME: this discards src value information. This is
27261 // over-conservative. It would be beneficial to be able to remember
27262 // both potential memory locations. Since we are discarding
27263 // src value info, don't do the transformation if the memory
27264 // locations are not in the default address space.
27265 LLD->getPointerInfo().getAddrSpace() != 0 ||
27266 RLD->getPointerInfo().getAddrSpace() != 0 ||
27267 // We can't produce a CMOV of a TargetFrameIndex since we won't
27268 // generate the address generation required.
27269 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27270 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27271 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27272 LLD->getBasePtr().getValueType()))
27273 return false;
27274
27275 // The loads must not depend on one another.
27276 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27277 return false;
27278
27279 // Check that the select condition doesn't reach either load. If so,
27280 // folding this will induce a cycle into the DAG. If not, this is safe to
27281 // xform, so create a select of the addresses.
27282
27283 SmallPtrSet<const SDNode *, 32> Visited;
27284 SmallVector<const SDNode *, 16> Worklist;
27285
27286 // Always fail if LLD and RLD are not independent. TheSelect is a
27287 // predecessor to all Nodes in question so we need not search past it.
27288
27289 Visited.insert(TheSelect);
27290 Worklist.push_back(LLD);
27291 Worklist.push_back(RLD);
27292
27293 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27294 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27295 return false;
27296
27297 SDValue Addr;
27298 if (TheSelect->getOpcode() == ISD::SELECT) {
27299 // We cannot do this optimization if any pair of {RLD, LLD} is a
27300 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27301 // Loads, we only need to check if CondNode is a successor to one of the
27302 // loads. We can further avoid this if there's no use of their chain
27303 // value.
27304 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27305 Worklist.push_back(CondNode);
27306
27307 if ((LLD->hasAnyUseOfValue(1) &&
27308 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27309 (RLD->hasAnyUseOfValue(1) &&
27310 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27311 return false;
27312
27313 Addr = DAG.getSelect(SDLoc(TheSelect),
27314 LLD->getBasePtr().getValueType(),
27315 TheSelect->getOperand(0), LLD->getBasePtr(),
27316 RLD->getBasePtr());
27317 } else { // Otherwise SELECT_CC
27318 // We cannot do this optimization if any pair of {RLD, LLD} is a
27319 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27320 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27321 // one of the loads. We can further avoid this if there's no use of their
27322 // chain value.
27323
27324 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27325 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27326 Worklist.push_back(CondLHS);
27327 Worklist.push_back(CondRHS);
27328
27329 if ((LLD->hasAnyUseOfValue(1) &&
27330 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27331 (RLD->hasAnyUseOfValue(1) &&
27332 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27333 return false;
27334
27335 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27336 LLD->getBasePtr().getValueType(),
27337 TheSelect->getOperand(0),
27338 TheSelect->getOperand(1),
27339 LLD->getBasePtr(), RLD->getBasePtr(),
27340 TheSelect->getOperand(4));
27341 }
27342
27343 SDValue Load;
27344 // It is safe to replace the two loads if they have different alignments,
27345 // but the new load must be the minimum (most restrictive) alignment of the
27346 // inputs.
27347 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27348 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27349 if (!RLD->isInvariant())
27350 MMOFlags &= ~MachineMemOperand::MOInvariant;
27351 if (!RLD->isDereferenceable())
27352 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27353 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27354 // FIXME: Discards pointer and AA info.
27355 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27356 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27357 MMOFlags);
27358 } else {
27359 // FIXME: Discards pointer and AA info.
27360 Load = DAG.getExtLoad(
27361 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27362 : LLD->getExtensionType(),
27363 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27364 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27365 }
27366
27367 // Users of the select now use the result of the load.
27368 CombineTo(TheSelect, Load);
27369
27370 // Users of the old loads now use the new load's chain. We know the
27371 // old-load value is dead now.
27372 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27373 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27374 return true;
27375 }
27376
27377 return false;
27378}
27379
27380/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27381/// bitwise 'and'.
27382SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27383 SDValue N1, SDValue N2, SDValue N3,
27384 ISD::CondCode CC) {
27385 // If this is a select where the false operand is zero and the compare is a
27386 // check of the sign bit, see if we can perform the "gzip trick":
27387 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27388 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
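 // Worked example (illustrative), with i32 X and A == 8 (a single set bit):
 // select_cc setlt X, 0, 8, 0 --> and (srl X, 28), 8
 // i.e. bit 31 of X is shifted down onto bit 3, yielding 8 when X is negative
 // and 0 otherwise.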
27389 EVT XType = N0.getValueType();
27390 EVT AType = N2.getValueType();
27391 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27392 return SDValue();
27393
27394 // If the comparison is testing for a positive value, we have to invert
27395 // the sign bit mask, so only do that transform if the target has a bitwise
27396 // 'and not' instruction (the invert is free).
27397 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27398 // (X > -1) ? A : 0
27399 // (X > 0) ? X : 0 <-- This is canonical signed max.
27400 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27401 return SDValue();
27402 } else if (CC == ISD::SETLT) {
27403 // (X < 0) ? A : 0
27404 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27405 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27406 return SDValue();
27407 } else {
27408 return SDValue();
27409 }
27410
27411 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27412 // constant.
27413 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27414 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27415 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27416 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27417 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27418 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27419 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27420 AddToWorklist(Shift.getNode());
27421
27422 if (XType.bitsGT(AType)) {
27423 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27424 AddToWorklist(Shift.getNode());
27425 }
27426
27427 if (CC == ISD::SETGT)
27428 Shift = DAG.getNOT(DL, Shift, AType);
27429
27430 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27431 }
27432 }
27433
27434 unsigned ShCt = XType.getSizeInBits() - 1;
27435 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27436 return SDValue();
27437
27438 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27439 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27440 AddToWorklist(Shift.getNode());
27441
27442 if (XType.bitsGT(AType)) {
27443 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27444 AddToWorklist(Shift.getNode());
27445 }
27446
27447 if (CC == ISD::SETGT)
27448 Shift = DAG.getNOT(DL, Shift, AType);
27449
27450 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27451}
27452
27453// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27454SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27455 SDValue N0 = N->getOperand(0);
27456 SDValue N1 = N->getOperand(1);
27457 SDValue N2 = N->getOperand(2);
27458 SDLoc DL(N);
27459
27460 unsigned BinOpc = N1.getOpcode();
27461 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27462 (N1.getResNo() != N2.getResNo()))
27463 return SDValue();
27464
27465 // The use checks are intentionally on SDNode because we may be dealing
27466 // with opcodes that produce more than one SDValue.
27467 // TODO: Do we really need to check N0 (the condition operand of the select)?
27468 // But removing that clause could cause an infinite loop...
27469 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27470 return SDValue();
27471
27472 // Binops may include opcodes that return multiple values, so all values
27473 // must be created/propagated from the newly created binops below.
27474 SDVTList OpVTs = N1->getVTList();
27475
27476 // Fold select(cond, binop(x, y), binop(z, y))
27477 // --> binop(select(cond, x, z), y)
27478 if (N1.getOperand(1) == N2.getOperand(1)) {
27479 SDValue N10 = N1.getOperand(0);
27480 SDValue N20 = N2.getOperand(0);
27481 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27482 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27483 NewBinOp->setFlags(N1->getFlags());
27484 NewBinOp->intersectFlagsWith(N2->getFlags());
27485 return SDValue(NewBinOp.getNode(), N1.getResNo());
27486 }
27487
27488 // Fold select(cond, binop(x, y), binop(x, z))
27489 // --> binop(x, select(cond, y, z))
27490 if (N1.getOperand(0) == N2.getOperand(0)) {
27491 SDValue N11 = N1.getOperand(1);
27492 SDValue N21 = N2.getOperand(1);
27493 // Second op VT might be different (e.g. shift amount type)
27494 if (N11.getValueType() == N21.getValueType()) {
27495 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27496 SDValue NewBinOp =
27497 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27498 NewBinOp->setFlags(N1->getFlags());
27499 NewBinOp->intersectFlagsWith(N2->getFlags());
27500 return SDValue(NewBinOp.getNode(), N1.getResNo());
27501 }
27502 }
27503
27504 // TODO: Handle isCommutativeBinOp patterns as well?
27505 return SDValue();
27506}
27507
27508// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27509SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27510 SDValue N0 = N->getOperand(0);
27511 EVT VT = N->getValueType(0);
27512 bool IsFabs = N->getOpcode() == ISD::FABS;
27513 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27514
27515 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27516 return SDValue();
27517
27518 SDValue Int = N0.getOperand(0);
27519 EVT IntVT = Int.getValueType();
27520
27521 // The operand to cast should be integer.
27522 if (!IntVT.isInteger() || IntVT.isVector())
27523 return SDValue();
27524
27525 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27526 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
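 // For example, with a scalar f32 payload this amounts to (illustrative):
 // (fneg (bitconvert x)) -> (bitconvert (xor x, 0x80000000))
 // (fabs (bitconvert x)) -> (bitconvert (and x, 0x7fffffff))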
27527 APInt SignMask;
27528 if (N0.getValueType().isVector()) {
27529 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27530 // 0x7f...) per element and splat it.
27531 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27532 if (IsFabs)
27533 SignMask = ~SignMask;
27534 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27535 } else {
27536 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27537 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27538 if (IsFabs)
27539 SignMask = ~SignMask;
27540 }
27541 SDLoc DL(N0);
27542 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27543 DAG.getConstant(SignMask, DL, IntVT));
27544 AddToWorklist(Int.getNode());
27545 return DAG.getBitcast(VT, Int);
27546}
27547
27548/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27549/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27550/// in it. This may be a win when the constant is not otherwise available
27551/// because it replaces two constant pool loads with one.
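/// Roughly, for f32 constants the two values are emitted as a two-element
/// constant-pool array and the comparison selects between byte offsets 0 and 4,
/// so one load replaces two separate constant-pool loads.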
27552SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27553 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27554 ISD::CondCode CC) {
27555 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27556 return SDValue();
27557
27558 // If we are before legalize types, we want the other legalization to happen
27559 // first (for example, to avoid messing with soft float).
27560 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27561 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27562 EVT VT = N2.getValueType();
27563 if (!TV || !FV || !TLI.isTypeLegal(VT))
27564 return SDValue();
27565
27566 // If a constant can be materialized without loads, this does not make sense.
27567 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27568 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27569 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27570 return SDValue();
27571
27572 // If both constants have multiple uses, then we won't need to do an extra
27573 // load. The values are likely around in registers for other users.
27574 if (!TV->hasOneUse() && !FV->hasOneUse())
27575 return SDValue();
27576
27577 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27578 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27579 Type *FPTy = Elts[0]->getType();
27580 const DataLayout &TD = DAG.getDataLayout();
27581
27582 // Create a ConstantArray of the two constants.
27583 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27584 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27585 TD.getPrefTypeAlign(FPTy));
27586 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27587
27588 // Get offsets to the 0 and 1 elements of the array, so we can select between
27589 // them.
27590 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27591 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27592 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27593 SDValue Cond =
27594 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27595 AddToWorklist(Cond.getNode());
27596 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27597 AddToWorklist(CstOffset.getNode());
27598 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27599 AddToWorklist(CPIdx.getNode());
27600 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27601 MachinePointerInfo::getConstantPool(
27602 DAG.getMachineFunction()), Alignment);
27603}
27604
27605/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27606/// where 'cond' is the comparison specified by CC.
27607SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27608 SDValue N2, SDValue N3, ISD::CondCode CC,
27609 bool NotExtCompare) {
27610 // (x ? y : y) -> y.
27611 if (N2 == N3) return N2;
27612
27613 EVT CmpOpVT = N0.getValueType();
27614 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27615 EVT VT = N2.getValueType();
27616 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27617 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27618 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27619
27620 // Determine if the condition we're dealing with is constant.
27621 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27622 AddToWorklist(SCC.getNode());
27623 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27624 // fold select_cc true, x, y -> x
27625 // fold select_cc false, x, y -> y
27626 return !(SCCC->isZero()) ? N2 : N3;
27627 }
27628 }
27629
27630 if (SDValue V =
27631 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27632 return V;
27633
27634 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27635 return V;
27636
27637 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27638 // where y has a single bit set.
27639 // A plaintext description would be: we can turn the SELECT_CC into an AND
27640 // when the condition can be materialized as an all-ones register. Any
27641 // single bit-test can be materialized as an all-ones register with
27642 // shift-left and shift-right-arith.
27643 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27644 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27645 SDValue AndLHS = N0->getOperand(0);
27646 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27647 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27648 // Shift the tested bit over the sign bit.
27649 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27650 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27651 unsigned ShCt = AndMask.getBitWidth() - 1;
27652 SDValue ShlAmt =
27653 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27654 getShiftAmountTy(AndLHS.getValueType()));
27655 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27656
27657 // Now arithmetic right shift it all the way over, so the result is
27658 // either all-ones, or zero.
27659 SDValue ShrAmt =
27660 DAG.getConstant(ShCt, SDLoc(Shl),
27661 getShiftAmountTy(Shl.getValueType()));
27662 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27663
27664 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27665 }
27666 }
27667 }
27668
27669 // fold select C, 16, 0 -> shl C, 4
27670 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27671 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27672
27673 if ((Fold || Swap) &&
27674 TLI.getBooleanContents(CmpOpVT) ==
27675 TargetLowering::ZeroOrOneBooleanContent &&
27676 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27677
27678 if (Swap) {
27679 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27680 std::swap(N2C, N3C);
27681 }
27682
27683 // If the caller doesn't want us to simplify this into a zext of a compare,
27684 // don't do it.
27685 if (NotExtCompare && N2C->isOne())
27686 return SDValue();
27687
27688 SDValue Temp, SCC;
27689 // zext (setcc n0, n1)
27690 if (LegalTypes) {
27691 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27692 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27693 } else {
27694 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27695 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27696 }
27697
27698 AddToWorklist(SCC.getNode());
27699 AddToWorklist(Temp.getNode());
27700
27701 if (N2C->isOne())
27702 return Temp;
27703
27704 unsigned ShCt = N2C->getAPIntValue().logBase2();
27705 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27706 return SDValue();
27707
27708 // shl setcc result by log2 n2c
27709 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27710 DAG.getConstant(ShCt, SDLoc(Temp),
27711 getShiftAmountTy(Temp.getValueType())));
27712 }
27713
27714 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27715 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27716 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27717 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27718 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27719 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27720 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27721 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27722 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27723 SDValue ValueOnZero = N2;
27724 SDValue Count = N3;
27725 // If the condition is NE instead of E, swap the operands.
27726 if (CC == ISD::SETNE)
27727 std::swap(ValueOnZero, Count);
27728 // Check if the value on zero is a constant equal to the bits in the type.
27729 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27730 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27731 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27732 // legal, combine to just cttz.
27733 if ((Count.getOpcode() == ISD::CTTZ ||
27734 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27735 N0 == Count.getOperand(0) &&
27736 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27737 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27738 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27739 // legal, combine to just ctlz.
27740 if ((Count.getOpcode() == ISD::CTLZ ||
27741 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27742 N0 == Count.getOperand(0) &&
27743 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27744 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27745 }
27746 }
27747 }
27748
27749 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27750 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27751 if (!NotExtCompare && N1C && N2C && N3C &&
27752 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27753 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27754 (N1C->isZero() && CC == ISD::SETLT)) &&
27755 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27756 SDValue ASR = DAG.getNode(
27757 ISD::SRA, DL, CmpOpVT, N0,
27758 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27759 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27760 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27761 }
27762
27763 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27764 return S;
27765 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27766 return S;
27767
27768 return SDValue();
27769}
27770
27771/// This is a stub for TargetLowering::SimplifySetCC.
27772SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27773 ISD::CondCode Cond, const SDLoc &DL,
27774 bool foldBooleans) {
27775 TargetLowering::DAGCombinerInfo
27776 DagCombineInfo(DAG, Level, false, this);
27777 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27778}
27779
27780/// Given an ISD::SDIV node expressing a divide by constant, return
27781/// a DAG expression to select that will generate the same value by multiplying
27782/// by a magic number.
27783/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
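/// As a rough illustration (not necessarily the exact sequence a target
/// builds): i32 x / 3 can become a multiply-high by a magic constant around
/// 0x55555556 followed by shift and sign-correction steps, avoiding a divide.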
27784SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27785 // when optimising for minimum size, we don't want to expand a div to a mul
27786 // and a shift.
27787 if (DAG.getMachineFunction().getFunction().hasMinSize())
27788 return SDValue();
27789
27790 SmallVector<SDNode *, 8> Built;
27791 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27792 for (SDNode *N : Built)
27793 AddToWorklist(N);
27794 return S;
27795 }
27796
27797 return SDValue();
27798}
27799
27800/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27801/// DAG expression that will generate the same value by right shifting.
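/// For example (illustrative, i32): x / 8 becomes roughly
/// (x + ((x >> 31) >>u 29)) >>s 3, i.e. a bias is added for negative values so
/// the arithmetic shift rounds toward zero.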
27802SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27803 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27804 if (!C)
27805 return SDValue();
27806
27807 // Avoid division by zero.
27808 if (C->isZero())
27809 return SDValue();
27810
27811 SmallVector<SDNode *, 8> Built;
27812 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27813 for (SDNode *N : Built)
27814 AddToWorklist(N);
27815 return S;
27816 }
27817
27818 return SDValue();
27819}
27820
27821/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27822/// expression that will generate the same value by multiplying by a magic
27823/// number.
27824/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
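/// As a rough illustration: unsigned i32 x / 3 can be computed as
/// (x * 0xAAAAAAAB) >> 33, i.e. mulhu(x, 0xAAAAAAAB) followed by a shift by 1.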
27825SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27826 // when optimising for minimum size, we don't want to expand a div to a mul
27827 // and a shift.
27828 if (DAG.getMachineFunction().getFunction().hasMinSize())
27829 return SDValue();
27830
27831 SmallVector<SDNode *, 8> Built;
27832 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27833 for (SDNode *N : Built)
27834 AddToWorklist(N);
27835 return S;
27836 }
27837
27838 return SDValue();
27839}
27840
27841/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27842/// return a DAG expression that will generate the same value.
27843SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27844 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27845 if (!C)
27846 return SDValue();
27847
27848 // Avoid division by zero.
27849 if (C->isZero())
27850 return SDValue();
27851
27852 SmallVector<SDNode *, 8> Built;
27853 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27854 for (SDNode *N : Built)
27855 AddToWorklist(N);
27856 return S;
27857 }
27858
27859 return SDValue();
27860}
27861
27862// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27863//
27864// Returns the node that represents `Log2(Op)`. This may create a new node. If
27865// we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27866//
27867// All nodes will be created at `DL` and the output will be of type `VT`.
27868//
27869// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27870// `AssumeNonZero` if this function should simply assume (not require proving
27871// `Op` is non-zero).
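// For example (illustrative): Log2 of the constant 8 folds to the constant 3,
// and Log2(1 << Y) folds to Y, since (1 << Y) is known to be non-zero.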
27872static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27873 SDValue Op, unsigned Depth,
27874 bool AssumeNonZero) {
27875 assert(VT.isInteger() && "Only integer types are supported!");
27876
27877 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27878 while (true) {
27879 switch (V.getOpcode()) {
27880 case ISD::TRUNCATE:
27881 case ISD::ZERO_EXTEND:
27882 V = V.getOperand(0);
27883 break;
27884 default:
27885 return V;
27886 }
27887 }
27888 };
27889
27890 if (VT.isScalableVector())
27891 return SDValue();
27892
27893 Op = PeekThroughCastsAndTrunc(Op);
27894
27895 // Helper for determining whether a value is a power-2 constant scalar or a
27896 // vector of such elements.
27897 SmallVector<APInt> Pow2Constants;
27898 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27899 if (C->isZero() || C->isOpaque())
27900 return false;
27901 // TODO: We may also be able to support negative powers of 2 here.
27902 if (C->getAPIntValue().isPowerOf2()) {
27903 Pow2Constants.emplace_back(C->getAPIntValue());
27904 return true;
27905 }
27906 return false;
27907 };
27908
27909 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27910 if (!VT.isVector())
27911 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27912 // We need to create a build vector
27913 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
27914 return DAG.getSplat(VT, DL,
27915 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
27916 VT.getScalarType()));
27917 SmallVector<SDValue> Log2Ops;
27918 for (const APInt &Pow2 : Pow2Constants)
27919 Log2Ops.emplace_back(
27920 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27921 return DAG.getBuildVector(VT, DL, Log2Ops);
27922 }
27923
27924 if (Depth >= DAG.MaxRecursionDepth)
27925 return SDValue();
27926
27927 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27928 ToCast = PeekThroughCastsAndTrunc(ToCast);
27929 EVT CurVT = ToCast.getValueType();
27930 if (NewVT == CurVT)
27931 return ToCast;
27932
27933 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27934 return DAG.getBitcast(NewVT, ToCast);
27935
27936 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27937 };
27938
27939 // log2(X << Y) -> log2(X) + Y
27940 if (Op.getOpcode() == ISD::SHL) {
27941 // 1 << Y and X nuw/nsw << Y are all non-zero.
27942 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27943 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27944 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27945 Depth + 1, AssumeNonZero))
27946 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27947 CastToVT(VT, Op.getOperand(1)));
27948 }
27949
27950 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27951 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27952 Op.hasOneUse()) {
27953 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27954 Depth + 1, AssumeNonZero))
27955 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27956 Depth + 1, AssumeNonZero))
27957 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27958 }
27959
27960 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27961 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27962 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27963 Op.hasOneUse()) {
27964 // Use AssumeNonZero as false here. Otherwise we can hit a case where
27965 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27966 if (SDValue LogX =
27967 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27968 /*AssumeNonZero*/ false))
27969 if (SDValue LogY =
27970 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27971 /*AssumeNonZero*/ false))
27972 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27973 }
27974
27975 return SDValue();
27976}
27977
27978/// Determines the LogBase2 value for a non-null input value using the
27979/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
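/// For example (illustrative, i32): V == 16 gives ctlz(V) == 27, so
/// LogBase2(V) == (32 - 1) - 27 == 4.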
27980SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27981 bool KnownNonZero, bool InexpensiveOnly,
27982 std::optional<EVT> OutVT) {
27983 EVT VT = OutVT ? *OutVT : V.getValueType();
27984 SDValue InexpensiveLogBase2 =
27985 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27986 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27987 return InexpensiveLogBase2;
27988
27989 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27990 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27991 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27992 return LogBase2;
27993}
27994
27995/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27996/// For the reciprocal, we need to find the zero of the function:
27997/// F(X) = 1/X - A [which has a zero at X = 1/A]
27998/// =>
27999/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28000/// does not require additional intermediate precision]
28001/// For the last iteration, put numerator N into it to gain more precision:
28002/// Result = N X_i + X_i (N - N A X_i)
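/// As a rough numeric illustration with A = 3: starting from X_0 = 0.3,
/// X_1 = 0.3 * (2 - 3 * 0.3) = 0.33 and X_2 = 0.33 * (2 - 3 * 0.33) = 0.3333,
/// converging quadratically towards 1/3.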
28003SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28004 SDNodeFlags Flags) {
28005 if (LegalDAG)
28006 return SDValue();
28007
28008 // TODO: Handle extended types?
28009 EVT VT = Op.getValueType();
28010 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28011 VT.getScalarType() != MVT::f64)
28012 return SDValue();
28013
28014 // If estimates are explicitly disabled for this function, we're done.
28015 MachineFunction &MF = DAG.getMachineFunction();
28016 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28017 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28018 return SDValue();
28019
28020 // Estimates may be explicitly enabled for this type with a custom number of
28021 // refinement steps.
28022 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28023 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28024 AddToWorklist(Est.getNode());
28025
28026 SDLoc DL(Op);
28027 if (Iterations) {
28028 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28029
28030 // Newton iterations: Est = Est + Est (N - Arg * Est)
28031 // If this is the last iteration, also multiply by the numerator.
28032 for (int i = 0; i < Iterations; ++i) {
28033 SDValue MulEst = Est;
28034
28035 if (i == Iterations - 1) {
28036 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28037 AddToWorklist(MulEst.getNode());
28038 }
28039
28040 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28041 AddToWorklist(NewEst.getNode());
28042
28043 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28044 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28045 AddToWorklist(NewEst.getNode());
28046
28047 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28048 AddToWorklist(NewEst.getNode());
28049
28050 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28051 AddToWorklist(Est.getNode());
28052 }
28053 } else {
28054 // If no iterations are available, multiply with N.
28055 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28056 AddToWorklist(Est.getNode());
28057 }
28058
28059 return Est;
28060 }
28061
28062 return SDValue();
28063}
28064
28065/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28066/// For the reciprocal sqrt, we need to find the zero of the function:
28067/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28068/// =>
28069/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28070/// As a result, we precompute A/2 prior to the iteration loop.
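/// As a rough numeric illustration with A = 4: starting from X_0 = 0.6,
/// X_1 = 0.6 * (1.5 - 4 * 0.36 / 2) = 0.468 and X_2 ~= 0.497, converging
/// towards 1/sqrt(4) = 0.5.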
28071SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28072 unsigned Iterations,
28073 SDNodeFlags Flags, bool Reciprocal) {
28074 EVT VT = Arg.getValueType();
28075 SDLoc DL(Arg);
28076 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28077
28078 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28079 // this entire sequence requires only one FP constant.
28080 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28081 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28082
28083 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28084 for (unsigned i = 0; i < Iterations; ++i) {
28085 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28086 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28087 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28088 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28089 }
28090
28091 // If non-reciprocal square root is requested, multiply the result by Arg.
28092 if (!Reciprocal)
28093 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28094
28095 return Est;
28096}
28097
28098/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28099/// For the reciprocal sqrt, we need to find the zero of the function:
28100/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28101/// =>
28102/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
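/// Note this is algebraically the same update as the one-constant form above,
/// rearranged so only the constants -0.5 and -3.0 are needed:
/// (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - A * X_i^2 / 2).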
28103SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28104 unsigned Iterations,
28105 SDNodeFlags Flags, bool Reciprocal) {
28106 EVT VT = Arg.getValueType();
28107 SDLoc DL(Arg);
28108 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28109 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28110
28111 // This routine must enter the loop below to work correctly
28112 // when (Reciprocal == false).
28113 assert(Iterations > 0);
28114
28115 // Newton iterations for reciprocal square root:
28116 // E = (E * -0.5) * ((A * E) * E + -3.0)
28117 for (unsigned i = 0; i < Iterations; ++i) {
28118 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28119 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28120 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28121
28122 // When calculating a square root at the last iteration build:
28123 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28124 // (notice a common subexpression)
28125 SDValue LHS;
28126 if (Reciprocal || (i + 1) < Iterations) {
28127 // RSQRT: LHS = (E * -0.5)
28128 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28129 } else {
28130 // SQRT: LHS = (A * E) * -0.5
28131 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28132 }
28133
28134 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28135 }
28136
28137 return Est;
28138}
28139
28140/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28141/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28142/// Op can be zero.
28143SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28144 bool Reciprocal) {
28145 if (LegalDAG)
28146 return SDValue();
28147
28148 // TODO: Handle extended types?
28149 EVT VT = Op.getValueType();
28150 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28151 VT.getScalarType() != MVT::f64)
28152 return SDValue();
28153
28154 // If estimates are explicitly disabled for this function, we're done.
28155 MachineFunction &MF = DAG.getMachineFunction();
28156 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28157 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28158 return SDValue();
28159
28160 // Estimates may be explicitly enabled for this type with a custom number of
28161 // refinement steps.
28162 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28163
28164 bool UseOneConstNR = false;
28165 if (SDValue Est =
28166 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28167 Reciprocal)) {
28168 AddToWorklist(Est.getNode());
28169
28170 if (Iterations > 0)
28171 Est = UseOneConstNR
28172 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28173 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28174 if (!Reciprocal) {
28175 SDLoc DL(Op);
28176 // Try the target specific test first.
28177 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28178
28179 // The estimate is now completely wrong if the input was exactly 0.0 or
28180 // possibly a denormal. Force the answer to 0.0 or the value provided by
28181 // the target for those cases.
28182 Est = DAG.getNode(
28183 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28184 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28185 }
28186 return Est;
28187 }
28188
28189 return SDValue();
28190}
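The SELECT built above matters because a plain square root is formed as Op * rsqrt(Op). A minimal standalone illustration (an editor's sketch, not taken from this file) of why that product breaks down at zero:

#include <cstdio>
#include <limits>

int main() {
  float X = 0.0f;
  // A hardware reciprocal-sqrt estimate of 0.0 is +infinity...
  float RsqrtEst = std::numeric_limits<float>::infinity();
  // ...so forming sqrt(X) as X * rsqrt(X) gives 0 * inf == NaN, which the
  // SELECT above replaces with the target-provided result (typically 0.0).
  std::printf("%f\n", X * RsqrtEst); // prints nan (spelling is platform-dependent)
  return 0;
}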
28191
28192SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28193 return buildSqrtEstimateImpl(Op, Flags, true);
28194}
28195
28196SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28197 return buildSqrtEstimateImpl(Op, Flags, false);
28198}
28199
28200/// Return true if there is any possibility that the two addresses overlap.
28201bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28202
28203 struct MemUseCharacteristics {
28204 bool IsVolatile;
28205 bool IsAtomic;
28206 SDValue BasePtr;
28207 int64_t Offset;
28208 LocationSize NumBytes;
28209 MachineMemOperand *MMO;
28210 };
28211
28212 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28213 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28214 int64_t Offset = 0;
28215 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28216 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28217 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28218 ? -1 * C->getSExtValue()
28219 : 0;
28220 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28221 return {LSN->isVolatile(), LSN->isAtomic(),
28222 LSN->getBasePtr(), Offset /*base offset*/,
28223 LocationSize::precise(Size), LSN->getMemOperand()};
28224 }
28225 if (const auto *LN = cast<LifetimeSDNode>(N))
28226 return {false /*isVolatile*/,
28227 /*isAtomic*/ false,
28228 LN->getOperand(1),
28229 (LN->hasOffset()) ? LN->getOffset() : 0,
28230 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28231 : LocationSize::beforeOrAfterPointer(),
28232 (MachineMemOperand *)nullptr};
28233 // Default.
28234 return {false /*isvolatile*/,
28235 /*isAtomic*/ false,
28236 SDValue(),
28237 (int64_t)0 /*offset*/,
28238 LocationSize::beforeOrAfterPointer() /*size*/,
28239 (MachineMemOperand *)nullptr};
28240 };
28241
28242 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28243 MUC1 = getCharacteristics(Op1);
28244
28245 // If they are to the same address, then they must be aliases.
28246 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28247 MUC0.Offset == MUC1.Offset)
28248 return true;
28249
28250 // If they are both volatile then they cannot be reordered.
28251 if (MUC0.IsVolatile && MUC1.IsVolatile)
28252 return true;
28253
28254 // Be conservative about atomics for the moment
28255 // TODO: This is way overconservative for unordered atomics (see D66309)
28256 if (MUC0.IsAtomic && MUC1.IsAtomic)
28257 return true;
28258
28259 if (MUC0.MMO && MUC1.MMO) {
28260 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28261 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28262 return false;
28263 }
28264
28265 // If NumBytes is scalable and the offset is not 0, conservatively return
28266 // may alias.
28267 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28268 MUC0.Offset != 0) ||
28269 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28270 MUC1.Offset != 0))
28271 return true;
28272 // Try to prove that there is aliasing, or that there is no aliasing. Either
28273 // way, we can return now. If nothing can be proved, proceed with more tests.
28274 bool IsAlias;
28275 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28276 DAG, IsAlias))
28277 return IsAlias;
28278
28279 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28280 // either are not known.
28281 if (!MUC0.MMO || !MUC1.MMO)
28282 return true;
28283
28284 // If one operation reads from invariant memory, and the other may store, they
28285 // cannot alias. These should really be checking the equivalent of mayWrite,
28286 // but it only matters for memory nodes other than load/store.
28287 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28288 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28289 return false;
28290
28291 // If we know that SrcValue1 and SrcValue2 have relatively large
28292 // alignment compared to the size and offset of the access, we may be able
28293 // to prove they do not alias. This check is conservative for now to catch
28294 // cases created by splitting vector types; it only works when the offsets are
28295 // multiples of the size of the data.
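// Illustrative numbers (an editor's note, not in the original source): with
// both base alignments equal to 16, both access sizes equal to 4,
// SrcValOffset0 == 0 and SrcValOffset1 == 4, we get OffAlign0 == 0 and
// OffAlign1 == 4, so OffAlign0 + 4 <= OffAlign1 holds and the accesses
// provably do not overlap.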
28296 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28297 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28298 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28299 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28300 LocationSize Size0 = MUC0.NumBytes;
28301 LocationSize Size1 = MUC1.NumBytes;
28302
28303 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28304 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28305 !Size1.isScalable() && Size0 == Size1 &&
28306 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28307 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28308 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28309 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28310 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28311
28312 // There is no overlap between these relatively aligned accesses of
28313 // similar size. Return no alias.
28314 if ((OffAlign0 + static_cast<int64_t>(
28315 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28316 (OffAlign1 + static_cast<int64_t>(
28317 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28318 return false;
28319 }
28320
28321 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28322 ? CombinerGlobalAA
28323 : DAG.getSubtarget().useAA();
28324#ifndef NDEBUG
28325 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28326 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28327 UseAA = false;
28328#endif
28329
28330 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28331 Size0.hasValue() && Size1.hasValue() &&
28332 // Can't represent a scalable size + fixed offset in LocationSize
28333 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28334 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28335 // Use alias analysis information.
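// Editor's note: MemoryLocation cannot carry the MMO offsets directly, so
// each access's size below is conservatively grown by its offset above
// MinOffset (the smaller of the two MMO offsets) before the AA query.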
28336 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28337 int64_t Overlap0 =
28338 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28339 int64_t Overlap1 =
28340 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28341 LocationSize Loc0 =
28342 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28343 LocationSize Loc1 =
28344 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28345 if (AA->isNoAlias(
28346 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28347 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28348 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28349 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28350 return false;
28351 }
28352
28353 // Otherwise we have to assume they alias.
28354 return true;
28355}
28356
28357/// Walk up chain skipping non-aliasing memory nodes,
28358/// looking for aliasing nodes and adding them to the Aliases vector.
28359void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28360 SmallVectorImpl<SDValue> &Aliases) {
28361 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28362 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28363
28364 // Get alias information for node.
28365 // TODO: relax aliasing for unordered atomics (see D66309)
28366 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28367
28368 // Starting off.
28369 Chains.push_back(OriginalChain);
28370 unsigned Depth = 0;
28371
28372 // Attempt to improve chain by a single step
28373 auto ImproveChain = [&](SDValue &C) -> bool {
28374 switch (C.getOpcode()) {
28375 case ISD::EntryToken:
28376 // No need to mark EntryToken.
28377 C = SDValue();
28378 return true;
28379 case ISD::LOAD:
28380 case ISD::STORE: {
28381 // Get alias information for C.
28382 // TODO: Relax aliasing for unordered atomics (see D66309)
28383 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28384 cast<LSBaseSDNode>(C.getNode())->isSimple();
28385 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28386 // Look further up the chain.
28387 C = C.getOperand(0);
28388 return true;
28389 }
28390 // Alias, so stop here.
28391 return false;
28392 }
28393
28394 case ISD::CopyFromReg:
28395 // Always forward past CopyFromReg.
28396 C = C.getOperand(0);
28397 return true;
28398
28399 case ISD::LIFETIME_START:
28400 case ISD::LIFETIME_END: {
28401 // We can forward past any lifetime start/end that can be proven not to
28402 // alias the memory access.
28403 if (!mayAlias(N, C.getNode())) {
28404 // Look further up the chain.
28405 C = C.getOperand(0);
28406 return true;
28407 }
28408 return false;
28409 }
28410 default:
28411 return false;
28412 }
28413 };
28414
28415 // Look at each chain and determine if it is an alias. If so, add it to the
28416 // aliases list. If not, then continue up the chain looking for the next
28417 // candidate.
28418 while (!Chains.empty()) {
28419 SDValue Chain = Chains.pop_back_val();
28420
28421 // Don't bother if we've seen Chain before.
28422 if (!Visited.insert(Chain.getNode()).second)
28423 continue;
28424
28425 // For TokenFactor nodes, look at each operand and only continue up the
28426 // chain until we reach the depth limit.
28427 //
28428 // FIXME: The depth check could be made to return the last non-aliasing
28429 // chain we found before we hit a tokenfactor rather than the original
28430 // chain.
28431 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28432 Aliases.clear();
28433 Aliases.push_back(OriginalChain);
28434 return;
28435 }
28436
28437 if (Chain.getOpcode() == ISD::TokenFactor) {
28438 // We have to check each of the operands of the token factor for "small"
28439 // token factors, so we queue them up. Adding the operands to the queue
28440 // (stack) in reverse order maintains the original order and increases the
28441 // likelihood that getNode will find a matching token factor (CSE.)
28442 if (Chain.getNumOperands() > 16) {
28443 Aliases.push_back(Chain);
28444 continue;
28445 }
28446 for (unsigned n = Chain.getNumOperands(); n;)
28447 Chains.push_back(Chain.getOperand(--n));
28448 ++Depth;
28449 continue;
28450 }
28451 // Everything else
28452 if (ImproveChain(Chain)) {
28453 // Updated chain found; consider the new chain if one exists.
28454 if (Chain.getNode())
28455 Chains.push_back(Chain);
28456 ++Depth;
28457 continue;
28458 }
28459 // No improved chain is possible; treat this chain as an alias.
28460 Aliases.push_back(Chain);
28461 }
28462}
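As a concrete illustration of the walk above (an editor's sketch, not taken from the source): suppose N is a simple load whose chain is a TokenFactor of (load X) and (store Y). The TokenFactor is expanded and both operands are queued. The load operand is stepped past, because two simple loads never need ordering, and the walk continues up its chain; the store operand is recorded in Aliases if mayAlias(N, store Y) holds, and is otherwise stepped past as well.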
28463
28464/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28465/// (aliasing node.)
28466SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28467 if (OptLevel == CodeGenOptLevel::None)
28468 return OldChain;
28469
28470 // Ops for replacing token factor.
28471 SmallVector<SDValue, 8> Aliases;
28472
28473 // Accumulate all the aliases to this node.
28474 GatherAllAliases(N, OldChain, Aliases);
28475
28476 // If no operands then chain to entry token.
28477 if (Aliases.empty())
28478 return DAG.getEntryNode();
28479
28480 // If a single operand then chain to it. We don't need to revisit it.
28481 if (Aliases.size() == 1)
28482 return Aliases[0];
28483
28484 // Construct a custom tailored token factor.
28485 return DAG.getTokenFactor(SDLoc(N), Aliases);
28486}
28487
28488// This function tries to collect a bunch of potentially interesting
28489// nodes to improve the chains of, all at once. This might seem
28490// redundant, as this function gets called when visiting every store
28491// node, so why not let the work be done on each store as it's visited?
28492//
28493// I believe this is mainly important because mergeConsecutiveStores
28494// is unable to deal with merging stores of different sizes, so unless
28495// we improve the chains of all the potential candidates up-front
28496// before running mergeConsecutiveStores, it might only see some of
28497// the nodes that will eventually be candidates, and then not be able
28498// to go from a partially-merged state to the desired final
28499// fully-merged state.
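// An editor's sketch (not part of the original source) of the effect:
//
//   before:  St chains on st1, st1 chains on st2, st2 chains on Chain
//   after:   St', st1', st2' each chain on an improved chain, and a
//            TokenFactor over { st2', st1', St' } replaces St in the DAG,
//
// so every candidate store becomes visible to later combines such as
// mergeConsecutiveStores at once.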
28500
28501bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28502 SmallVector<StoreSDNode *, 8> ChainedStores;
28503 StoreSDNode *STChain = St;
28504 // Intervals records which offsets from BaseIndex have been covered. In
28505 // the common case, every store writes to the immediately preceding
28506 // address and is thus merged with the previous interval at insertion time.
28507
28508 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28509 IntervalMapHalfOpenInfo<int64_t>>;
28510 IMap::Allocator A;
28511 IMap Intervals(A);
28512
28513 // This holds the base pointer, index, and the offset in bytes from the base
28514 // pointer.
28515 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28516
28517 // We must have a base and an offset.
28518 if (!BasePtr.getBase().getNode())
28519 return false;
28520
28521 // Do not handle stores to undef base pointers.
28522 if (BasePtr.getBase().isUndef())
28523 return false;
28524
28525 // Do not handle stores to opaque types
28526 if (St->getMemoryVT().isZeroSized())
28527 return false;
28528
28529 // BaseIndexOffset assumes that offsets are fixed-size, which
28530 // is not valid for scalable vectors where the offsets are
28531 // scaled by `vscale`, so bail out early.
28532 if (St->getMemoryVT().isScalableVT())
28533 return false;
28534
28535 // Add ST's interval.
28536 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28537 std::monostate{});
28538
28539 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28540 if (Chain->getMemoryVT().isScalableVector())
28541 return false;
28542
28543 // If the chain has more than one use, then we can't reorder the mem ops.
28544 if (!SDValue(Chain, 0)->hasOneUse())
28545 break;
28546 // TODO: Relax for unordered atomics (see D66309)
28547 if (!Chain->isSimple() || Chain->isIndexed())
28548 break;
28549
28550 // Find the base pointer and offset for this memory node.
28551 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28552 // Check that the base pointer is the same as the original one.
28553 int64_t Offset;
28554 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28555 break;
28556 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28557 // Make sure we don't overlap with other intervals by checking the ones to
28558 // the left or right before inserting.
28559 auto I = Intervals.find(Offset);
28560 // If there's a next interval, we should end before it.
28561 if (I != Intervals.end() && I.start() < (Offset + Length))
28562 break;
28563 // If there's a previous interval, we should start after it.
28564 if (I != Intervals.begin() && (--I).stop() <= Offset)
28565 break;
28566 Intervals.insert(Offset, Offset + Length, std::monostate{});
28567
28568 ChainedStores.push_back(Chain);
28569 STChain = Chain;
28570 }
28571
28572 // If we didn't find a chained store, exit.
28573 if (ChainedStores.empty())
28574 return false;
28575
28576 // Improve all chained stores (St and ChainedStores members) starting from
28577 // where the store chain ended and return a single TokenFactor.
28578 SDValue NewChain = STChain->getChain();
28579 SmallVector<SDValue, 8> TFOps;
28580 for (unsigned I = ChainedStores.size(); I;) {
28581 StoreSDNode *S = ChainedStores[--I];
28582 SDValue BetterChain = FindBetterChain(S, NewChain);
28583 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28584 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28585 TFOps.push_back(SDValue(S, 0));
28586 ChainedStores[I] = S;
28587 }
28588
28589 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28590 SDValue BetterChain = FindBetterChain(St, NewChain);
28591 SDValue NewST;
28592 if (St->isTruncatingStore())
28593 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28594 St->getBasePtr(), St->getMemoryVT(),
28595 St->getMemOperand());
28596 else
28597 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28598 St->getBasePtr(), St->getMemOperand());
28599
28600 TFOps.push_back(NewST);
28601
28602 // If we improved every element of TFOps, then we've lost the dependence on
28603 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28604 // the beginning to keep relative order consistent with FindBetterChains.
28605 auto hasImprovedChain = [&](SDValue ST) -> bool {
28606 return ST->getOperand(0) != NewChain;
28607 };
28608 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28609 if (AddNewChain)
28610 TFOps.insert(TFOps.begin(), NewChain);
28611
28612 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28613 CombineTo(St, TF);
28614
28615 // Add TF and its operands to the worklist.
28616 AddToWorklist(TF.getNode());
28617 for (const SDValue &Op : TF->ops())
28618 AddToWorklist(Op.getNode());
28619 AddToWorklist(STChain);
28620 return true;
28621}
28622
28623bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28624 if (OptLevel == CodeGenOptLevel::None)
28625 return false;
28626
28627 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28628
28629 // We must have a base and an offset.
28630 if (!BasePtr.getBase().getNode())
28631 return false;
28632
28633 // Do not handle stores to undef base pointers.
28634 if (BasePtr.getBase().isUndef())
28635 return false;
28636
28637 // Directly improve a chain of disjoint stores starting at St.
28638 if (parallelizeChainedStores(St))
28639 return true;
28640
28641 // Improve St's chain.
28642 SDValue BetterChain = FindBetterChain(St, St->getChain());
28643 if (St->getChain() != BetterChain) {
28644 replaceStoreChain(St, BetterChain);
28645 return true;
28646 }
28647 return false;
28648}
28649
28650/// This is the entry point for the file.
28651 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28652 CodeGenOptLevel OptLevel) {
28653 /// This is the main entry point to this class.
28654 DAGCombiner(*this, AA, OptLevel).Run(Level);
28655}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1113
bool isNegative() const
Definition: APFloat.h:1348
bool isNormal() const
Definition: APFloat.h:1352
bool isDenormal() const
Definition: APFloat.h:1349
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1331
const fltSemantics & getSemantics() const
Definition: APFloat.h:1356
bool isNaN() const
Definition: APFloat.h:1346
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:991
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
bool isLargest() const
Definition: APFloat.h:1364
bool isIEEE() const
Definition: APFloat.h:1366
bool isInfinity() const
Definition: APFloat.h:1345
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:213
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:428
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:208
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1628
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1471
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1309
APInt abs() const
Get the absolute value.
Definition: APInt.h:1752
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:350
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1161
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:237
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:445
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1447
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1090
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:308
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1228
int32_t exactLogBase2() const
Definition: APInt.h:1740
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1597
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1556
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1490
unsigned countLeadingZeros() const
Definition: APInt.h:1564
unsigned logBase2() const
Definition: APInt.h:1718
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:489
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:454
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:450
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:467
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1129
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1236
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:419
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:285
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:275
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:368
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:265
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1521
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:837
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1614
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:345
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
Get the index which selects a specific result in the SDNode.
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:364
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:968
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:565
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:488
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
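For illustration only (DL, X, VT and TLI are assumed to be in scope): a compare against zero built with this helper, using the target's preferred setcc result type.
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue IsZero =
    DAG.getSetCC(DL, CCVT, X, DAG.getConstant(0, DL, VT), ISD::SETEQ);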
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
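A hedged sketch of the kind of fold this query enables (N, N0, N1 and VT assumed in scope): when the operands of an add share no set bits, the add is equivalent to an or.
// add -> or when the operands are provably disjoint.
if (DAG.haveNoCommonBitsSet(N0, N1))
  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);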
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
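As a sketch (Val, DL and VT assumed), the helper is shorthand for an xor against an all-ones constant of the same type:
SDValue NotVal = DAG.getNOT(DL, Val, VT);
// ...which builds the same node as:
SDValue Same = DAG.getNode(ISD::XOR, DL, VT, Val,
                           DAG.getAllOnesConstant(DL, VT));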
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
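A small illustrative sketch (LD0 and LD1 are hypothetical LoadSDNode pointers, N assumed in scope) of merging two memory chains into one token factor:
SmallVector<SDValue, 8> Chains = {LD0->getChain(), LD1->getChain()};
SDValue NewChain = DAG.getTokenFactor(SDLoc(N), Chains);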
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
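For example (a sketch; BasePtr and DL assumed in scope), computing the address eight bytes past a base pointer:
SDValue HiPtr =
    DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(8), DL);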
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
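A typical hedged usage sketch (DL, VT, N0 and N1 assumed): attempt constant folding first and only build a fresh node if it fails.
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
  return Folded;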
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:557
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:493
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
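A sketch of a common use (N0 and VT assumed): a truncate to VT loses no information, up to sign-extension, when the known sign bits cover every bit being dropped.
unsigned SrcBits = N0.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
bool TruncThenSExtIsNoop =
    DAG.ComputeNumSignBits(N0) > SrcBits - DstBits;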
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
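A minimal sketch for a two-result node such as a load (OldNode, NewValue and NewChain are assumed names): rewire the data result and the chain result separately.
DAG.ReplaceAllUsesOfValueWith(SDValue(OldNode, 0), NewValue); // data result
DAG.ReplaceAllUsesOfValueWith(SDValue(OldNode, 1), NewChain); // chain result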
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
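Illustrative sketch (DL and a fixed-width vector type VT assumed): splat the scalar constant 1 across every lane.
SDValue One = DAG.getConstant(1, DL, VT.getScalarType());
SDValue SplatOnes = DAG.getSplatBuildVector(VT, DL, One);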
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
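For example (a sketch; IndexVal, DL and TLI assumed), normalizing an index operand to the pointer-width integer type:
EVT IdxVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue Idx = DAG.getZExtOrTrunc(IndexVal, DL, IdxVT);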
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:574
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:922
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:418
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns whether it is reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:229
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:251
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2193
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2198
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2203
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2208
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
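A small sketch of how such a predicate typically feeds a fold (N0 and N1 assumed to be the operands of a vector ISD::AND): and'ing with an all-ones splat is a no-op.
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
  return N0; // (and X, splat(-1)) -> X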
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:764
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:737
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:484
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1391
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:567
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:728
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1262
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:495
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:797
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:491
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:804
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:551
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1376
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1380
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:702
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:834
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1390
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:485
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:927
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1431
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:788
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:670
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:628
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1373
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:736
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1377
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:772
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:944
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1098
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:654
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:741
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1258
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1392
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:635
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1385
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:659
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:608
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:581
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:999
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:794
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:870
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:756
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1350
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:986
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:823
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:682
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1325
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:902
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:750
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1212
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1393
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:936
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1005
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1028
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:665
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1374
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:959
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:845
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:869
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1381
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:800
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1091
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:501
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1161
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1033
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:523
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1520
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1630
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
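ISD::matchUnaryPredicate (and its two-operand sibling matchBinaryPredicate, listed further below) lets one lambda test a scalar constant or every constant element of a BUILD_VECTOR/SPLAT_VECTOR. A minimal sketch, not taken from this file, assuming an SDNode *N and EVT VT from a typical visit function:

  // Reject the fold unless every (splat) element of operand 1 is an
  // in-range shift amount; undef elements are rejected by default.
  auto IsInRange = [&](ConstantSDNode *C) {
    return C->getAPIntValue().ult(VT.getScalarSizeInBits());
  };
  if (!ISD::matchUnaryPredicate(N->getOperand(1), IsInRange))
    return SDValue();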
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1605
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1625
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1446
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
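getVPMaskIdx and getVPExplicitVectorLengthIdx return std::nullopt when the opcode has no such operand, so a combine can treat VP and non-VP opcodes uniformly. A hedged sketch, assuming SDNode *N is in scope:

  if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) {
    SDValue Mask = N->getOperand(*MaskIdx);   // per-lane predicate
    // ...
  }
  if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) {
    SDValue EVL = N->getOperand(*EVLIdx);     // explicit vector length
    // ...
  }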
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
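The CondCode helpers make it cheap to rewrite comparisons without enumerating codes by hand. A small sketch, assuming ISD::CondCode CC and the operand type EVT OpVT are in scope (names are illustrative):

  ISD::CondCode InvCC  = ISD::getSetCCInverse(CC, OpVT);    // !(X op Y)
  ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);  // (Y op X)
  if (ISD::isIntEqualitySetCC(CC))                           // SETEQ / SETNE
    /* equality-only folds go here */;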
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1516
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1516
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1587
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1503
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1534
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
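isNormalLoad and isNormalStore are the usual guards before memory combines: they filter out extending/truncating and indexed accesses so only the plain form is rewritten. A hedged sketch, assuming SDNode *N:

  if (ISD::isNormalLoad(N)) {
    auto *LD = cast<LoadSDNode>(N);
    if (LD->isSimple())   // neither volatile nor atomic
      /* safe to narrow or re-point this load */;
  }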
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1599
@ VecLoad
Definition: NVPTX.h:88
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
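The m_* matchers above come from the IR-level llvm::PatternMatch namespace and operate on Value*, unlike the SDValue matchers listed further below. A minimal sketch, assuming a Value *V and the binding overload m_Value(Value*&) from PatternMatch.h:

  using namespace llvm::PatternMatch;
  Value *X = nullptr;
  // Matches (~X) op X with the operands in either order; m_Deferred
  // re-checks the value bound by the earlier m_Value.
  if (match(V, m_c_BinOp(m_Not(m_Value(X)), m_Deferred(X))))
    /* X is the un-negated operand here */;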
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or a splat of an integer constant.
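The SDPatternMatch helpers above give DAG combines the same declarative style for SDNodes. A hedged sketch using the sd_match overload listed here, assuming SDNode *N and SelectionDAG &DAG from the surrounding combiner and the SDValue-binding form of m_Value:

  using namespace llvm::SDPatternMatch;
  SDValue X;
  // Matches (srl (any_extend X), constant) and binds X.
  if (sd_match(N, &DAG, m_Srl(m_AnyExt(m_Value(X)), m_ConstInt())))
    /* fold using X here */;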
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4590
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
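The range wrappers above (find, all_of, and friends) replace explicit begin/end iterator pairs. A small sketch, assuming a SmallVector<SDValue, 8> Ops collected by a combine:

  if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
    /* every operand is undef */;
  if (llvm::is_contained(Ops, SDValue()))
    /* at least one slot is still empty */;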
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1540
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2058
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:511
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1522
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
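The integer math helpers above are constant-time bit tricks used throughout the combiner. A short worked sketch (results shown in the comments):

  uint64_t C = 48;                     // 0b110000
  bool Pow2 = isPowerOf2_64(C);        // false
  uint64_t Ceil = PowerOf2Ceil(C);     // 64
  unsigned FloorLog = Log2_64(C);      // 5
  unsigned TZ = llvm::countr_zero(C);  // 4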
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1490
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
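peekThroughBitcasts and isBitwiseNot are commonly combined to see through representation changes before matching. A hedged sketch, assuming an SDValue V:

  SDValue Src = peekThroughBitcasts(V);
  if (isBitwiseNot(Src)) {
    SDValue NotOp = Src.getOperand(0);  // Src == (xor NotOp, all-ones)
    // ...
  }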
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
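isConstOrConstSplat lets scalar and splat-vector constants share one code path. A minimal sketch, assuming SDValue N1 is the operand being inspected:

  if (ConstantSDNode *C = isConstOrConstSplat(N1)) {
    const APInt &Imm = C->getAPIntValue();
    if (Imm.isPowerOf2())
      /* strength-reduce using Imm.logBase2() */;
  }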
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
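narrowShuffleMaskElts and widenShuffleMaskElts rescale a shuffle mask between element widths; narrowing always succeeds, widening can fail. A hedged sketch, assuming ArrayRef<int> Mask describes a shuffle of v4i32 elements:

  SmallVector<int, 16> NarrowMask;
  narrowShuffleMaskElts(2, Mask, NarrowMask);         // express in v8i16 lanes
  SmallVector<int, 16> WideMask;
  if (widenShuffleMaskElts(2, NarrowMask, WideMask))  // back to v4i32 if possible
    /* WideMask is equivalent to the original Mask */;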
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
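The Align helpers above keep alignment arithmetic in the type system. A short worked sketch (results shown in the comments):

  Align A(16);
  Align B = commonAlignment(A, /*Offset=*/8);  // Align(8)
  bool Ok = isAligned(B, 24);                  // true: 24 is a multiple of 8
  unsigned L = Log2(B);                        // 3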
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:325
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:321
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:317
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:262
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:331
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:269
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:258
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
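Most combines start by interrogating the EVT of the node being visited. A hedged sketch of typical queries, assuming EVT VT and LLVMContext &Ctx are in scope:

  if (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i8) {
    // Same element count, 16-bit elements: twice the store size of VT.
    EVT WideVT = EVT::getVectorVT(Ctx, MVT::i16, VT.getVectorNumElements());
    if (WideVT.isRound() && WideVT.bitsGT(VT))
      /* ... */;
  }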
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:79
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
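KnownBits results usually come from SelectionDAG::computeKnownBits and drive folds that need bit-level facts. A hedged sketch, assuming SelectionDAG &DAG and an operand SDValue N0:

  KnownBits Known = DAG.computeKnownBits(N0);
  if (Known.isConstant())
    /* all bits proven: Known.getConstant() is the value */;
  else if (Known.isNonNegative() && Known.countMaxActiveBits() <= 16)
    /* value fits in 16 unsigned bits and the sign bit is clear */;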
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
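SDNodeFlags gate the more aggressive rewrites; a combine typically reads them off the node it is visiting and re-applies them to anything it rebuilds. A minimal sketch, assuming SDNode *N:

  SDNodeFlags Flags = N->getFlags();
  if (Flags.hasNoUnsignedWrap() && Flags.hasNoSignedWrap())
    /* the operation can wrap in neither sense; stronger folds are legal */;
  SDNodeFlags NewFlags;
  NewFlags.setNoUnsignedWrap(Flags.hasNoUnsignedWrap());  // carry nuw forward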
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:310
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...